Skip to content

API for Developers

Here you will find the API provided for developers.

Solver Data

Class and functions related to storing the solver’s results.

Classes

perprof.solver_data.SolverData

Store data from one solver.

Attributes:

  • algname (str) –

    Name of the algorithm

  • data (pandas.DataFrame) –

    DataFrame with columns

    • “name”: The problem name.
    • “exit”: Exit flag to determine successful termination.
    • “time”: Elapsed time for the algorithm.
    • “fval”: Function value at the solution.
    • “primal”: Primal residual at the solution.
    • “dual”: Dual residual at the solution.
  • success (list[str]) –

    List of strings used to define what is a successful termination.

Source code in perprof/solver_data.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
class SolverData:
    """Store data from one solver.

    Attributes:
        algname (str):
            Name of the algorithm
        data (pandas.DataFrame):
            DataFrame with columns

            - "name": The problem name.
            - "exit": Exit flag to determine successful termination.
            - "time": Elapsed time for the algorithm.
            - "fval": Function value at the solution.
            - "primal": Primal residual at the solution.
            - "dual": Dual residual at the solution.
        success (list[str]):
            List of strings used to define what is a successful termination.
    """

    def __init__(
        self,
        algname,
        data,
        success=None,
        read_csv_args=None,
    ):
        """Initializes the SolverData from files or DataFrames.

        Args:
            algname (str):
                Name of the algorithm.
            data (Union[str, Path, pandas.DataFrame]):
                File name of csv to read or DataFrame.
            success (list[str]):
                Vector of flags considered as success.
                Defaults to ["c", "converged", "solved", "success"].
            read_csv_args (dict):
                Arguments to be passed to `pandas.read_csv` if `data` is a file name.

        Raises:
            TypeError: If the data is not a str, Path, or pandas.DataFrame.
            ValueError: If the data is missing one of the mandatory columns
                "name", "exit", or "time".
        """
        self.algname = algname
        # A falsy value (None or empty list) selects the default success flags.
        if not success:
            success = ["c", "converged", "solved", "success"]
        self.success = success
        if isinstance(data, (str, Path)):
            self.data = pd.read_csv(data, **(read_csv_args or {}))
        elif isinstance(data, pd.DataFrame):
            self.data = data
        else:
            raise TypeError("Unexpected type for data input")

        # The columns name, exit, and time are mandatory.
        for col in ["name", "exit", "time"]:
            if col not in self.data.columns:
                raise ValueError(f"Missing column {col}")
        # Optional columns default to NaN when absent.
        for col in ["fval", "primal", "dual"]:
            if col not in self.data:
                self.data[col] = np.nan
Functions
__init__(algname, data, success=None, read_csv_args=None)

Parameters:

  • algname (str) –

    Name of the algorithm.

  • data (Union[str, Path, pandas.DataFrame]) –

    File name of csv to read or DataFrame.

  • success (list[str]) –

    Vector of flags considered as success.

  • read_csv_args (dict) –

    Arguments to be passed to pandas.read_csv if data is a file name.

Raises:

  • TypeError

    If the data is not a str, Path, or pandas.DataFrame.

Source code in perprof/solver_data.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def __init__(
    self,
    algname,
    data,
    success=None,
    read_csv_args=None,
):
    """Initializes the SolverData from files or DataFrames.

    Args:
        algname (str):
            Name of the algorithm.
        data (Union[str, Path, pandas.DataFrame]):
            File name of csv to read or DataFrame.
        success (list[str]):
            Vector of flags considered as success.
            Defaults to ["c", "converged", "solved", "success"].
        read_csv_args (dict):
            Arguments to be passed to `pandas.read_csv` if `data` is a file name.

    Raises:
        TypeError: If the data is not a str, Path, or pandas.DataFrame.
        ValueError: If the data is missing one of the mandatory columns
            "name", "exit", or "time".
    """
    self.algname = algname
    # A falsy value (None or empty list) selects the default success flags.
    if not success:
        success = ["c", "converged", "solved", "success"]
    self.success = success
    if isinstance(data, (str, Path)):
        self.data = pd.read_csv(data, **(read_csv_args or {}))
    elif isinstance(data, pd.DataFrame):
        self.data = data
    else:
        raise TypeError("Unexpected type for data input")

    # The columns name, exit, and time are mandatory.
    for col in ["name", "exit", "time"]:
        if col not in self.data.columns:
            raise ValueError(f"Missing column {col}")
    # Optional columns default to NaN when absent.
    for col in ["fval", "primal", "dual"]:
        if col not in self.data:
            self.data[col] = np.nan

Functions

perprof.solver_data.read_table(filename)

Read a table file as described in the documentation section File Format.

Parameters:

  • filename (str) –

    Name of the table file.

Returns:

Source code in perprof/solver_data.py
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
def read_table(filename):
    """
    Read a table file as described in the documentation section [File Format](file-format).

    Args:
        filename (str):
            Name of the table file.

    Returns:
        solver (SolverData): Parsed data

    Raises:
        ValueError: If the file does not contain a YAML header delimited by
            two "---" lines.
    """
    # Defaults; any of these may be overwritten by the file's YAML header.
    options = {
        "algname": None,
        "success": "c,converged,solved,success",
        "free_format": True,
        # 1-based positions of each field in the whitespace-separated data section.
        "col_name": 1,
        "col_exit": 2,
        "col_time": 3,
        "col_fval": 4,
        "col_primal": 5,
        "col_dual": 6,
    }

    with open(filename, encoding="utf-8") as file_:
        lines = file_.readlines()

    # Split the file at the second "---" line: everything before it is the
    # YAML header, everything after it is the data table.
    yaml_header = None
    data_lines = []
    in_yaml = False
    for i, line in enumerate(lines):
        if line.strip() == "---":
            if in_yaml:
                yaml_header = lines[0:i]
                data_lines = lines[i + 1 :]
                break
            in_yaml = True
    if yaml_header is None:
        # Previously this fell through with unbound variables (NameError).
        raise ValueError(
            f"Missing YAML header delimited by two '---' lines in {filename}"
        )

    _parse_yaml(options, "".join(yaml_header))
    options["success"] = options["success"].split(",")
    # Reorder the canonical header to match the column layout of this file.
    data_header = ["name", "exit", "time", "fval", "primal", "dual"]
    header_order = [
        options["col_name"],
        options["col_exit"],
        options["col_time"],
        options["col_fval"],
        options["col_primal"],
        options["col_dual"],
    ]
    data_header = [data_header[i - 1] for i in header_order]
    data = pd.read_csv(
        StringIO("".join([" ".join(data_header) + "\n"] + data_lines)),
        sep=r"\s+",  # delim_whitespace=True is deprecated in pandas 2.x
    )

    return SolverData(
        options["algname"],
        data,
        success=options["success"],
    )

Profile Data

Class to store the profile configuration and data.

Classes

perprof.profile_data.ProfileData

Computes and stores the performance profile.

This class will store and compute the performance profile of given solvers. This is only the most basic profile choice; it only uses the time and the convergence status.

Attributes:

  • solvers (list[SolverData]) –

    List of solver_data.SolverData objects associated with this performance profile.

  • subset (list[str]) –

    If not None, used to restrict the problems in which the profile is created.

  • ratio (numpy.array) –

    Ratio matrix computed using the best time for each problem.

  • breakpoints (numpy.array) –

    Array of breakpoints obtained from the ratio matrix.

  • cumulative (numpy.array) –

    Matrix of the cumulative distribution of problems. Dimensions are len(breakpoints) by len(solvers).

Source code in perprof/profile_data.py
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
class ProfileData:
    """Computes and stores the performance profile.

    This class will store and compute the performance profile of given solvers.
    This is only the most basic profile choice; it only uses the time and the convergence status.

    Attributes:
        solvers (list[SolverData]):
            List of solver_data.SolverData objects associated with this performance profile.
        subset (list[str]):
            If not None, used to restrict the problems in which the profile is created.
        ratio (numpy.array):
            Ratio matrix computed using the best time for each problem.
        breakpoints (numpy.array):
            Array of breakpoints obtained from the ratio matrix.
        cumulative (numpy.array):
            Matrix of the cumulative distribution of problems.
            Dimensions are len(breakpoints) by len(solvers).
    """

    def __init__(self, *solvers, subset=None):
        """Initialize the profile structure with solver_data.SolverData or files.

        Args:
            *solvers (Union[str, Path, SolverData]):
                Arguments of type str/Path to be read through solver_data.read_table
                or of type solver_data.SolverData. At least 2 arguments are required.
            subset (list[str]):
                If not None, restricts the solvers data to only these problems.

        Raises:
            ValueError: If a solver argument is not a str, Path, or SolverData,
                or if fewer than two solvers are given.
        """
        self.solvers = []
        for solver in solvers:
            if isinstance(solver, (str, Path)):
                self.solvers.append(read_table(solver))
            elif isinstance(solver, SolverData):
                self.solvers.append(solver)
            else:
                raise ValueError(f"Unexpected type for solver input: {type(solver)}")
        self.subset = subset

        # Variables that will be filled by self.process()
        self._solvers_data = None
        self.ratio = None
        self._best_times = None
        self.breakpoints = None
        self.cumulative = None
        self.process()

    def process(self):
        """
        Process the solver data.

        If the solvers argument is updated, this should be called again.
        This updates the internal values and returns nothing.

        Raises:
            ValueError: If fewer than two solvers are stored.
        """
        if len(self.solvers) <= 1:
            raise ValueError("A Profile needs two solvers, at least")

        # create the reduced dataset: |subset| x |solvers|
        cols = ["name", "time"]
        self._solvers_data = self.solvers[0].data[cols].copy()
        for solver in self.solvers[1:]:
            self._solvers_data = self._solvers_data.join(
                solver.data[cols].set_index("name"),
                on="name",
                rsuffix="_" + solver.algname,
            )
        self._solvers_data = self._solvers_data.rename(
            columns={"time": "time_" + self.solvers[0].algname}
        )

        # Set to inf the ones that fail convergence. Match by problem name:
        # applying the solver's own boolean mask positionally would mark the
        # wrong rows when a solver lists problems in a different order.
        for solver in self.solvers:
            failed = solver.data.loc[~solver.data.exit.isin(solver.success), "name"]
            if not failed.empty:
                self._solvers_data.loc[
                    self._solvers_data.name.isin(failed), "time_" + solver.algname
                ] = float("inf")

        # fillna is not in-place; assign the result so problems missing from a
        # solver count as failures (inf) instead of keeping NaN.
        self._solvers_data = self._solvers_data.fillna(float("inf"))
        if self.subset:
            self._solvers_data = self._solvers_data[
                self._solvers_data.name.isin(self.subset)
            ]

        # Compute the minimum time per problem across all solvers
        self._best_times = self._solvers_data.iloc[:, 1:].min(axis=1).values

        # Ratio of each solver's time to the best time for each problem
        self.ratio = (
            self._solvers_data.iloc[:, 1:] / self._best_times[:, np.newaxis]
        ).values
        self.ratio[np.isnan(self.ratio)] = float("inf")
        self.breakpoints = np.sort(np.unique(self.ratio.reshape(-1)))
        # This removes inf and nan
        self.breakpoints = self.breakpoints[self.breakpoints < float("inf")]
        # Fraction of problems each solver finishes within each ratio breakpoint
        self.cumulative = (
            self.ratio[np.newaxis, :, :] <= self.breakpoints[:, np.newaxis, np.newaxis]
        )
        self.cumulative = self.cumulative.sum(axis=1) / self.ratio.shape[0]
Functions
__init__(*solvers, subset=None)

Parameters:

  • *solvers (Union[str, Path, SolverData]) –

    Arguments of type str/Path to be read through solver_data.read_table or of type solver_data.SolverData. At least 2 arguments are required

  • subset (list[str]) –

    If not None, restricts the solvers data to only these problems.

Source code in perprof/profile_data.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
def __init__(self, *solvers, subset=None):
    """Initialize the profile structure with solver_data.SolverData or files.

    Args:
        *solvers (Union[str, Path, SolverData]):
            Arguments of type str/Path to be read through solver_data.read_table
            or of type solver_data.SolverData. At least 2 arguments are required.
        subset (list[str]):
            If not None, restricts the solvers data to only these problems.

    Raises:
        ValueError: If a solver argument is not a str, Path, or SolverData,
            or if fewer than two solvers are given.
    """
    self.solvers = []
    for solver in solvers:
        if isinstance(solver, (str, Path)):
            self.solvers.append(read_table(solver))
        elif isinstance(solver, SolverData):
            self.solvers.append(solver)
        else:
            raise ValueError(f"Unexpected type for solver input: {type(solver)}")
    self.subset = subset

    # Variables that will be filled by self.process()
    self._solvers_data = None
    self.ratio = None
    self._best_times = None
    self.breakpoints = None
    self.cumulative = None
    self.process()
process()

Process the solver data.

If the solvers argument is updated, this should be called again. This updates the internal values and returns nothing.

Source code in perprof/profile_data.py
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
def process(self):
    """
    Process the solver data.

    If the solvers argument is updated, this should be called again.
    This updates the internal values and returns nothing.

    Raises:
        ValueError: If fewer than two solvers are stored.
    """
    if len(self.solvers) <= 1:
        raise ValueError("A Profile needs two solvers, at least")

    # create the reduced dataset: |subset| x |solvers|
    cols = ["name", "time"]
    self._solvers_data = self.solvers[0].data[cols].copy()
    for solver in self.solvers[1:]:
        self._solvers_data = self._solvers_data.join(
            solver.data[cols].set_index("name"),
            on="name",
            rsuffix="_" + solver.algname,
        )
    self._solvers_data = self._solvers_data.rename(
        columns={"time": "time_" + self.solvers[0].algname}
    )

    # Set to inf the ones that fail convergence. Match by problem name:
    # applying the solver's own boolean mask positionally would mark the
    # wrong rows when a solver lists problems in a different order.
    for solver in self.solvers:
        failed = solver.data.loc[~solver.data.exit.isin(solver.success), "name"]
        if not failed.empty:
            self._solvers_data.loc[
                self._solvers_data.name.isin(failed), "time_" + solver.algname
            ] = float("inf")

    # fillna is not in-place; assign the result so problems missing from a
    # solver count as failures (inf) instead of keeping NaN.
    self._solvers_data = self._solvers_data.fillna(float("inf"))
    if self.subset:
        self._solvers_data = self._solvers_data[
            self._solvers_data.name.isin(self.subset)
        ]

    # Compute the minimum time per problem across all solvers
    self._best_times = self._solvers_data.iloc[:, 1:].min(axis=1).values

    # Ratio of each solver's time to the best time for each problem
    self.ratio = (
        self._solvers_data.iloc[:, 1:] / self._best_times[:, np.newaxis]
    ).values
    self.ratio[np.isnan(self.ratio)] = float("inf")
    self.breakpoints = np.sort(np.unique(self.ratio.reshape(-1)))
    # This removes inf and nan
    self.breakpoints = self.breakpoints[self.breakpoints < float("inf")]
    # Fraction of problems each solver finishes within each ratio breakpoint
    self.cumulative = (
        self.ratio[np.newaxis, :, :] <= self.breakpoints[:, np.newaxis, np.newaxis]
    )
    self.cumulative = self.cumulative.sum(axis=1) / self.ratio.shape[0]

Functions