Skip to content

API for Developers

Here you will find the API provided for developers.

Solver Data

Class and functions related to storing the solver’s results.

Classes

perprof.solver_data.SolverData

Store data from one solver.

Attributes:

  • algname (str) –

    Name of the algorithm

  • data (pandas.DataFrame) –

    DataFrame with columns

    • “name”: The problem name.
    • “exit”: Exit flag to determine successful termination.
    • “time”: Elapsed time for the algorithm.
    • “fval”: Function value at the solution.
    • “primal”: Primal residual at the solution.
    • “dual”: Dual residual at the solution.
  • success (list[str]) –

    List of strings used to define what is a successful termination.

Source code in perprof/solver_data.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
class SolverData:
    """Store data from one solver.

    Attributes:
        algname (str):
            Name of the algorithm
        data (pandas.DataFrame):
            DataFrame with columns

            - "name": The problem name.
            - "exit": Exit flag to determine successful termination.
            - "time": Elapsed time for the algorithm.
            - "fval": Function value at the solution.
            - "primal": Primal residual at the solution.
            - "dual": Dual residual at the solution.
        success (list[str]):
            List of strings used to define what is a successful termination.
    """

    def __init__(
        self,
        algname,
        data,
        success=None,
        read_csv_args=None,
    ):
        """Initializes the SolverData from files or DataFrames.

        Args:
            algname (str):
                Name of the algorithm.
            data (Union[str, Path, pandas.DataFrame]):
                File name of csv to read or DataFrame.
            success (list[str]):
                Vector of flags considered as success.
                Defaults to ["c", "converged", "solved", "success"].
            read_csv_args (dict):
                Arguments to be passed to `pandas.read_csv` if `data` is a file name.

        Raises:
            TypeError: If the data is not a str, Path, or pandas.DataFrame.
            ValueError: If the data is missing one of the mandatory columns
                "name", "exit", or "time".
        """
        self.algname = algname
        # A falsy value (None or empty list) selects the default success flags.
        if not success:
            success = ["c", "converged", "solved", "success"]
        self.success = success
        if isinstance(data, (str, Path)):
            self.data = pd.read_csv(data, **(read_csv_args or {}))
        elif isinstance(data, pd.DataFrame):
            self.data = data
        else:
            raise TypeError("Unexpected type for data input")

        # The columns name, exit, and time are mandatory.
        for col in ["name", "exit", "time"]:
            if col not in self.data.columns:
                raise ValueError(f"Missing column {col}")
        # Optional columns default to NaN when absent.
        for col in ["fval", "primal", "dual"]:
            if col not in self.data:
                self.data[col] = np.nan
Functions
__init__(algname, data, success=None, read_csv_args=None)

Parameters:

  • algname (str) –

    Name of the algorithm.

  • data (Union[str, Path, pandas.DataFrame]) –

    File name of csv to read or DataFrame.

  • success (list[str]) –

    Vector of flags considered as success.

  • read_csv_args (dict) –

    Arguments to be passed to pandas.read_csv if data is a file name.

Raises:

  • TypeError

    If the data is not a str, Path, or pandas.DataFrame.

Source code in perprof/solver_data.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def __init__(
    self,
    algname,
    data,
    success=None,
    read_csv_args=None,
):
    """Initializes the SolverData from files or DataFrames.

    Args:
        algname (str):
            Name of the algorithm.
        data (Union[str, Path, pandas.DataFrame]):
            File name of csv to read or DataFrame.
        success (list[str]):
            Vector of flags considered as success.
            Defaults to ["c", "converged", "solved", "success"].
        read_csv_args (dict):
            Arguments to be passed to `pandas.read_csv` if `data` is a file name.

    Raises:
        TypeError: If the data is not a str, Path, or pandas.DataFrame.
        ValueError: If the data is missing one of the mandatory columns
            "name", "exit", or "time".
    """
    self.algname = algname
    # A falsy value (None or empty list) selects the default success flags.
    if not success:
        success = ["c", "converged", "solved", "success"]
    self.success = success
    if isinstance(data, (str, Path)):
        self.data = pd.read_csv(data, **(read_csv_args or {}))
    elif isinstance(data, pd.DataFrame):
        self.data = data
    else:
        raise TypeError("Unexpected type for data input")

    # The columns name, exit, and time are mandatory.
    for col in ["name", "exit", "time"]:
        if col not in self.data.columns:
            raise ValueError(f"Missing column {col}")
    # Optional columns default to NaN when absent.
    for col in ["fval", "primal", "dual"]:
        if col not in self.data:
            self.data[col] = np.nan

Functions

perprof.solver_data.read_table(filename)

Read a table file as described in the documentation section File Format.

Parameters:

  • filename (str) –

    Name of the table file.

Returns:

Source code in perprof/solver_data.py
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
def read_table(filename):
    """
    Read a table file as described in the documentation section [File Format](file-format).

    Args:
        filename (str):
            Name of the table file.

    Returns:
        solver (SolverData): Parsed data

    Raises:
        ValueError: If the file does not contain a YAML header delimited by
            two "---" lines.
    """
    # Defaults; any of these may be overwritten by the file's YAML header.
    options = {
        "algname": None,
        "success": "c,converged,solved,success",
        "free_format": True,
        # 1-based positions of each field in the whitespace-separated data section.
        "col_name": 1,
        "col_exit": 2,
        "col_time": 3,
        "col_fval": 4,
        "col_primal": 5,
        "col_dual": 6,
    }

    with open(filename, encoding="utf-8") as file_:
        lines = file_.readlines()

    # Split the file at the second "---" line: everything before it is the
    # YAML header, everything after it is the data table.
    yaml_header = None
    data_lines = []
    in_yaml = False
    for i, line in enumerate(lines):
        if line.strip() == "---":
            if in_yaml:
                yaml_header = lines[0:i]
                data_lines = lines[i + 1 :]
                break
            in_yaml = True
    if yaml_header is None:
        # Previously this fell through with unbound variables (NameError).
        raise ValueError(
            f"Missing YAML header delimited by two '---' lines in {filename}"
        )

    _parse_yaml(options, "".join(yaml_header))
    options["success"] = options["success"].split(",")
    # Reorder the canonical header to match the column layout of this file.
    data_header = ["name", "exit", "time", "fval", "primal", "dual"]
    header_order = [
        options["col_name"],
        options["col_exit"],
        options["col_time"],
        options["col_fval"],
        options["col_primal"],
        options["col_dual"],
    ]
    data_header = [data_header[i - 1] for i in header_order]
    data = pd.read_csv(
        StringIO("".join([" ".join(data_header) + "\n"] + data_lines)),
        sep=r"\s+",  # delim_whitespace=True is deprecated in pandas 2.x
    )

    return SolverData(
        options["algname"],
        data,
        success=options["success"],
    )

Profile Data

Class to store the profile configuration and data.

Classes

perprof.profile_data.ProfileData

Computes and stores the performance profile.

This class will store and compute the performance profile of given solvers. This is only the most basic profile choice; it only uses the time and the convergence status.

Attributes:

  • solvers (list[SolverData]) –

    List of solver_data.SolverData objects associated with this performance profile.

  • subset (list[str]) –

    If not None, used to restrict the problems in which the profile is created.

  • ratio (numpy.array) –

    Ratio matrix computed using the best time for each problem.

  • breakpoints (numpy.array) –

    Array of breakpoints obtained from the ratio matrix.

  • cumulative (numpy.array) –

    Matrix of the cumulative distribution of problems. Dimensions are len(breakpoints) by len(solvers).

Source code in perprof/profile_data.py
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
class ProfileData:
    """Computes and stores the performance profile.

    This class will store and compute the performance profile of given solvers.
    This is only the most basic profile choice; it only uses the time and the convergence status.

    Attributes:
        solvers (list[SolverData]):
            List of solver_data.SolverData objects associated with this performance profile.
        subset (list[str]):
            If not None, used to restrict the problems in which the profile is created.
        ratio (numpy.array):
            Ratio matrix computed using the best time for each problem.
        breakpoints (numpy.array):
            Array of breakpoints obtained from the ratio matrix.
        cumulative (numpy.array):
            Matrix of the cumulative distribution of problems.
            Dimensions are len(breakpoints) by len(solvers).
    """

    def __init__(self, *solvers, subset=None):
        """Initialize the profile structure with solver_data.SolverData or files.

        Args:
            *solvers (Union[str, Path, SolverData]):
                Arguments of type str/Path to be read through solver_data.read_table
                or of type solver_data.SolverData. At least 2 arguments are required.
            subset (list[str]):
                If not None, restricts the solvers data to only these problems.

        Raises:
            ValueError: If a solver argument is not a str, Path, or SolverData,
                or if fewer than two solvers are given.
        """
        self.solvers = []
        for solver in solvers:
            if isinstance(solver, (str, Path)):
                self.solvers.append(read_table(solver))
            elif isinstance(solver, SolverData):
                self.solvers.append(solver)
            else:
                raise ValueError(f"Unexpected type for solver input: {type(solver)}")
        self.subset = subset

        # Variables that will be filled by self.process()
        self._solvers_data = None
        self.ratio = None
        self._best_times = None
        self.breakpoints = None
        self.cumulative = None
        self.process()

    def process(self):
        """
        Process the solver data.

        If the solvers argument is updated, this should be called again.
        This updates the internal values and returns nothing.

        Raises:
            ValueError: If fewer than two solvers are stored.
        """
        if len(self.solvers) <= 1:
            raise ValueError("A Profile needs two solvers, at least")

        # create the reduced dataset: |subset| x |solvers|
        cols = ["name", "time"]
        self._solvers_data = self.solvers[0].data[cols].copy()
        for solver in self.solvers[1:]:
            self._solvers_data = self._solvers_data.join(
                solver.data[cols].set_index("name"),
                on="name",
                rsuffix="_" + solver.algname,
            )
        self._solvers_data = self._solvers_data.rename(
            columns={"time": "time_" + self.solvers[0].algname}
        )

        # Set to inf the ones that fail convergence. Match by problem name:
        # applying the solver's own boolean mask positionally would mark the
        # wrong rows when a solver lists problems in a different order.
        for solver in self.solvers:
            failed = solver.data.loc[~solver.data.exit.isin(solver.success), "name"]
            if not failed.empty:
                self._solvers_data.loc[
                    self._solvers_data.name.isin(failed), "time_" + solver.algname
                ] = float("inf")

        # fillna is not in-place; assign the result so problems missing from a
        # solver count as failures (inf) instead of keeping NaN.
        self._solvers_data = self._solvers_data.fillna(float("inf"))
        if self.subset:
            self._solvers_data = self._solvers_data[
                self._solvers_data.name.isin(self.subset)
            ]

        # Compute the minimum time per problem across all solvers
        self._best_times = self._solvers_data.iloc[:, 1:].min(axis=1).values

        # Ratio of each solver's time to the best time for each problem
        self.ratio = (
            self._solvers_data.iloc[:, 1:] / self._best_times[:, np.newaxis]
        ).values
        self.ratio[np.isnan(self.ratio)] = float("inf")
        self.breakpoints = np.sort(np.unique(self.ratio.reshape(-1)))
        # This removes inf and nan
        self.breakpoints = self.breakpoints[self.breakpoints < float("inf")]
        # Fraction of problems each solver finishes within each ratio breakpoint
        self.cumulative = (
            self.ratio[np.newaxis, :, :] <= self.breakpoints[:, np.newaxis, np.newaxis]
        )
        self.cumulative = self.cumulative.sum(axis=1) / self.ratio.shape[0]
Functions
__init__(*solvers, subset=None)

Parameters:

  • *solvers (Union[str, Path, SolverData]) –

    Arguments of type str/Path to be read through solver_data.read_table or of type solver_data.SolverData. At least 2 arguments are required

  • subset (list[str]) –

    If not None, restricts the solvers data to only these problems.

Source code in perprof/profile_data.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
def __init__(self, *solvers, subset=None):
    """Initialize the profile structure with solver_data.SolverData or files.

    Args:
        *solvers (Union[str, Path, SolverData]):
            Arguments of type str/Path to be read through solver_data.read_table
            or of type solver_data.SolverData. At least 2 arguments are required.
        subset (list[str]):
            If not None, restricts the solvers data to only these problems.

    Raises:
        ValueError: If a solver argument is not a str, Path, or SolverData,
            or if fewer than two solvers are given.
    """
    self.solvers = []
    for solver in solvers:
        if isinstance(solver, (str, Path)):
            self.solvers.append(read_table(solver))
        elif isinstance(solver, SolverData):
            self.solvers.append(solver)
        else:
            raise ValueError(f"Unexpected type for solver input: {type(solver)}")
    self.subset = subset

    # Variables that will be filled by self.process()
    self._solvers_data = None
    self.ratio = None
    self._best_times = None
    self.breakpoints = None
    self.cumulative = None
    self.process()
process()

Process the solver data.

If the solvers argument is updated, this should be called again. This updates the internal values and returns nothing.

Source code in perprof/profile_data.py
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
def process(self):
    """
    Process the solver data.

    If the solvers argument is updated, this should be called again.
    This updates the internal values and returns nothing.

    Raises:
        ValueError: If fewer than two solvers are stored.
    """
    if len(self.solvers) <= 1:
        raise ValueError("A Profile needs two solvers, at least")

    # create the reduced dataset: |subset| x |solvers|
    cols = ["name", "time"]
    self._solvers_data = self.solvers[0].data[cols].copy()
    for solver in self.solvers[1:]:
        self._solvers_data = self._solvers_data.join(
            solver.data[cols].set_index("name"),
            on="name",
            rsuffix="_" + solver.algname,
        )
    self._solvers_data = self._solvers_data.rename(
        columns={"time": "time_" + self.solvers[0].algname}
    )

    # Set to inf the ones that fail convergence. Match by problem name:
    # applying the solver's own boolean mask positionally would mark the
    # wrong rows when a solver lists problems in a different order.
    for solver in self.solvers:
        failed = solver.data.loc[~solver.data.exit.isin(solver.success), "name"]
        if not failed.empty:
            self._solvers_data.loc[
                self._solvers_data.name.isin(failed), "time_" + solver.algname
            ] = float("inf")

    # fillna is not in-place; assign the result so problems missing from a
    # solver count as failures (inf) instead of keeping NaN.
    self._solvers_data = self._solvers_data.fillna(float("inf"))
    if self.subset:
        self._solvers_data = self._solvers_data[
            self._solvers_data.name.isin(self.subset)
        ]

    # Compute the minimum time per problem across all solvers
    self._best_times = self._solvers_data.iloc[:, 1:].min(axis=1).values

    # Ratio of each solver's time to the best time for each problem
    self.ratio = (
        self._solvers_data.iloc[:, 1:] / self._best_times[:, np.newaxis]
    ).values
    self.ratio[np.isnan(self.ratio)] = float("inf")
    self.breakpoints = np.sort(np.unique(self.ratio.reshape(-1)))
    # This removes inf and nan
    self.breakpoints = self.breakpoints[self.breakpoints < float("inf")]
    # Fraction of problems each solver finishes within each ratio breakpoint
    self.cumulative = (
        self.ratio[np.newaxis, :, :] <= self.breakpoints[:, np.newaxis, np.newaxis]
    )
    self.cumulative = self.cumulative.sum(axis=1) / self.ratio.shape[0]

Functions