# Source code for petab.observables

"""Functions for working with the PEtab observables table"""

from collections import OrderedDict
from typing import Union, List

import libsbml
import pandas as pd
import re
import sympy as sp

from . import lint, core
from .C import *  # noqa: F403

# Names exported by ``from <this module> import *``.
__all__ = [
    'create_observable_df',
    'get_formula_placeholders',
    'get_observable_df',
    'get_output_parameters',
    'get_placeholders',
    'write_observable_df',
]


def get_observable_df(
        observable_file: Union[str, pd.DataFrame, None]
) -> Union[pd.DataFrame, None]:
    """
    Read the provided observable file into a ``pandas.DataFrame``.

    Arguments:
        observable_file:
            Name of the tab-separated file to read from, an already loaded
            ``pandas.DataFrame``, or ``None``.

    Returns:
        Observable DataFrame indexed by the observable ID column, or
        ``None`` if ``observable_file`` is ``None``.

    Raises:
        KeyError: If the table has no observable ID column.

    Note:
        A DataFrame passed as ``observable_file`` is modified in place
        (its index is replaced) and the same object is returned.
    """
    if observable_file is None:
        return observable_file

    if isinstance(observable_file, str):
        observable_file = pd.read_csv(observable_file, sep='\t',
                                      float_precision='round_trip')

    lint.assert_no_leading_trailing_whitespace(
        observable_file.columns.values, "observable")

    if not isinstance(observable_file.index, pd.RangeIndex):
        # Move a *named* index (e.g. the observable ID of an already indexed
        # table) back into the columns so it can be re-indexed below.
        # A completely unnamed index is just positional residue (e.g. left
        # over from row filtering); restoring it would add a spurious
        # ``index`` column to the table, so it is dropped instead.
        index_is_unnamed = all(
            level_name is None for level_name in observable_file.index.names)
        observable_file.reset_index(drop=index_is_unnamed, inplace=True)

    try:
        observable_file.set_index([OBSERVABLE_ID], inplace=True)
    except KeyError:
        # The pandas KeyError adds nothing beyond the column name, so
        # suppress the implicit exception context.
        raise KeyError(
            f"Observable table missing mandatory field {OBSERVABLE_ID}."
        ) from None

    return observable_file
def write_observable_df(df: pd.DataFrame, filename: str) -> None:
    """Write PEtab observable table to a tab-separated file.

    The index is written as the first column.

    Arguments:
        df: PEtab observable table
        filename: Destination file name
    """
    # Let pandas open the file itself. A text handle from a bare
    # ``open(filename, 'w')`` uses the locale's default encoding and
    # translates newlines, which doubles the carriage return on Windows
    # ("\r\r\n"). Given a path, pandas writes UTF-8 with untranslated line
    # endings.
    df.to_csv(filename, sep='\t', index=True)
def get_output_parameters(
        observable_df: pd.DataFrame,
        sbml_model: libsbml.Model,
        observables: bool = True,
        noise: bool = True,
) -> List[str]:
    """Get output parameters

    Collects the IDs of all symbols that occur in the observable and/or
    noise formulas but for which the SBML model has no element with that ID.
    The symbol ``time`` is never reported.

    Arguments:
        observable_df: PEtab observable table
        sbml_model: SBML model
        observables: Include parameters from observableFormulas
        noise: Include parameters from noiseFormulas

    Returns:
        List of output parameter IDs, without duplicates, in order of first
        occurrence (symbols within one formula are ordered by name).
    """
    formula_list = []
    if observables:
        formula_list += list(observable_df[OBSERVABLE_FORMULA])
    # The noise formula column is only read if the table has one.
    if noise and NOISE_FORMULA in observable_df:
        formula_list += list(observable_df[NOISE_FORMULA])

    # Used as an insertion-ordered set; the values are irrelevant.
    collected = OrderedDict()
    for expression in formula_list:
        symbols = sp.sympify(expression).free_symbols
        # Sort by name so the result does not depend on set iteration order.
        for symbol in sorted(symbols, key=lambda s: s.name):
            symbol_id = str(symbol)
            if sbml_model.getElementBySId(symbol_id) is not None:
                # Known to the model, hence not an output parameter.
                continue
            if symbol_id == 'time':
                continue
            collected[symbol_id] = None

    return list(collected)
def get_formula_placeholders(
        formula_string: str,
        observable_id: str,
        override_type: str,
) -> List[str]:
    """
    Get placeholder variables in noise or observable definition for the
    given observable ID.

    A placeholder has the form
    ``{override_type}Parameter{number}_{observable_id}`` and must appear as
    a whole token in the formula.

    Arguments:
        formula_string: observable formula
        observable_id: ID of current observable
        override_type: 'observable' or 'noise', depending on whether
            `formula` is for observable or for noise model

    Returns:
        List of placeholder parameter IDs in the order expected in the
        observableParameter column of the measurement table. Empty if
        ``formula_string`` is empty or not a string.

    Raises:
        AssertionError: If the placeholders found are not numbered
            consecutively starting from 1.
    """
    # Empty, missing or numeric formulas cannot contain placeholders.
    if not formula_string or not isinstance(formula_string, str):
        return []

    # A placeholder must not be preceded or followed by a word character.
    leading_boundary = r'(?:^|\W)'
    trailing_boundary = r'(?=\W|$)'
    placeholder_name = (re.escape(override_type) + r'Parameter\d+_'
                        + re.escape(observable_id))
    regex = re.compile(
        leading_boundary + '(' + placeholder_name + ')' + trailing_boundary)
    found = set(regex.findall(formula_string))

    # With n distinct placeholders, the only valid numbering is 1..n.
    expected = []
    for number in range(1, len(found) + 1):
        expected.append(f"{override_type}Parameter{number}_{observable_id}")

    if set(expected) != found:
        raise AssertionError("Non-consecutive numbering of placeholder "
                             f"parameter for {found}")

    return expected
def get_placeholders(
        observable_df: pd.DataFrame,
        observables: bool = True,
        noise: bool = True,
) -> List[str]:
    """Get all placeholder parameters from observable table
    observableFormulas and noiseFormulas

    These are the parameters that get overwritten by
    {observable,noise}Parameters.

    Arguments:
        observable_df: PEtab observable table
        observables: Include parameters from observableFormulas
        noise: Include parameters from noiseFormulas

    Returns:
        List of placeholder parameters from observable table
        observableFormulas and noiseFormulas, with duplicates removed.
    """
    # (placeholder type, formula column) pairs to scan in every row
    scan_targets = []
    if observables:
        scan_targets.append(('observable', OBSERVABLE_FORMULA))
    if noise:
        scan_targets.append(('noise', NOISE_FORMULA))

    collected = []
    # The row label yielded by iterrows is the observable ID.
    for observable_id, row in observable_df.iterrows():
        for placeholder_type, formula_column in scan_targets:
            # Tables without the respective column are simply skipped.
            if formula_column in row:
                collected.extend(get_formula_placeholders(
                    row[formula_column], observable_id, placeholder_type))

    return core.unique_preserve_order(collected)
def create_observable_df() -> pd.DataFrame:
    """Create empty observable dataframe

    Returns:
        Created DataFrame with one empty column per entry of
        ``OBSERVABLE_DF_COLS``.
    """
    empty_columns = {}
    for column_name in OBSERVABLE_DF_COLS:
        # A fresh list per column; nothing is shared between columns.
        empty_columns[column_name] = []

    return pd.DataFrame(data=empty_columns)