Source code for petab.visualize.helper_functions

"""
This file should contain the functions, which PEtab internally needs for
plotting, but which are not meant to be used by non-developers and should
hence not be directly visible/usable when using `import petab.visualize`.
"""

import functools
import warnings
from numbers import Number


import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import petab
import seaborn as sns

from .plotting_config import plot_lowlevel
from ..C import *

from typing import Dict, List, Optional, Tuple, Union

# sns.set() This messes up plotting settings even if one just imports this file

# for typehints
IdsList = List[str]
NumList = List[int]
__all__ = ['check_ex_exp_columns',
           'check_ex_visu_columns',
           'check_vis_spec_consistency',
           'create_dataset_id_list',
           'create_dataset_id_list_new',
           'create_figure',
           'create_or_update_vis_spec',
           'expand_vis_spec_settings',
           'generate_dataset_id_col',
           'get_data_to_plot',
           'get_default_vis_specs',
           'get_vis_spec_dependent_columns_dict',
           'handle_dataset_plot',
           'import_from_files',
           'matches_plot_spec']


def import_from_files(
        data_file_path: str,
        condition_file_path: str,
        simulation_file_path: str,
        dataset_id_list: List[IdsList],
        sim_cond_id_list: List[IdsList],
        sim_cond_num_list: List[NumList],
        observable_id_list: List[IdsList],
        observable_num_list: List[NumList],
        plotted_noise: str,
        visualization_file_path: str = None
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Helper function for plotting data and simulations, which imports data
    from PEtab files. If `visualization_file_path` is not provided, the
    visualization specification DataFrame will be generated automatically.

    For documentation, see main function plot_data_and_simulation()

    Returns:
        A tuple of experimental data, experimental conditions,
        visualization specification and simulation data DataFrames.
    """
    warnings.warn("This function will be removed in future releases. ",
                  DeprecationWarning)

    # import measurement data and experimental condition
    exp_data = petab.get_measurement_df(data_file_path)
    exp_conditions = petab.get_condition_df(condition_file_path)

    # import visualization specification, if file was specified
    if visualization_file_path:
        vis_spec = petab.get_visualization_df(visualization_file_path)
    else:
        # create them based on simulation conditions
        vis_spec, exp_data = get_default_vis_specs(exp_data,
                                                   exp_conditions,
                                                   dataset_id_list,
                                                   sim_cond_id_list,
                                                   sim_cond_num_list,
                                                   observable_id_list,
                                                   observable_num_list,
                                                   plotted_noise)

    # import simulation file, if file was specified
    if simulation_file_path != '':
        sim_data = petab.get_simulation_df(simulation_file_path)
    else:
        sim_data = None

    return exp_data, exp_conditions, vis_spec, sim_data


def check_vis_spec_consistency(
        exp_data: pd.DataFrame,
        dataset_id_list: Optional[List[IdsList]] = None,
        sim_cond_id_list: Optional[List[IdsList]] = None,
        sim_cond_num_list: Optional[List[NumList]] = None,
        observable_id_list: Optional[List[IdsList]] = None,
        observable_num_list: Optional[List[NumList]] = None) -> str:
    """
    Helper function for plotting data and simulations, which checks the
    visualization setting, if no visualization specification file is
    provided.

    For documentation, see main function plot_data_and_simulation()

    Returns:
        Specifies the grouping of data to plot.
    """
    warnings.warn("This function will be removed in future releases. ",
                  DeprecationWarning)

    # We have no vis_spec file. Check how data should be grouped
    group_by = ''
    if dataset_id_list is not None:
        group_by += 'dataset'

    # check whether grouping by simulation condition should be done
    if sim_cond_id_list is not None and sim_cond_num_list is not None:
        raise NotImplementedError(
            "Either specify a list of simulation condition IDs or a list of "
            "simulation condition numbers, but not both. Stopping.")
    if sim_cond_id_list is not None or sim_cond_num_list is not None:
        group_by += 'simulation'

    # check whether grouping by observable should be done
    if observable_id_list is not None and observable_num_list is not None:
        raise NotImplementedError(
            "Either specify a list of observable IDs or a list "
            "of observable numbers, but not both. Stopping.")
    if observable_id_list is not None or observable_num_list is not None:
        group_by += 'observable'

    # consistency check. Warn or error, if grouping not clear
    if group_by == 'datasetsimulation':
        warnings.warn("Found grouping by datasetId and simulation condition. "
                      "Using datasetId, omitting simulation condition.")
        group_by = 'dataset'
    elif group_by == 'datasetobservable':
        warnings.warn("Found grouping by datasetId and observable. "
                      "Using datasetId, omitting observable.")
        group_by = 'dataset'
    elif group_by == 'datasetsimulationobservable':
        warnings.warn("Found grouping by datasetId, simulation condition, "
                      "and observable. Using datasetId, omitting simulation "
                      "condition and observable.")
        group_by = 'dataset'
    elif group_by == 'simulationobservable':
        raise NotImplementedError(
            "Plotting without visualization specification file and datasetId "
            "can be performed via grouping by simulation conditions OR "
            "observables, but not both. Stopping.")
    elif group_by in ['simulation', 'observable', 'dataset']:
        pass
    # if group_by is still empty (if visuSpec file is available but datasetId
    # is not available), default: observables
    elif group_by == '':
        group_by = 'observable'
        warnings.warn('Default plotting: grouping by observable. If you want '
                      'to specify another grouping option, please add '
                      '\'datasetId\' columns.')
    else:
        raise NotImplementedError(
            "No information provided, how to plot data. Stopping.")

    if group_by != 'dataset':
        # group plots not by dataset. Check, whether such a column would
        # have been available (and give a warning, if so)
        if 'datasetId' in exp_data.columns:
            warnings.warn("DatasetIds would have been available, but other "
                          "grouping was requested. Consider using datasetId.")
    else:
        if 'datasetId' not in exp_data.columns:
            raise NotImplementedError(
                "Data should be grouped by datasetId, but no datasetId is "
                "given in the measurement file. Stopping.")

    return group_by


def create_dataset_id_list(
        simcond_id_list: List[IdsList],
        simcond_num_list: List[NumList],
        observable_id_list: List[IdsList],
        observable_num_list: List[NumList],
        exp_data: pd.DataFrame,
        exp_conditions: pd.DataFrame,
        group_by: str) -> Tuple[pd.DataFrame, List[IdsList], Dict, Dict]:
    """
    Create dataset id list and corresponding plot legends.
    Additionally, update/create DATASET_ID column of exp_data

    Parameters:
        group_by: defines grouping of data to plot

    Returns:
        A tuple of experimental DataFrame, list of datasetIds and
        dictionary of plot legends, corresponding to the datasetIds

    For additional documentation, see main function plot_data_and_simulation()
    """
    warnings.warn("This function will be removed in future releases. ",
                  DeprecationWarning)

    # create a column of dummy datasetIDs and legend entries: preallocate
    dataset_id_column = []
    legend_dict = {}
    yvalues_dict = {}

    # loop over experimental data table, create datasetId for each entry
    tmp_simcond = list(exp_data[SIMULATION_CONDITION_ID])
    tmp_obs = list(exp_data[OBSERVABLE_ID])

    for ind, cond_id in enumerate(tmp_simcond):
        # create and add dummy datasetID
        dataset_id = cond_id + '_' + tmp_obs[ind]
        dataset_id_column.append(dataset_id)

        # create nicer legend entries from condition names instead of IDs
        if dataset_id not in legend_dict.keys():
            tmp = exp_conditions.loc[exp_conditions.index == cond_id]
            if CONDITION_NAME not in tmp.columns or tmp[
                    CONDITION_NAME].isna().any():
                tmp.loc[:, CONDITION_NAME] = tmp.index.tolist()
            legend_dict[dataset_id] = tmp[CONDITION_NAME][0] + ' - ' + \
                tmp_obs[ind]
            yvalues_dict[dataset_id] = tmp_obs[ind]

    # add this column to the measurement table (possibly overwrite)
    if DATASET_ID in exp_data.columns:
        exp_data = exp_data.drop(DATASET_ID, axis=1)
    exp_data.insert(loc=exp_data.columns.size, column=DATASET_ID,
                    value=dataset_id_column)

    # make dummy dataset names unique and iterable
    unique_dataset_list = functools.reduce(
        lambda tmp, x: tmp.append(x) or tmp if x not in tmp else tmp,
        list(exp_data[DATASET_ID]), [])
    unique_simcond_list = functools.reduce(
        lambda tmp, x: tmp.append(x) or tmp if x not in tmp else tmp,
        list(exp_data[SIMULATION_CONDITION_ID]), [])
    unique_obs_list = functools.reduce(
        lambda tmp, x: tmp.append(x) or tmp if x not in tmp else tmp,
        list(exp_data[OBSERVABLE_ID]), [])

    # we will need a dictionary for mapping simulation conditions
    # /observables to datasets
    ds_dict = {}
    dataset_id_list = []
    if group_by == 'simulation':
        if simcond_id_list is None:
            simcond_id_list = [[unique_simcond_list[i_cond] for i_cond in
                                i_cond_list] for i_cond_list in
                               simcond_num_list]
        for simcond in unique_simcond_list:
            # ds_dict[simcond] = [ds for ds in unique_dataset_list if ds[
            #     0:len(simcond)+3] == simcond + ' - ']
            # ds_dict[simcond] = [ds for ds in unique_dataset_list if ds[
            #     0:len(simcond) + 3] == simcond + '_']
            ds_dict[simcond] = [ds for ds in unique_dataset_list if ds[
                0:len(simcond)] == simcond]
        grouped_list = simcond_id_list

    elif group_by == 'observable':
        if not observable_id_list and not observable_num_list:
            observable_id_list = [unique_obs_list]
        if observable_id_list is None:
            observable_id_list = [[unique_obs_list[i_obs] for i_obs in
                                   i_obs_list] for i_obs_list in
                                  observable_num_list]
        for observable in unique_obs_list:
            # ds_dict[observable] = [ds for ds in unique_dataset_list if ds[
            #     -len(observable)-3:] == ' - ' + observable]
            ds_dict[observable] = [ds for ds in unique_dataset_list if ds[
                -len(observable) - 1:] == '_' + observable]
        grouped_list = observable_id_list

    else:
        raise NotImplementedError(
            "Very, very weird error. Should not have happened. Something "
            "went wrong in how datasets should be grouped. Very weird...")

    for sublist in grouped_list:
        datasets_for_this_plot = [dset for sublist_entry in sublist
                                  for dset in ds_dict[sublist_entry]]
        dataset_id_list.append(datasets_for_this_plot)

    return exp_data, dataset_id_list, legend_dict, yvalues_dict


def generate_dataset_id_col(exp_data: pd.DataFrame) -> List[str]:
    """
    Generate DATASET_ID column from condition_ids and observable_ids.

    Parameters
    ----------
    exp_data:
        A measurement (simulation) DataFrame in the PEtab format.

    Returns
    -------
    A list with generated datasetIds for each entry in the measurement
    (simulation) DataFrame
    """
    # create a column of dummy datasetIDs and legend entries: preallocate
    dataset_id_column = []

    # loop over experimental data table, create datasetId for each entry
    tmp_simcond = list(exp_data[SIMULATION_CONDITION_ID])
    tmp_obs = list(exp_data[OBSERVABLE_ID])

    for ind, cond_id in enumerate(tmp_simcond):
        # create and add dummy datasetID
        dataset_id = cond_id + '_' + tmp_obs[ind]
        dataset_id_column.append(dataset_id)

    return dataset_id_column


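# Hedged usage sketch (illustrative, not part of the original module): for a
# measurement table with OBSERVABLE_ID and SIMULATION_CONDITION_ID columns,
# generate_dataset_id_col joins condition and observable IDs with '_'.
# The table below is a made-up example:
#
#   example_measurements = pd.DataFrame({
#       OBSERVABLE_ID: ['obs_a', 'obs_a', 'obs_b'],
#       SIMULATION_CONDITION_ID: ['c0', 'c1', 'c0'],
#       TIME: [0.0, 1.0, 2.0],
#       MEASUREMENT: [0.1, 0.2, 0.3]})
#   generate_dataset_id_col(example_measurements)
#   # expected: ['c0_obs_a', 'c1_obs_a', 'c0_obs_b']

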
def create_dataset_id_list_new(df: pd.DataFrame,
                               group_by: str,
                               id_list: List[IdsList]
                               ) -> List[IdsList]:
    """
    Create dataset ID list from a list of simulation condition IDs or
    observable IDs.

    Parameters:
        df: Measurements or simulations DataFrame.
        group_by: Defines grouping of data to plot.
        id_list:
            Grouping list. Each sublist corresponds to a subplot in a
            figure, and contains the IDs of observables or simulation
            conditions for the subplot.

    Returns:
        A list of datasetIds
    """
    if DATASET_ID not in df.columns:
        raise ValueError(f'{DATASET_ID} column must be in exp_data DataFrame')

    dataset_id_list = []

    if group_by == 'simulation':
        grouping_col = SIMULATION_CONDITION_ID
    elif group_by == 'observable':
        grouping_col = OBSERVABLE_ID
        if id_list is None:
            # this is the default case. If no grouping is specified,
            # all observables are plotted. One observable per plot.
            unique_obs_list = df[OBSERVABLE_ID].unique()
            id_list = [[obs_id] for obs_id in unique_obs_list]
    else:
        raise ValueError(f"Unsupported group_by value: {group_by}")

    for sublist in id_list:
        plot_id_list = []
        for cond_id in sublist:
            plot_id_list.extend(list(
                df[df[grouping_col] == cond_id][
                    DATASET_ID].unique()))
        dataset_id_list.append(plot_id_list)
    return dataset_id_list


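# Hedged usage sketch (illustrative, not part of the original module):
# grouping by observable with an explicit id_list; the DataFrame must
# already carry a DATASET_ID column, e.g. one built with
# generate_dataset_id_col. All values below are made up:
#
#   df = pd.DataFrame({
#       OBSERVABLE_ID: ['obs_a', 'obs_a', 'obs_b'],
#       SIMULATION_CONDITION_ID: ['c0', 'c1', 'c0'],
#       DATASET_ID: ['c0_obs_a', 'c1_obs_a', 'c0_obs_b']})
#   create_dataset_id_list_new(df, group_by='observable',
#                              id_list=[['obs_a'], ['obs_b']])
#   # expected: [['c0_obs_a', 'c1_obs_a'], ['c0_obs_b']]

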
def create_figure(
        uni_plot_ids: np.ndarray,
        plots_to_file: bool) -> Tuple[plt.Figure,
                                      Union[Dict[str, plt.Subplot],
                                            'np.ndarray[plt.Subplot]']]:
    """
    Helper function for plotting data and simulations, open figure and axes

    Parameters
    ----------
    uni_plot_ids:
        Array with unique plot indices
    plots_to_file:
        Indicator if plots are saved to file

    Returns
    -------
    fig: Figure object of the created plot.
    ax: Axis object of the created plot.
    """
    warnings.warn("This function will be removed in future releases. ",
                  DeprecationWarning)

    # Set Options for plots
    # possible options: see: plt.rcParams.keys()
    plt.rcParams['font.size'] = 10
    plt.rcParams['axes.titlesize'] = 10
    plt.rcParams['figure.figsize'] = [20, 10]
    plt.rcParams['errorbar.capsize'] = 2

    # Set Colormap
    sns.set(style="ticks", palette="colorblind")

    # Check if plots are saved to file and return single subplot axis
    if plots_to_file:
        num_subplot = 1
    else:
        num_subplot = len(uni_plot_ids)

    # compute, how many rows and columns we need for the subplots
    num_row = int(np.round(np.sqrt(num_subplot)))
    num_col = int(np.ceil(num_subplot / num_row))

    fig, axes = plt.subplots(num_row, num_col, squeeze=False)

    if not plots_to_file:
        for ax in axes.flat[num_subplot:]:
            ax.remove()

        axes = dict(zip(uni_plot_ids, axes.flat))

    return fig, axes


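# Hedged usage sketch (illustrative, not part of the original module): with
# plots_to_file=False the subplot grid is roughly square, e.g. for five plot
# IDs num_row = round(sqrt(5)) = 2 and num_col = ceil(5 / 2) = 3; surplus
# axes are removed and the returned axes are keyed by plot ID:
#
#   fig, axes = create_figure(
#       np.array(['plot1', 'plot2', 'plot3', 'plot4', 'plot5']),
#       plots_to_file=False)
#   axes['plot3'].set_title('plot3')  # axes is a dict plotId -> Subplot

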
def get_default_vis_specs(
        exp_data: pd.DataFrame,
        exp_conditions: pd.DataFrame,
        dataset_id_list: Optional[List[IdsList]] = None,
        sim_cond_id_list: Optional[List[IdsList]] = None,
        sim_cond_num_list: Optional[List[NumList]] = None,
        observable_id_list: Optional[List[IdsList]] = None,
        observable_num_list: Optional[List[NumList]] = None,
        plotted_noise: Optional[str] = MEAN_AND_SD
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Helper function for plotting data and simulations, which creates a
    default visualization table and updates/creates DATASET_ID column of
    exp_data

    Returns:
        A tuple of visualization specification DataFrame and experimental
        DataFrame.

    For documentation, see main function plot_data_and_simulation()
    """
    warnings.warn("This function will be removed in future releases. ",
                  DeprecationWarning)

    # check consistency of settings
    group_by = check_vis_spec_consistency(
        exp_data, dataset_id_list, sim_cond_id_list, sim_cond_num_list,
        observable_id_list, observable_num_list)

    if group_by != 'dataset':
        # datasetId_list will be created (possibly overwriting previous list
        # - only in the local variable, not in the tsv-file)
        exp_data, dataset_id_list, legend_dict, _ = \
            create_dataset_id_list(sim_cond_id_list, sim_cond_num_list,
                                   observable_id_list, observable_num_list,
                                   exp_data, exp_conditions, group_by)

    dataset_id_column = [i_dataset for sublist in dataset_id_list
                         for i_dataset in sublist]
    if group_by != 'dataset':
        dataset_label_column = [legend_dict[i_dataset] for sublist in
                                dataset_id_list for i_dataset in sublist]
    else:
        dataset_label_column = dataset_id_column

    # get number of plots and create plotId-lists
    plot_id_list = [f'plot{ind+1}' for ind, inner_list in enumerate(
        dataset_id_list) for _ in inner_list]

    # create dataframe
    vis_spec = pd.DataFrame({PLOT_ID: plot_id_list,
                             DATASET_ID: dataset_id_column,
                             LEGEND_ENTRY: dataset_label_column})

    # fill columns with default values
    fill_vis_spec = ((2, Y_LABEL, 'value'),
                     (2, Y_OFFSET, 0),
                     (2, Y_VALUES, ''),
                     (2, X_LABEL, 'time'),
                     (2, X_OFFSET, 0),
                     (2, X_VALUES, 'time'),
                     (1, Y_SCALE, LIN),
                     (1, X_SCALE, LIN),
                     (0, PLOT_TYPE_DATA, plotted_noise),
                     (0, PLOT_TYPE_SIMULATION, LINE_PLOT),
                     (0, PLOT_NAME, ''))
    for pos, col, val in fill_vis_spec:
        vis_spec.insert(loc=pos, column=col, value=val)

    return vis_spec, exp_data


def get_vis_spec_dependent_columns_dict(
        exp_data: pd.DataFrame,
        exp_conditions: pd.DataFrame,
        dataset_id_list: Optional[List[IdsList]] = None,
        sim_cond_id_list: Optional[List[IdsList]] = None,
        sim_cond_num_list: Optional[List[NumList]] = None,
        observable_id_list: Optional[List[IdsList]] = None,
        observable_num_list: Optional[List[NumList]] = None
) -> Tuple[pd.DataFrame, Dict]:
    """
    Helper function for creating values for columns PLOT_ID, DATASET_ID,
    LEGEND_ENTRY, Y_VALUES for visualization specification file.
    DATASET_ID column of exp_data is updated accordingly.

    Returns:
        A tuple of experimental DataFrame and a dictionary with values for
        columns PLOT_ID, DATASET_ID, LEGEND_ENTRY, Y_VALUES for
        visualization specification file.
    """
    warnings.warn("This function will be removed in future releases. ",
                  DeprecationWarning)

    # check consistency of settings
    group_by = check_vis_spec_consistency(
        exp_data, dataset_id_list, sim_cond_id_list, sim_cond_num_list,
        observable_id_list, observable_num_list)

    if group_by != 'dataset':
        # datasetId_list will be created (possibly overwriting previous list
        # - only in the local variable, not in the tsv-file)
        exp_data, dataset_id_list, legend_dict, yvalues_dict = \
            create_dataset_id_list(sim_cond_id_list, sim_cond_num_list,
                                   observable_id_list, observable_num_list,
                                   exp_data, exp_conditions, group_by)

    dataset_id_column = [i_dataset for sublist in dataset_id_list
                         for i_dataset in sublist]

    if group_by != 'dataset':
        dataset_label_column = [legend_dict[i_dataset] for sublist in
                                dataset_id_list for i_dataset in sublist]
        yvalues_column = [yvalues_dict[i_dataset] for sublist in
                          dataset_id_list for i_dataset in sublist]
    else:
        dataset_label_column = dataset_id_column
        yvalues_column = [''] * len(dataset_id_column)

    # get number of plots and create plotId-lists
    if group_by == 'observable':
        obs_uni = list(np.unique(exp_data[OBSERVABLE_ID]))
        # copy of dataset ids, for later replacing with plot ids
        plot_id_column = dataset_id_column.copy()
        for i_obs in range(0, len(obs_uni)):
            # get dataset_ids which include observable name
            matching = [s for s in dataset_id_column if obs_uni[i_obs] in s]
            # replace the dataset ids with plot id with grouping of
            # observables
            for m_i in matching:
                plot_id_column = [sub.replace(m_i, 'plot%s' % str(i_obs + 1))
                                  for sub in plot_id_column]
    else:
        # get number of plots and create plotId-lists
        plot_id_column = ['plot%s' % str(ind + 1) for ind, inner_list in
                          enumerate(dataset_id_list) for _ in inner_list]

    columns_dict = {PLOT_ID: plot_id_column,
                    DATASET_ID: dataset_id_column,
                    LEGEND_ENTRY: dataset_label_column,
                    Y_VALUES: yvalues_column}
    return exp_data, columns_dict


def expand_vis_spec_settings(vis_spec: pd.DataFrame,
                             columns_dict):
    """
    Expand visualization specification. Only makes sense if DATASET_ID is
    not in vis_spec.columns.

    Parameters
    ----------
    vis_spec:
        A visualization specification DataFrame.
    columns_dict:
        A dictionary with values for columns PLOT_ID, DATASET_ID,
        LEGEND_ENTRY, Y_VALUES of the visualization specification file.

    Returns
    -------
    Expanded visualization specification DataFrame
    """
    warnings.warn("This function will be removed in future releases. ",
                  DeprecationWarning)

    columns_to_expand = [PLOT_NAME, PLOT_TYPE_SIMULATION, PLOT_TYPE_DATA,
                         X_VALUES, X_OFFSET, X_LABEL, X_SCALE, Y_OFFSET,
                         Y_LABEL, Y_SCALE, LEGEND_ENTRY]

    for column in vis_spec.columns:
        if column in columns_to_expand:
            column_entries = []
            if Y_VALUES in vis_spec.columns:
                for i, plot_id in enumerate(columns_dict[PLOT_ID]):
                    select_conditions = (vis_spec[PLOT_ID] == plot_id) & (
                        vis_spec[Y_VALUES] == columns_dict[Y_VALUES][i])
                    column_entries.append(
                        vis_spec[select_conditions].loc[:, column].values[0])
            else:
                # get unique plotIDs from visspecfile
                vis_plotid_u = vis_spec[PLOT_ID].unique()
                auto_plotid_u = list(set(columns_dict[PLOT_ID]))
                # if number of plotIds does not coincide (automatically
                # generated plotIds according to observable grouping, vs
                # plotIds specified in the visu_Spec)
                if len(vis_plotid_u) != len(auto_plotid_u):
                    # which items are not in visu_plotId:
                    del_plotid = \
                        list(set(columns_dict[PLOT_ID]) - set(vis_plotid_u))
                    # replace automatically generated plotIds with 'plot1'
                    # from visu file
                    for d_i in del_plotid:
                        columns_dict[PLOT_ID] = [
                            sub.replace(d_i, vis_plotid_u[0])
                            for sub in columns_dict[PLOT_ID]]
                for plot_id in columns_dict[PLOT_ID]:
                    select_conditions = vis_spec[PLOT_ID] == plot_id
                    column_entries.append(
                        vis_spec[select_conditions].loc[:, column].values[0])
            columns_dict[column] = column_entries
    vis_spec = pd.DataFrame(columns_dict)

    return vis_spec


def create_or_update_vis_spec(
        exp_data: pd.DataFrame,
        exp_conditions: pd.DataFrame,
        vis_spec: Optional[pd.DataFrame] = None,
        dataset_id_list: Optional[List[IdsList]] = None,
        sim_cond_id_list: Optional[List[IdsList]] = None,
        sim_cond_num_list: Optional[List[NumList]] = None,
        observable_id_list: Optional[List[IdsList]] = None,
        observable_num_list: Optional[List[NumList]] = None,
        plotted_noise: Optional[str] = MEAN_AND_SD):
    """
    Helper function for plotting data and simulations, which updates
    vis_spec file if necessary or creates a default visualization table and
    updates/creates DATASET_ID column of exp_data. As a result, a
    visualization specification file exists with columns PLOT_ID,
    DATASET_ID, Y_VALUES and LEGEND_ENTRY

    Returns:
        A tuple of visualization specification DataFrame and experimental
        DataFrame.
    """
    warnings.warn("This function will be removed in future releases. ",
                  DeprecationWarning)

    if vis_spec is None:
        # create dataframe
        exp_data, columns_dict = \
            get_vis_spec_dependent_columns_dict(exp_data,
                                                exp_conditions,
                                                dataset_id_list,
                                                sim_cond_id_list,
                                                sim_cond_num_list,
                                                observable_id_list,
                                                observable_num_list)
        vis_spec = pd.DataFrame(columns_dict)
    else:
        # TODO: do validation issue #190
        # so, plotid is definitely there
        if DATASET_ID not in vis_spec.columns:
            if Y_VALUES in vis_spec.columns:
                plot_id_list = np.unique(vis_spec[PLOT_ID])

                observable_id_list = [
                    vis_spec[vis_spec[PLOT_ID] == plot_id].loc[
                        :, Y_VALUES].values
                    for plot_id in plot_id_list]

                exp_data, columns_dict = \
                    get_vis_spec_dependent_columns_dict(
                        exp_data,
                        exp_conditions,
                        observable_id_list=observable_id_list)
            else:
                # PLOT_ID is there, but NOT DATASET_ID and not Y_VALUES,
                # but potentially some settings.
                # TODO: multiple plotids with diff settings
                exp_data, columns_dict = \
                    get_vis_spec_dependent_columns_dict(
                        exp_data,
                        exp_conditions)
            # get other settings that could have potentially been there
            # and expand according to plot_id_column
            vis_spec = expand_vis_spec_settings(vis_spec, columns_dict)
        # if dataset_id is there, then nothing to expand?
        if PLOT_TYPE_DATA not in vis_spec.columns:
            vis_spec[PLOT_TYPE_DATA] = plotted_noise

    # check columns, and add non-mandatory default columns
    vis_spec = check_ex_visu_columns(vis_spec, exp_data, exp_conditions)
    return exp_data, vis_spec


def check_ex_visu_columns(vis_spec: pd.DataFrame,
                          exp_data: pd.DataFrame,
                          exp_conditions: pd.DataFrame) -> pd.DataFrame:
    """
    Check the columns of the visualization specification file; if
    non-mandatory columns do not exist, create default columns.

    Returns:
        Updated visualization specification DataFrame
    """
    warnings.warn("This function will be removed in future releases. ",
                  DeprecationWarning)

    if PLOT_NAME not in vis_spec.columns:
        vis_spec[PLOT_NAME] = ''
    if PLOT_TYPE_SIMULATION not in vis_spec.columns:
        vis_spec[PLOT_TYPE_SIMULATION] = LINE_PLOT
    if PLOT_TYPE_DATA not in vis_spec.columns:
        vis_spec[PLOT_TYPE_DATA] = MEAN_AND_SD
    if X_VALUES not in vis_spec.columns:
        # check if time is constant in expdata (if yes, plot dose response)
        # otherwise plot time series
        uni_time = pd.unique(exp_data[TIME])
        if len(uni_time) > 1:
            vis_spec[X_VALUES] = 'time'
        elif len(uni_time) == 1:
            if np.isin(exp_conditions.columns.values, 'conditionName').any():
                conds = exp_conditions.columns.drop('conditionName')
            else:
                conds = exp_conditions.columns
            # default: first dose-response condition (first from condition
            # table) is plotted
            # TODO: expand to automatic plotting of all conditions
            vis_spec[X_VALUES] = conds[0]
            vis_spec[X_LABEL] = conds[0]
            warnings.warn(
                '\n First dose-response condition is plotted. \n Check which '
                'condition you want to plot \n and possibly enter it into the '
                'column *xValues* \n in the visualization table.')
        else:
            raise NotImplementedError(
                'Strange Error. There is no time defined in the measurement '
                'table?')
    if X_OFFSET not in vis_spec.columns:
        vis_spec[X_OFFSET] = 0
    if X_LABEL not in vis_spec.columns:
        vis_spec[X_LABEL] = 'time'
        vis_spec.loc[vis_spec[X_VALUES] != 'time', X_LABEL] = 'condition'
    if X_SCALE not in vis_spec.columns:
        vis_spec[X_SCALE] = LIN
    if Y_VALUES not in vis_spec.columns:
        vis_spec[Y_VALUES] = ''
    if Y_OFFSET not in vis_spec.columns:
        vis_spec[Y_OFFSET] = 0
    if Y_LABEL not in vis_spec.columns:
        vis_spec[Y_LABEL] = 'value'
    if Y_SCALE not in vis_spec.columns:
        vis_spec[Y_SCALE] = LIN
    if LEGEND_ENTRY not in vis_spec.columns:
        vis_spec[LEGEND_ENTRY] = vis_spec[DATASET_ID]

    return vis_spec


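# Hedged usage sketch (illustrative, not part of the original module): a
# minimal vis_spec with only PLOT_ID and DATASET_ID is padded with default
# columns (plot types, scales, offsets, legendEntry, ...). Because the
# made-up measurements below contain more than one time point, X_VALUES
# defaults to 'time'; the condition table is only consulted in the
# dose-response branch:
#
#   vis_spec = pd.DataFrame({PLOT_ID: ['plot1'],
#                            DATASET_ID: ['c0_obs_a']})
#   measurements = pd.DataFrame({OBSERVABLE_ID: ['obs_a', 'obs_a'],
#                                SIMULATION_CONDITION_ID: ['c0', 'c0'],
#                                TIME: [0.0, 1.0],
#                                MEASUREMENT: [0.1, 0.2]})
#   conditions = pd.DataFrame(index=pd.Index(['c0'], name=CONDITION_ID))
#   vis_spec = check_ex_visu_columns(vis_spec, measurements, conditions)

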
def check_ex_exp_columns(
        exp_data: pd.DataFrame,
        dataset_id_list: List[IdsList],
        sim_cond_id_list: List[IdsList],
        sim_cond_num_list: List[NumList],
        observable_id_list: List[IdsList],
        observable_num_list: List[NumList],
        exp_conditions: pd.DataFrame,
        sim: Optional[bool] = False
) -> Tuple[pd.DataFrame, List[IdsList], Dict]:
    """
    Check the columns of the measurement file; if non-mandatory columns do
    not exist, create default columns.

    Returns:
        A tuple of experimental DataFrame, list of datasetIds and
        dictionary of plot legends, corresponding to the datasetIds
    """
    warnings.warn("This function will be removed in future releases. ",
                  DeprecationWarning)

    data_type = MEASUREMENT
    if sim:
        data_type = SIMULATION
    # mandatory columns
    if OBSERVABLE_ID not in exp_data.columns:
        raise NotImplementedError(
            f"Column \'observableId\' is missing in {data_type} file. ")
    if SIMULATION_CONDITION_ID not in exp_data.columns:
        raise NotImplementedError(
            f"Column \'simulationConditionId\' is missing in {data_type} "
            f"file. ")
    if data_type not in exp_data.columns:
        raise NotImplementedError(
            f"Column \'{data_type}\' is missing in {data_type} "
            f"file. ")
    if TIME not in exp_data.columns:
        raise NotImplementedError(
            f"Column \'time\' is missing in {data_type} "
            f"file. ")
    # non-mandatory columns
    if PREEQUILIBRATION_CONDITION_ID not in exp_data.columns:
        exp_data.insert(loc=1, column=PREEQUILIBRATION_CONDITION_ID,
                        value='')
    if OBSERVABLE_PARAMETERS not in exp_data.columns:
        exp_data.insert(loc=4, column=OBSERVABLE_PARAMETERS, value='')
    if NOISE_PARAMETERS not in exp_data.columns:
        exp_data.insert(loc=4, column=NOISE_PARAMETERS, value=0)
    if REPLICATE_ID not in exp_data.columns:
        exp_data.insert(loc=4, column=REPLICATE_ID, value='')

    legend_dict = {}
    if DATASET_ID not in exp_data.columns:
        # TODO: ?
        if dataset_id_list is not None:
            exp_data.insert(loc=4, column=DATASET_ID,
                            value=dataset_id_list)
        else:
            # datasetId_list will be created (possibly overwriting previous
            # list - only in the local variable, not in the tsv-file)
            # check consistency of settings
            group_by = check_vis_spec_consistency(exp_data,
                                                  dataset_id_list,
                                                  sim_cond_id_list,
                                                  sim_cond_num_list,
                                                  observable_id_list,
                                                  observable_num_list)
            observable_id_list = \
                [[el] for el in exp_data.observableId.unique()]
            exp_data, dataset_id_list, legend_dict, _ = \
                create_dataset_id_list(
                    sim_cond_id_list, sim_cond_num_list, observable_id_list,
                    observable_num_list, exp_data, exp_conditions, group_by)
    # if DATASET_ID is in exp_data.columns, legend dict will be empty
    return exp_data, dataset_id_list, legend_dict


def handle_dataset_plot(plot_spec: pd.Series,
                        ax: plt.Axes,
                        exp_data: pd.DataFrame,
                        exp_conditions: pd.DataFrame,
                        sim_data: pd.DataFrame):
    """
    Handle dataset plot
    """
    warnings.warn("This function will be removed in future releases. ",
                  DeprecationWarning)

    # get datasetID and independent variable of first entry of plot1
    dataset_id = plot_spec[DATASET_ID]
    indep_var = plot_spec[X_VALUES]

    # define index to reduce exp_data to data linked to datasetId
    ind_dataset = exp_data[DATASET_ID] == dataset_id

    # gather simulationConditionIds belonging to datasetId
    uni_condition_id, uind = np.unique(
        exp_data[ind_dataset][SIMULATION_CONDITION_ID],
        return_index=True)
    # keep the ordering which was given by user from top to bottom
    # (avoid ordering by names '1','10','11','2',...)'
    uni_condition_id = uni_condition_id[np.argsort(uind)]
    col_name_unique = SIMULATION_CONDITION_ID

    # Case separation of independent parameter: condition, time or custom
    if indep_var == TIME:
        # obtain unique observation times
        uni_condition_id = np.unique(exp_data[ind_dataset][TIME])
        col_name_unique = TIME
        conditions = uni_condition_id
    elif indep_var == 'condition':
        conditions = None
    else:
        # extract conditions (plot input) from condition file
        ind_cond = exp_conditions.index.isin(uni_condition_id)
        conditions = exp_conditions[ind_cond][indep_var]

    # retrieve measurements from dataframes
    measurement_to_plot = get_data_to_plot(plot_spec, exp_data, sim_data,
                                           uni_condition_id, col_name_unique)

    # check, whether simulation should be plotted
    plot_sim = sim_data is not None

    # plot data
    nan_set = all([np.isnan(val) for val in measurement_to_plot['mean']])
    if not nan_set:
        plot_lowlevel(plot_spec, ax, conditions, measurement_to_plot,
                      plot_sim)

    # Beautify plots
    ax.set_xlabel(plot_spec.xLabel)
    ax.set_ylabel(plot_spec.yLabel)


def matches_plot_spec(df: pd.DataFrame,
                      col_id: str,
                      x_value: Union[float, str],
                      plot_spec: pd.Series) -> pd.Series:
    """
    Construct an index for subsetting of the dataframe according to what
    is specified in plot_spec.

    Parameters:
        df:
            pandas data frame to subset, can be from measurement file or
            simulation file
        col_id:
            name of the column that will be used for indexing in x variable
        x_value:
            subsetted x value
        plot_spec:
            visualization spec from the visualization file

    Returns:
        Boolean series that can be used for subsetting of the passed
        dataframe
    """
    warnings.warn("This function will be removed in future releases. ",
                  DeprecationWarning)

    subset = (
        (df[col_id] == x_value) &
        (df[DATASET_ID] == plot_spec[DATASET_ID])
    )
    if plot_spec[Y_VALUES] == '':
        if len(df.loc[subset, OBSERVABLE_ID].unique()) > 1:
            raise ValueError(
                f'{Y_VALUES} must be specified in visualization table if '
                f'multiple different observables are available.'
            )
    else:
        subset &= (df[OBSERVABLE_ID] == plot_spec[Y_VALUES])
    return subset


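# Hedged usage sketch (illustrative, not part of the original module):
# selecting the measurement rows of one dataset at one time point. Here
# `measurements_with_dataset_id` is a hypothetical measurement table that
# already has datasetId, observableId, time and measurement columns, and
# `row` stands in for one row of a visualization specification:
#
#   row = pd.Series({DATASET_ID: 'c0_obs_a', Y_VALUES: 'obs_a'})
#   mask = matches_plot_spec(measurements_with_dataset_id, col_id=TIME,
#                            x_value=1.0, plot_spec=row)
#   measurements_with_dataset_id.loc[mask, MEASUREMENT]

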
def get_data_to_plot(plot_spec: pd.Series,
                     m_data: pd.DataFrame,
                     simulation_data: pd.DataFrame,
                     condition_ids: np.ndarray,
                     col_id: str,
                     simulation_field: str = SIMULATION) -> pd.DataFrame:
    """
    Group the data, which should be plotted and return it as dataframe.

    Parameters:
        plot_spec:
            information about the plot, i.e. one row of the visualization
            specification file
        m_data:
            contains defined data format (measurement file)
        simulation_data:
            contains defined data format (simulation file)
        condition_ids:
            contains all unique condition IDs which should be plotted in
            one figure (can be found in measurementData file, column
            simulationConditionId)
        col_id:
            the name of the column in visualization file, whose entries
            should be unique (depends on condition in column xValues)
        simulation_field:
            Column name in ``simulation_data`` that contains the actual
            simulation result.

    Returns:
        Contains the data which should be plotted (Mean and Std)
    """
    warnings.warn("This function will be removed in future releases. ",
                  DeprecationWarning)

    # create empty dataframe for means and SDs
    data_to_plot = pd.DataFrame(
        columns=['mean', 'noise_model', 'sd', 'sem', 'repl', 'sim'],
        index=condition_ids
    )
    for var_cond_id in condition_ids:

        # TODO (#117): Here not the case: So, if entries in measurement file:
        #  preequCondId, time, observableParams, noiseParams,
        #  are not the same, then -> differ these data into
        #  different groups!
        # now: go in simulationConditionId, search group of unique
        # simulationConditionId e.g. rows 0,6,12,18 share the same
        # simulationCondId, then check if other column entries are the same
        # (now: they are), then take intersection of rows 0,6,12,18 and
        # checked other same columns (-> now: 0,6,12,18) and then go on with
        # code. if there is at some point a difference in other columns, say
        # e.g. row 12,18 have different noiseParams than rows 0,6, the actual
        # code would take rows 0,6 and forget about rows 12,18

        # compute mean and standard deviation across replicates
        subset = matches_plot_spec(m_data, col_id, var_cond_id, plot_spec)
        data_measurements = m_data.loc[
            subset,
            MEASUREMENT
        ]

        data_to_plot.at[var_cond_id, 'mean'] = np.mean(data_measurements)
        data_to_plot.at[var_cond_id, 'sd'] = np.std(data_measurements)

        if (plot_spec.plotTypeData == PROVIDED) & sum(subset):
            if len(m_data.loc[subset, NOISE_PARAMETERS].unique()) > 1:
                raise NotImplementedError(
                    f"Datapoints with inconsistent {NOISE_PARAMETERS} are "
                    f"currently not supported. Stopping.")
            tmp_noise = m_data.loc[subset, NOISE_PARAMETERS].values[0]
            if isinstance(tmp_noise, str):
                raise NotImplementedError(
                    "No numerical noise values provided in the measurement "
                    "table. Stopping.")
            if isinstance(tmp_noise, Number) or tmp_noise.dtype == 'float64':
                data_to_plot.at[var_cond_id, 'noise_model'] = tmp_noise

        # standard error of mean
        data_to_plot.at[var_cond_id, 'sem'] = \
            np.std(data_measurements) / np.sqrt(len(data_measurements))

        # single replicates
        data_to_plot.at[var_cond_id, 'repl'] = \
            data_measurements

        if simulation_data is not None:
            simulation_measurements = simulation_data.loc[
                matches_plot_spec(simulation_data, col_id, var_cond_id,
                                  plot_spec),
                simulation_field
            ]
            data_to_plot.at[var_cond_id, 'sim'] = np.mean(
                simulation_measurements
            )
    return data_to_plot
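

# Hedged usage sketch (illustrative, not part of the original module):
# aggregating replicates of one dataset over time. As above,
# `measurements_with_dataset_id` is a hypothetical measurement table with
# datasetId, observableId, simulationConditionId, time and measurement
# columns; the result has one row per entry of condition_ids with columns
# 'mean', 'sd', 'sem', 'repl', 'noise_model' and 'sim':
#
#   plot_row = pd.Series({DATASET_ID: 'c0_obs_a', Y_VALUES: 'obs_a',
#                         PLOT_TYPE_DATA: MEAN_AND_SD})
#   unique_times = np.unique(measurements_with_dataset_id[TIME])
#   get_data_to_plot(plot_row, measurements_with_dataset_id,
#                    simulation_data=None, condition_ids=unique_times,
#                    col_id=TIME)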