"""
This file should contain the functions, which PEtab internally needs for
plotting, but which are not meant to be used by non-developers and should
hence not be directly visible/usable when using `import petab.visualize`.
"""
import functools
import warnings
from numbers import Number
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import petab
import seaborn as sns
from .plotting_config import plot_lowlevel
from ..C import *
from typing import Dict, List, Optional, Tuple, Union
# sns.set() This messes up plotting settings even if one just imports this file
# for typehints
IdsList = List[str]
NumList = List[int]
__all__ = ['check_ex_exp_columns',
'check_ex_visu_columns',
'check_vis_spec_consistency',
'create_dataset_id_list',
'create_dataset_id_list_new',
'create_figure',
'create_or_update_vis_spec',
'expand_vis_spec_settings',
'generate_dataset_id_col',
'get_data_to_plot',
'get_default_vis_specs',
'get_vis_spec_dependent_columns_dict',
'handle_dataset_plot',
'import_from_files',
'matches_plot_spec']
def import_from_files(
        data_file_path: str,
        condition_file_path: str,
        simulation_file_path: str,
        dataset_id_list: List[IdsList],
        sim_cond_id_list: List[IdsList],
        sim_cond_num_list: List[NumList],
        observable_id_list: List[IdsList],
        observable_num_list: List[NumList],
        plotted_noise: str,
        visualization_file_path: str = None
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Import data and simulations for plotting from PEtab files.

    If `visualization_file_path` is not provided, the visualization
    specification DataFrame will be generated automatically from the
    remaining arguments.
    For documentation of the parameters, see main function
    plot_data_and_simulation()

    Returns:
        A tuple of experimental data, experimental conditions,
        visualization specification and simulation data DataFrames.
    """
    warnings.warn("This function will be removed in future releases. ",
                  DeprecationWarning)

    # measurements and experimental conditions are always required
    exp_data = petab.get_measurement_df(data_file_path)
    exp_conditions = petab.get_condition_df(condition_file_path)

    if visualization_file_path:
        # a visualization specification file was given: just read it
        vis_spec = petab.get_visualization_df(visualization_file_path)
    else:
        # otherwise derive a default specification from the settings
        vis_spec, exp_data = get_default_vis_specs(
            exp_data, exp_conditions, dataset_id_list, sim_cond_id_list,
            sim_cond_num_list, observable_id_list, observable_num_list,
            plotted_noise)

    # simulations are optional; an empty path means "measurements only"
    sim_data = (petab.get_simulation_df(simulation_file_path)
                if simulation_file_path != '' else None)

    return exp_data, exp_conditions, vis_spec, sim_data
def check_vis_spec_consistency(
        exp_data: pd.DataFrame,
        dataset_id_list: Optional[List[List[str]]] = None,
        sim_cond_id_list: Optional[List[List[str]]] = None,
        sim_cond_num_list: Optional[List[List[int]]] = None,
        observable_id_list: Optional[List[List[str]]] = None,
        observable_num_list: Optional[List[List[int]]] = None) -> str:
    """
    Check the visualization settings for consistency, if no visualization
    specification file is provided.

    Helper function for plotting data and simulations.
    For documentation, see main function plot_data_and_simulation()

    Parameters:
        exp_data: Measurement DataFrame in the PEtab format.
        dataset_id_list: Grouping by datasetIds, one sublist per plot.
        sim_cond_id_list: Grouping by simulation condition IDs.
        sim_cond_num_list: Grouping by simulation condition numbers.
        observable_id_list: Grouping by observable IDs.
        observable_num_list: Grouping by observable numbers.

    Returns:
        Specifies the grouping of data to plot: 'dataset', 'simulation',
        or 'observable'.

    Raises:
        NotImplementedError:
            If the provided grouping options are ambiguous or inconsistent
            with the measurement table.
    """
    warnings.warn("This function will be removed in future releases. ",
                  DeprecationWarning)

    # We have no vis_spec file. Check how data should be grouped.
    # All requested grouping options are concatenated into one string, so
    # that conflicting combinations can be recognized below.
    group_by = ''
    if dataset_id_list is not None:
        group_by += 'dataset'

    # check whether grouping by simulation condition should be done
    # (IDs and numbers are mutually exclusive)
    if sim_cond_id_list is not None and sim_cond_num_list is not None:
        raise NotImplementedError(
            "Either specify a list of simulation condition IDs or a list of "
            "simulation condition numbers, but not both. Stopping.")
    if sim_cond_id_list is not None or sim_cond_num_list is not None:
        group_by += 'simulation'

    # check whether grouping by observable should be done
    # (IDs and numbers are mutually exclusive)
    if observable_id_list is not None and observable_num_list is not None:
        raise NotImplementedError(
            "Either specify a list of observable IDs or a list "
            "of observable numbers, but not both. Stopping.")
    if observable_id_list is not None or observable_num_list is not None:
        group_by += 'observable'

    # consistency check. Warn or error, if grouping not clear.
    # datasetId-based grouping takes precedence over all other options.
    if group_by == 'datasetsimulation':
        warnings.warn("Found grouping by datasetId and simulation condition. "
                      "Using datasetId, omitting simulation condition.")
        group_by = 'dataset'
    elif group_by == 'datasetobservable':
        warnings.warn("Found grouping by datasetId and observable. "
                      "Using datasetId, omitting observable.")
        group_by = 'dataset'
    elif group_by == 'datasetsimulationobservable':
        warnings.warn("Found grouping by datasetId, simulation condition, and "
                      "observable. Using datasetId, omitting simulation "
                      "condition and observable.")
        group_by = 'dataset'
    elif group_by == 'simulationobservable':
        raise NotImplementedError(
            "Plotting without visualization specification file and datasetId "
            "can be performed via grouping by simulation conditions OR "
            "observables, but not both. Stopping.")
    elif group_by in ['simulation', 'observable', 'dataset']:
        pass
    elif group_by == '':
        # no grouping specified at all: default to one plot per observable
        group_by = 'observable'
        warnings.warn('Default plotting: grouping by observable. If you want '
                      'to specify another grouping option, please add '
                      '\'datasetId\' columns.')
    else:
        raise NotImplementedError(
            "No information provided, how to plot data. Stopping.")

    if group_by != 'dataset':
        # group plots not by dataset. Check, whether such a column would
        # have been available (and give a warning, if so)
        if 'datasetId' in exp_data.columns:
            warnings.warn("DatasetIds would have been available, but other "
                          "grouping was requested. Consider using datasetId.")
    elif 'datasetId' not in exp_data.columns:
        raise NotImplementedError(
            "Data should be grouped by datasetId, but no datasetId is "
            "given in the measurement file. Stopping.")

    return group_by
def create_dataset_id_list(
        simcond_id_list: List[IdsList],
        simcond_num_list: List[NumList],
        observable_id_list: List[IdsList],
        observable_num_list: List[NumList],
        exp_data: pd.DataFrame,
        exp_conditions: pd.DataFrame,
        group_by: str) -> Tuple[pd.DataFrame, List[IdsList], Dict, Dict]:
    """
    Create dataset id list and corresponding plot legends.
    Additionally, update/create DATASET_ID column of exp_data.

    Parameters:
        group_by: Defines grouping of data to plot ('simulation' or
            'observable').

    Returns:
        A tuple of experimental DataFrame, list of datasetIds and
        dictionaries of plot legends and y-values, corresponding to the
        datasetIds.

    For additional documentation, see main function plot_data_and_simulation()
    """
    warnings.warn("This function will be removed in future releases. ",
                  DeprecationWarning)
    # create a column of dummy datasetIDs and legend entries: preallocate
    dataset_id_column = []
    legend_dict = {}
    yvalues_dict = {}

    # loop over experimental data table, create datasetId for each entry
    tmp_simcond = list(exp_data[SIMULATION_CONDITION_ID])
    tmp_obs = list(exp_data[OBSERVABLE_ID])
    for ind, cond_id in enumerate(tmp_simcond):
        # create and add dummy datasetID
        dataset_id = cond_id + '_' + tmp_obs[ind]
        dataset_id_column.append(dataset_id)

        # create nicer legend entries from condition names instead of IDs
        if dataset_id not in legend_dict.keys():
            tmp = exp_conditions.loc[exp_conditions.index == cond_id]
            if CONDITION_NAME not in tmp.columns or tmp[
                    CONDITION_NAME].isna().any():
                # fall back to the condition ID if no name is available
                tmp.loc[:, CONDITION_NAME] = tmp.index.tolist()
            # use .iloc[0] instead of [0]: the positional fallback of
            # label-based Series indexing is deprecated in recent pandas
            legend_dict[dataset_id] = tmp[CONDITION_NAME].iloc[0] + ' - ' + \
                tmp_obs[ind]
            yvalues_dict[dataset_id] = tmp_obs[ind]

    # add these column to the measurement table (possibly overwrite)
    if DATASET_ID in exp_data.columns:
        exp_data = exp_data.drop(DATASET_ID, axis=1)
    exp_data.insert(loc=exp_data.columns.size, column=DATASET_ID,
                    value=dataset_id_column)

    # make dummy dataset names unique and iterable; dict.fromkeys keeps
    # first-occurrence order (O(n), replaces the former reduce-based O(n^2))
    unique_dataset_list = list(dict.fromkeys(exp_data[DATASET_ID]))
    unique_simcond_list = list(
        dict.fromkeys(exp_data[SIMULATION_CONDITION_ID]))
    unique_obs_list = list(dict.fromkeys(exp_data[OBSERVABLE_ID]))

    # we will need a dictionary for mapping simulation conditions
    # /observables to datasets
    ds_dict = {}
    dataset_id_list = []
    if group_by == 'simulation':
        if simcond_id_list is None:
            # translate simulation condition numbers into condition IDs
            simcond_id_list = [[unique_simcond_list[i_cond] for i_cond in
                                i_cond_list] for i_cond_list in
                               simcond_num_list]
        for simcond in unique_simcond_list:
            # datasetIds start with '<conditionId>' by construction above
            ds_dict[simcond] = [ds for ds in unique_dataset_list if ds[
                0:len(simcond)] == simcond]
        grouped_list = simcond_id_list
    elif group_by == 'observable':
        if not observable_id_list and not observable_num_list:
            # default: all observables in a single plot
            observable_id_list = [unique_obs_list]
        if observable_id_list is None:
            # translate observable numbers into observable IDs
            observable_id_list = [[unique_obs_list[i_obs] for i_obs in
                                   i_obs_list] for i_obs_list in
                                  observable_num_list]
        for observable in unique_obs_list:
            # datasetIds end with '_<observableId>' by construction above
            ds_dict[observable] = [ds for ds in unique_dataset_list if ds[
                -len(observable) - 1:] == '_' + observable]
        grouped_list = observable_id_list
    else:
        raise NotImplementedError(
            "Very, very weird error. Should not have happened. Something "
            "went wrong in how datasets should be grouped. Very weird...")

    # flatten the mapping into one list of datasetIds per plot
    for sublist in grouped_list:
        datasets_for_this_plot = [dset for sublist_entry in sublist
                                  for dset in ds_dict[sublist_entry]]
        dataset_id_list.append(datasets_for_this_plot)

    return exp_data, dataset_id_list, legend_dict, yvalues_dict
def generate_dataset_id_col(exp_data: pd.DataFrame) -> List[str]:
    """
    Generate DATASET_ID column from condition_ids and observable_ids.

    Parameters
    ----------
    exp_data:
        A measurement (simulation) DataFrame in the PEtab format.

    Returns
    -------
    A list with generated datasetIds for each entry in the measurement
    (simulation) DataFrame
    """
    # each row gets a dummy id of the form '<conditionId>_<observableId>'
    return [
        cond_id + '_' + obs_id
        for cond_id, obs_id in zip(exp_data[SIMULATION_CONDITION_ID],
                                   exp_data[OBSERVABLE_ID])
    ]
def create_dataset_id_list_new(df: pd.DataFrame,
                               group_by: str,
                               id_list: List[IdsList]
                               ) -> List[IdsList]:
    """
    Create dataset ID list from a list of simulation condition IDs or
    observable IDs.

    Parameters:
        df: Measurements or simulations DataFrame.
        group_by: Defines grouping of data to plot ('simulation' or
            'observable').
        id_list:
            Grouping list. Each sublist corresponds to a subplot in a figure,
            and contains the IDs of observables or simulation conditions for
            the subplot.

    Returns:
        A list of datasetIds, one sublist per subplot.

    Raises:
        ValueError:
            If the DATASET_ID column is missing from ``df`` or ``group_by``
            is not a supported grouping option.
    """
    if DATASET_ID not in df.columns:
        raise ValueError(f'{DATASET_ID} column must be in exp_data DataFrame')

    if group_by == 'simulation':
        grouping_col = SIMULATION_CONDITION_ID
    elif group_by == 'observable':
        grouping_col = OBSERVABLE_ID
        if id_list is None:
            # this is the default case. If no grouping is specified,
            # all observables are plotted. One observable per plot.
            unique_obs_list = df[OBSERVABLE_ID].unique()
            id_list = [[obs_id] for obs_id in unique_obs_list]
    else:
        # previously a bare `raise ValueError`; now carries a diagnostic
        # message so callers can tell what went wrong
        raise ValueError(f"Unsupported grouping option '{group_by}'. "
                         "Supported options are 'simulation' and "
                         "'observable'.")

    # collect, per subplot, all datasetIds belonging to the requested
    # condition/observable IDs
    dataset_id_list = []
    for sublist in id_list:
        plot_id_list = []
        for group_id in sublist:
            plot_id_list.extend(list(
                df[df[grouping_col] == group_id][DATASET_ID].unique()))
        dataset_id_list.append(plot_id_list)
    return dataset_id_list
def get_default_vis_specs(
        exp_data: pd.DataFrame,
        exp_conditions: pd.DataFrame,
        dataset_id_list: Optional[List[IdsList]] = None,
        sim_cond_id_list: Optional[List[IdsList]] = None,
        sim_cond_num_list: Optional[List[NumList]] = None,
        observable_id_list: Optional[List[IdsList]] = None,
        observable_num_list: Optional[List[NumList]] = None,
        plotted_noise: Optional[str] = MEAN_AND_SD
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Create a default visualization table and update/create the DATASET_ID
    column of exp_data.

    Helper function for plotting data and simulations.
    For documentation, see main function plot_data_and_simulation()

    Returns:
        A tuple of visualization specification DataFrame and experimental
        DataFrame.
    """
    warnings.warn("This function will be removed in future releases. ",
                  DeprecationWarning)

    # check consistency of settings
    group_by = check_vis_spec_consistency(
        exp_data, dataset_id_list, sim_cond_id_list, sim_cond_num_list,
        observable_id_list, observable_num_list)

    legend_dict = {}
    if group_by != 'dataset':
        # datasetId_list will be created (possibly overwriting previous list
        # - only in the local variable, not in the tsv-file)
        exp_data, dataset_id_list, legend_dict, _ = \
            create_dataset_id_list(sim_cond_id_list, sim_cond_num_list,
                                   observable_id_list, observable_num_list,
                                   exp_data, exp_conditions, group_by)

    # flatten the per-plot dataset id lists into spec-table columns
    dataset_id_column = [dset_id for sublist in dataset_id_list
                         for dset_id in sublist]
    if group_by == 'dataset':
        dataset_label_column = dataset_id_column
    else:
        dataset_label_column = [legend_dict[dset_id] for sublist in
                                dataset_id_list for dset_id in sublist]

    # get number of plots and create plotId-lists
    plot_id_list = [f'plot{ind + 1}' for ind, inner_list in enumerate(
        dataset_id_list) for _ in inner_list]

    # create dataframe
    vis_spec = pd.DataFrame({PLOT_ID: plot_id_list,
                             DATASET_ID: dataset_id_column,
                             LEGEND_ENTRY: dataset_label_column})

    # fill the remaining columns with default values
    for position, column, value in ((2, Y_LABEL, 'value'),
                                    (2, Y_OFFSET, 0),
                                    (2, Y_VALUES, ''),
                                    (2, X_LABEL, 'time'),
                                    (2, X_OFFSET, 0),
                                    (2, X_VALUES, 'time'),
                                    (1, Y_SCALE, LIN),
                                    (1, X_SCALE, LIN),
                                    (0, PLOT_TYPE_DATA, plotted_noise),
                                    (0, PLOT_TYPE_SIMULATION, LINE_PLOT),
                                    (0, PLOT_NAME, '')):
        vis_spec.insert(loc=position, column=column, value=value)

    return vis_spec, exp_data
def get_vis_spec_dependent_columns_dict(
        exp_data: pd.DataFrame,
        exp_conditions: pd.DataFrame,
        dataset_id_list: Optional[List[IdsList]] = None,
        sim_cond_id_list: Optional[List[IdsList]] = None,
        sim_cond_num_list: Optional[List[NumList]] = None,
        observable_id_list: Optional[List[IdsList]] = None,
        observable_num_list: Optional[List[NumList]] = None
) -> Tuple[pd.DataFrame, Dict]:
    """
    Create values for columns PLOT_ID, DATASET_ID, LEGEND_ENTRY, Y_VALUES
    for a visualization specification file.

    The DATASET_ID column of exp_data is updated accordingly.

    Returns:
        A tuple of experimental DataFrame and a dictionary with values for
        columns PLOT_ID, DATASET_ID, LEGEND_ENTRY, Y_VALUES for visualization
        specification file.
    """
    warnings.warn("This function will be removed in future releases. ",
                  DeprecationWarning)

    # check consistency of settings
    group_by = check_vis_spec_consistency(
        exp_data, dataset_id_list, sim_cond_id_list, sim_cond_num_list,
        observable_id_list, observable_num_list)

    legend_dict, yvalues_dict = {}, {}
    if group_by != 'dataset':
        # datasetId_list will be created (possibly overwriting previous list
        # - only in the local variable, not in the tsv-file)
        exp_data, dataset_id_list, legend_dict, yvalues_dict = \
            create_dataset_id_list(sim_cond_id_list, sim_cond_num_list,
                                   observable_id_list, observable_num_list,
                                   exp_data, exp_conditions, group_by)

    dataset_id_column = [dset_id for sublist in dataset_id_list
                         for dset_id in sublist]
    if group_by == 'dataset':
        dataset_label_column = dataset_id_column
        yvalues_column = [''] * len(dataset_id_column)
    else:
        dataset_label_column = [legend_dict[dset_id] for sublist in
                                dataset_id_list for dset_id in sublist]
        yvalues_column = [yvalues_dict[dset_id] for sublist in
                          dataset_id_list for dset_id in sublist]

    if group_by == 'observable':
        obs_uni = list(np.unique(exp_data[OBSERVABLE_ID]))
        # copy of dataset ids, for later replacing with plot ids
        plot_id_column = dataset_id_column.copy()
        for plot_number, obs_id in enumerate(obs_uni, start=1):
            # get dataset_ids which include observable name
            matching = [s for s in dataset_id_column if obs_id in s]
            # replace the dataset ids with plot id with grouping of
            # observables
            for match in matching:
                plot_id_column = [entry.replace(match, f'plot{plot_number}')
                                  for entry in plot_id_column]
    else:
        # get number of plots and create plotId-lists
        plot_id_column = [f'plot{ind + 1}' for ind, inner_list in
                          enumerate(dataset_id_list) for _ in inner_list]

    columns_dict = {PLOT_ID: plot_id_column,
                    DATASET_ID: dataset_id_column,
                    LEGEND_ENTRY: dataset_label_column,
                    Y_VALUES: yvalues_column}
    return exp_data, columns_dict
def expand_vis_spec_settings(vis_spec: pd.DataFrame, columns_dict):
    """
    Expand visualization specification settings to one row per dataset.

    (Only makes sense if DATASET_ID is not in vis_spec.columns?)

    Parameters
    ----------
    vis_spec: A visualization specification DataFrame.
    columns_dict:
        Dictionary with values for columns PLOT_ID, DATASET_ID,
        LEGEND_ENTRY, Y_VALUES; extended in place with the expanded
        setting columns.

    Returns
    -------
    Expanded visualization specification DataFrame
    """
    warnings.warn("This function will be removed in future releases. ",
                  DeprecationWarning)

    columns_to_expand = [PLOT_NAME, PLOT_TYPE_SIMULATION, PLOT_TYPE_DATA,
                         X_VALUES, X_OFFSET, X_LABEL, X_SCALE, Y_OFFSET,
                         Y_LABEL, Y_SCALE, LEGEND_ENTRY]

    for column in vis_spec.columns:
        if column not in columns_to_expand:
            continue
        column_entries = []
        if Y_VALUES in vis_spec.columns:
            # settings are selected per (plotId, yValues) combination
            for i, plot_id in enumerate(columns_dict[PLOT_ID]):
                select_conditions = (vis_spec[PLOT_ID] == plot_id) & (
                    vis_spec[Y_VALUES] == columns_dict[Y_VALUES][i])
                column_entries.append(
                    vis_spec[select_conditions].loc[:, column].values[0])
        else:
            # get unique plotIDs from visspecfile
            vis_plotid_u = vis_spec[PLOT_ID].unique()
            auto_plotid_u = list(set(columns_dict[PLOT_ID]))
            # if number of plotIds does not coincide (automatically
            # generated plotIds according to observable grouping, vs
            # plotIds specified in the visu_Spec)
            # BUGFIX: was `is not` — an identity comparison of two ints,
            # which is only incidentally correct for small values due to
            # CPython's int caching; use a value comparison instead
            if len(vis_plotid_u) != len(auto_plotid_u):
                # which items are not in visu_plotId:
                del_plotid = \
                    list(set(columns_dict[PLOT_ID]) - set(vis_plotid_u))
                # replace automatically generated plotIds with 'plot1' from
                # visu file
                for d_i in del_plotid:
                    columns_dict[PLOT_ID] = [
                        sub.replace(d_i, vis_plotid_u[0])
                        for sub in columns_dict[PLOT_ID]]
            for plot_id in columns_dict[PLOT_ID]:
                select_conditions = vis_spec[PLOT_ID] == plot_id
                column_entries.append(
                    vis_spec[select_conditions].loc[:, column].values[0])
        columns_dict[column] = column_entries

    vis_spec = pd.DataFrame(columns_dict)
    return vis_spec
def create_or_update_vis_spec(
        exp_data: pd.DataFrame,
        exp_conditions: pd.DataFrame,
        vis_spec: Optional[pd.DataFrame] = None,
        dataset_id_list: Optional[List[IdsList]] = None,
        sim_cond_id_list: Optional[List[IdsList]] = None,
        sim_cond_num_list: Optional[List[NumList]] = None,
        observable_id_list: Optional[List[IdsList]] = None,
        observable_num_list: Optional[List[NumList]] = None,
        plotted_noise: Optional[str] = MEAN_AND_SD):
    """
    Helper function for plotting data and simulations, which updates vis_spec
    file if necessary or creates a default visualization table and
    updates/creates DATASET_ID column of exp_data. As a result, a visualization
    specification file exists with columns PLOT_ID, DATASET_ID, Y_VALUES and
    LEGEND_ENTRY

    Returns:
        A tuple of visualization specification DataFrame and experimental
        DataFrame.
    """
    warnings.warn("This function will be removed in future releases. ",
                  DeprecationWarning)
    if vis_spec is None:
        # create dataframe
        exp_data, columns_dict = \
            get_vis_spec_dependent_columns_dict(exp_data,
                                                exp_conditions,
                                                dataset_id_list,
                                                sim_cond_id_list,
                                                sim_cond_num_list,
                                                observable_id_list,
                                                observable_num_list)
        vis_spec = pd.DataFrame(columns_dict)
    else:
        # TODO: do validation issue #190
        # so, plotid is definitely there
        if DATASET_ID not in vis_spec.columns:
            if Y_VALUES in vis_spec.columns:
                # derive an observable grouping from the yValues column:
                # one sublist of observables per unique plotId
                plot_id_list = np.unique(vis_spec[PLOT_ID])
                observable_id_list = [vis_spec[vis_spec[PLOT_ID] ==
                                               plot_id].loc[:, Y_VALUES].values
                                      for plot_id in plot_id_list]
                exp_data, columns_dict = \
                    get_vis_spec_dependent_columns_dict(
                        exp_data,
                        exp_conditions,
                        observable_id_list=observable_id_list)
            else:
                # PLOT_ID is there, but NOT DATASET_ID and not Y_VALUES,
                # but potentially some settings.
                # TODO: multiple plotids with diff settings
                exp_data, columns_dict = \
                    get_vis_spec_dependent_columns_dict(
                        exp_data,
                        exp_conditions)
            # get other settings that could have potentially been there
            # and expand according to plot_id_column
            vis_spec = expand_vis_spec_settings(vis_spec, columns_dict)
        # if dataset_id is there, then nothing to expand?
        if PLOT_TYPE_DATA not in vis_spec.columns:
            # NOTE(review): this `plotted_noise` default is only applied when
            # a vis_spec was passed in; in the vis_spec-is-None branch the
            # plotTypeData default comes from check_ex_visu_columns
            # (MEAN_AND_SD) instead — confirm this asymmetry is intended
            vis_spec[PLOT_TYPE_DATA] = plotted_noise
    # check columns, and add non-mandatory default columns
    vis_spec = check_ex_visu_columns(vis_spec, exp_data, exp_conditions)
    return exp_data, vis_spec
def check_ex_visu_columns(vis_spec: pd.DataFrame,
                          exp_data: pd.DataFrame,
                          exp_conditions: pd.DataFrame) -> pd.DataFrame:
    """
    Check the columns in the visualization specification; if non-mandatory
    columns do not exist, create them with default values.

    Returns:
        Updated visualization specification DataFrame
    """
    warnings.warn("This function will be removed in future releases. ",
                  DeprecationWarning)

    # simple defaults; the insertion order determines the column order of
    # the resulting table, so it mirrors the previous explicit checks
    for column, default in ((PLOT_NAME, ''),
                            (PLOT_TYPE_SIMULATION, LINE_PLOT),
                            (PLOT_TYPE_DATA, MEAN_AND_SD)):
        if column not in vis_spec.columns:
            vis_spec[column] = default

    if X_VALUES not in vis_spec.columns:
        # check if time is constant in expdata (if yes, plot dose response)
        # otherwise plot time series
        uni_time = pd.unique(exp_data[TIME])
        if len(uni_time) > 1:
            vis_spec[X_VALUES] = 'time'
        elif len(uni_time) == 1:
            if np.isin(exp_conditions.columns.values, 'conditionName').any():
                conds = exp_conditions.columns.drop('conditionName')
            else:
                conds = exp_conditions.columns
            # default: first dose-response condition (first from condition
            # table) is plotted
            # TODO: expand to automatic plotting of all conditions
            vis_spec[X_VALUES] = conds[0]
            vis_spec[X_LABEL] = conds[0]
            warnings.warn(
                '\n First dose-response condition is plotted. \n Check which '
                'condition you want to plot \n and possibly enter it into the '
                'column *xValues* \n in the visualization table.')
        else:
            raise NotImplementedError(
                'Strange Error. There is no time defined in the measurement '
                'table?')

    if X_OFFSET not in vis_spec.columns:
        vis_spec[X_OFFSET] = 0

    if X_LABEL not in vis_spec.columns:
        vis_spec[X_LABEL] = 'time'
        # non-time x-axes get a generic 'condition' label
        vis_spec.loc[vis_spec[X_VALUES] != 'time', X_LABEL] = 'condition'

    if X_SCALE not in vis_spec.columns:
        vis_spec[X_SCALE] = LIN

    for column, default in ((Y_VALUES, ''),
                            (Y_OFFSET, 0),
                            (Y_LABEL, 'value'),
                            (Y_SCALE, LIN)):
        if column not in vis_spec.columns:
            vis_spec[column] = default

    if LEGEND_ENTRY not in vis_spec.columns:
        vis_spec[LEGEND_ENTRY] = vis_spec[DATASET_ID]

    return vis_spec
def check_ex_exp_columns(
        exp_data: pd.DataFrame,
        dataset_id_list: List[IdsList],
        sim_cond_id_list: List[IdsList],
        sim_cond_num_list: List[NumList],
        observable_id_list: List[IdsList],
        observable_num_list: List[NumList],
        exp_conditions: pd.DataFrame,
        sim: Optional[bool] = False
) -> Tuple[pd.DataFrame, List[IdsList], Dict]:
    """
    Check the columns of the measurement (or simulation) table; if
    non-mandatory columns do not exist, create them with default values.

    Returns:
        A tuple of experimental DataFrame, list of datasetIds and
        dictionary of plot legends, corresponding to the datasetIds
    """
    warnings.warn("This function will be removed in future releases. ",
                  DeprecationWarning)
    data_type = SIMULATION if sim else MEASUREMENT

    # mandatory columns
    if OBSERVABLE_ID not in exp_data.columns:
        raise NotImplementedError(
            f"Column \'observableId\' is missing in {data_type} file. ")
    if SIMULATION_CONDITION_ID not in exp_data.columns:
        raise NotImplementedError(
            f"Column \'simulationConditionId\' is missing in {data_type} "
            f"file. ")
    if data_type not in exp_data.columns:
        raise NotImplementedError(
            f"Column \'{data_type}\' is missing in {data_type} "
            f"file. ")
    if TIME not in exp_data.columns:
        raise NotImplementedError(
            f"Column \'time\' is missing in {data_type} "
            f"file. ")

    # non-mandatory columns: insert neutral defaults at the same positions
    # as before (insertion order matters for the final column layout)
    for column, position, default in (
            (PREEQUILIBRATION_CONDITION_ID, 1, ''),
            (OBSERVABLE_PARAMETERS, 4, ''),
            (NOISE_PARAMETERS, 4, 0),
            (REPLICATE_ID, 4, '')):
        if column not in exp_data.columns:
            exp_data.insert(loc=position, column=column, value=default)

    legend_dict = {}
    if DATASET_ID not in exp_data.columns:
        # TODO: ?
        if dataset_id_list is not None:
            exp_data.insert(loc=4, column=DATASET_ID,
                            value=dataset_id_list)
        else:
            # datasetId_list will be created (possibly overwriting previous
            # list - only in the local variable, not in the tsv-file)
            # check consistency of settings
            group_by = check_vis_spec_consistency(exp_data,
                                                  dataset_id_list,
                                                  sim_cond_id_list,
                                                  sim_cond_num_list,
                                                  observable_id_list,
                                                  observable_num_list)
            observable_id_list = \
                [[el] for el in exp_data.observableId.unique()]
            exp_data, dataset_id_list, legend_dict, _ = \
                create_dataset_id_list(
                    sim_cond_id_list, sim_cond_num_list, observable_id_list,
                    observable_num_list, exp_data, exp_conditions, group_by)

    # if DATASET_ID is in exp_data.columns, legend dict will be empty
    return exp_data, dataset_id_list, legend_dict
def handle_dataset_plot(plot_spec: pd.Series,
                        ax: plt.Axes,
                        exp_data: pd.DataFrame,
                        exp_conditions: pd.DataFrame,
                        sim_data: pd.DataFrame):
    """
    Plot the dataset described by a single visualization-spec row onto
    the given axes.
    """
    warnings.warn("This function will be removed in future releases. ",
                  DeprecationWarning)

    # get datasetID and independent variable of first entry of plot1
    dataset_id = plot_spec[DATASET_ID]
    indep_var = plot_spec[X_VALUES]

    # define index to reduce exp_data to data linked to datasetId
    ind_dataset = exp_data[DATASET_ID] == dataset_id

    # gather simulationConditionIds belonging to datasetId
    uni_condition_id, uind = np.unique(
        exp_data[ind_dataset][SIMULATION_CONDITION_ID],
        return_index=True)
    # keep the ordering which was given by user from top to bottom
    # (avoid ordering by names '1','10','11','2',...)'
    uni_condition_id = uni_condition_id[np.argsort(uind)]
    col_name_unique = SIMULATION_CONDITION_ID

    # Case separation of independent parameter: condition, time or custom
    if indep_var == TIME:
        # obtain unique observation times
        uni_condition_id = np.unique(exp_data[ind_dataset][TIME])
        col_name_unique = TIME
        conditions = uni_condition_id
    elif indep_var == 'condition':
        conditions = None
    else:
        # extract conditions (plot input) from condition file
        ind_cond = exp_conditions.index.isin(uni_condition_id)
        conditions = exp_conditions[ind_cond][indep_var]

    # retrieve measurements from dataframes
    measurement_to_plot = get_data_to_plot(plot_spec, exp_data, sim_data,
                                           uni_condition_id, col_name_unique)

    # plot data, unless there is nothing but NaN to show; simulations are
    # plotted only if simulation data was provided
    if not all(np.isnan(value) for value in measurement_to_plot['mean']):
        plot_lowlevel(plot_spec, ax, conditions, measurement_to_plot,
                      sim_data is not None)

    # Beautify plots
    ax.set_xlabel(plot_spec.xLabel)
    ax.set_ylabel(plot_spec.yLabel)
def matches_plot_spec(df: pd.DataFrame,
                      col_id: str,
                      x_value: Union[float, str],
                      plot_spec: pd.Series) -> pd.Series:
    """
    Construct an index for subsetting of the dataframe according to what is
    specified in plot_spec.

    Parameters:
        df:
            pandas data frame to subset, can be from measurement file or
            simulation file
        col_id:
            name of the column that will be used for indexing in x variable
        x_value:
            subsetted x value
        plot_spec:
            visualization spec from the visualization file

    Returns:
        Boolean series that can be used for subsetting of the passed
        dataframe

    Raises:
        ValueError:
            If ``plot_spec`` does not specify Y_VALUES although the subset
            contains multiple different observables.
    """
    warnings.warn("This function will be removed in future releases. ",
                  DeprecationWarning)
    subset = (
        (df[col_id] == x_value) &
        (df[DATASET_ID] == plot_spec[DATASET_ID])
    )
    if plot_spec[Y_VALUES] == '':
        if len(df.loc[subset, OBSERVABLE_ID].unique()) > 1:
            # BUGFIX: the exception was previously constructed but never
            # raised, so ambiguous subsets passed through silently
            raise ValueError(
                f'{Y_VALUES} must be specified in visualization table if '
                f'multiple different observables are available.'
            )
    else:
        subset &= (df[OBSERVABLE_ID] == plot_spec[Y_VALUES])
    return subset
def get_data_to_plot(plot_spec: pd.Series,
                     m_data: pd.DataFrame,
                     simulation_data: pd.DataFrame,
                     condition_ids: np.ndarray,
                     col_id: str,
                     simulation_field: str = SIMULATION) -> pd.DataFrame:
    """
    Group the data, which should be plotted and return it as dataframe.

    Parameters:
        plot_spec:
            information about contains defined data format (visualization
            file)
        m_data:
            contains defined data format (measurement file)
        simulation_data:
            contains defined data format (simulation file)
        condition_ids:
            contains all unique condition IDs which should be
            plotted in one figure (can be found in measurementData file,
            column simulationConditionId)
        col_id:
            the name of the column in visualization file, whose entries
            should be unique (depends on condition in column xValues)
        simulation_field:
            Column name in ``simulation_data`` that contains the actual
            simulation result.

    Returns:
        Contains the data which should be plotted (Mean and Std)
    """
    warnings.warn("This function will be removed in future releases. ",
                  DeprecationWarning)

    # create empty dataframe for means and SDs
    data_to_plot = pd.DataFrame(
        columns=['mean', 'noise_model', 'sd', 'sem', 'repl', 'sim'],
        index=condition_ids
    )
    for var_cond_id in condition_ids:
        # TODO (#117): Here not the case: So, if entries in measurement file:
        #  preequCondId, time, observableParams, noiseParams,
        #  are not the same, then -> differ these data into
        #  different groups! now: go in simulationConditionId, search group
        #  of unique simulationConditionId e.g. rows 0,6,12,18 share the same
        #  simulationCondId, then check if other column entries are the same
        #  (now: they are), then take intersection of rows 0,6,12,18 and
        #  checked other same columns (-> now: 0,6,12,18) and then go on.
        #  If there is at some point a difference in other columns, say e.g.
        #  row 12,18 have different noiseParams than rows 0,6, the actual
        #  code would take rows 0,6 and forget about rows 12,18

        # compute mean and standard deviation across replicates
        subset = matches_plot_spec(m_data, col_id, var_cond_id, plot_spec)
        data_measurements = m_data.loc[
            subset,
            MEASUREMENT
        ]
        data_to_plot.at[var_cond_id, 'mean'] = np.mean(data_measurements)
        data_to_plot.at[var_cond_id, 'sd'] = np.std(data_measurements)

        # FIX: use short-circuiting `and` instead of bitwise `&` between a
        # bool and an int; the branch outcome is unchanged, but the intent
        # is a logical conjunction, not a bit operation
        if plot_spec.plotTypeData == PROVIDED and sum(subset):
            # noise values are plotted as provided: they must be numeric
            # and consistent within the subset
            if len(m_data.loc[subset, NOISE_PARAMETERS].unique()) > 1:
                raise NotImplementedError(
                    f"Datapoints with inconsistent {NOISE_PARAMETERS} is "
                    f"currently not implemented. Stopping.")
            tmp_noise = m_data.loc[subset, NOISE_PARAMETERS].values[0]
            if isinstance(tmp_noise, str):
                raise NotImplementedError(
                    "No numerical noise values provided in the measurement "
                    "table. Stopping.")
            if isinstance(tmp_noise, Number) or tmp_noise.dtype == 'float64':
                data_to_plot.at[var_cond_id, 'noise_model'] = tmp_noise

        # standard error of mean
        data_to_plot.at[var_cond_id, 'sem'] = \
            np.std(data_measurements) / np.sqrt(len(data_measurements))

        # single replicates
        data_to_plot.at[var_cond_id, 'repl'] = \
            data_measurements

        if simulation_data is not None:
            simulation_measurements = simulation_data.loc[
                matches_plot_spec(simulation_data, col_id, var_cond_id,
                                  plot_spec),
                simulation_field
            ]
            data_to_plot.at[var_cond_id, 'sim'] = np.mean(
                simulation_measurements
            )
    return data_to_plot