# Source code for petab.visualize.plot_data_and_simulation

"""Functions for plotting PEtab measurement files and simulation results in
the same format."""

from typing import Dict, Union, Optional, List

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import warnings

from .helper_functions import (create_figure,
                               handle_dataset_plot,
                               check_ex_exp_columns,
                               create_or_update_vis_spec)

from .plotter import MPLPlotter
from .plotting import VisSpecParser
from .. import problem, measurements, core, conditions
from ..C import *

# Type aliases used in the signatures below: a list of string IDs and a list
# of integer indices. The functions take lists of these (one inner list per
# plot).
IdsList = List[str]
NumList = List[int]


# Public API of this module.
__all__ = ["plot_data_and_simulation", "plot_petab_problem",
           "plot_measurements_by_observable", "save_vis_spec",
           "plot_with_vis_spec", "plot_without_vis_spec", "plot_problem"]
# NOTE: autodoc mixes up the plot_data_and_simulation module and the function
# of the same name and documents the latter; this function will be removed in
# future releases.


def plot_data_and_simulation(
        exp_data: Union[str, pd.DataFrame],
        exp_conditions: Union[str, pd.DataFrame],
        vis_spec: Optional[Union[str, pd.DataFrame]] = None,
        sim_data: Optional[Union[str, pd.DataFrame]] = None,
        dataset_id_list: Optional[List[IdsList]] = None,
        sim_cond_id_list: Optional[List[IdsList]] = None,
        sim_cond_num_list: Optional[List[NumList]] = None,
        observable_id_list: Optional[List[IdsList]] = None,
        observable_num_list: Optional[List[NumList]] = None,
        plotted_noise: Optional[str] = MEAN_AND_SD,
        subplot_file_path: str = ''
) -> Optional[Union[Dict[str, plt.Subplot],
                    'np.ndarray[plt.Subplot]']]:
    """
    Main function for plotting data and simulations.

    .. deprecated::
        Use ``plot_with_vis_spec`` or ``plot_without_vis_spec`` instead.

    What exactly should be plotted is specified in a
    visualizationSpecification.tsv file.

    Also, the data, simulations and conditions have
    to be defined in a specific format
    (see "doc/documentation_data_format.md").

    Parameters
    ----------
    exp_data:
        Measurement DataFrame in the PEtab format or path to the data file.
    exp_conditions:
        Condition DataFrame in the PEtab format or path to the condition file.
    vis_spec:
        Visualization specification DataFrame in the PEtab format or path to
        visualization file.
    sim_data:
        Simulation DataFrame in the PEtab format
        or path to the simulation output data file.
    dataset_id_list:
        A list of lists. Each sublist corresponds to one plot and contains
        the datasetIds for this plot.
        Only to be used if no visualization file was available.
    sim_cond_id_list:
        A list of lists. Each sublist corresponds to one plot and contains
        the simulationConditionIds for this plot.
        Only to be used if no visualization file was available.
    sim_cond_num_list:
        A list of lists. Each sublist corresponds to one plot and contains
        the numbers corresponding to the simulationConditionIds for
        this plot.
        Only to be used if no visualization file was available.
    observable_id_list:
        A list of lists. Each sublist corresponds to one plot and contains
        the observableIds for this plot.
        Only to be used if no visualization file was available.
    observable_num_list:
        A list of lists. Each sublist corresponds to one plot and contains
        the numbers corresponding to the observableIds for this plot.
        Only to be used if no visualization file was available.
    plotted_noise:
        String indicating how noise should be visualized:
        ['MeanAndSD' (default), 'MeanAndSEM', 'replicate', 'provided']
    subplot_file_path:
        Directory to which single subplots are saved, one
        ``<plotId>.png`` file per plot. If empty (default), nothing is
        written and the axes are returned instead.

    Returns
    -------
    The axes created for the plots, or ``None`` if the subplots were saved
    to files (i.e. ``subplot_file_path`` is not empty).
    """
    # stacklevel=2 attributes the warning to the calling code, so it is
    # reported at the place that actually needs to be migrated.
    warnings.warn("This function will be removed in future releases. "
                  "Please use plot_with_vis_spec or plot_without_vis_spec "
                  "instead.",
                  DeprecationWarning,
                  stacklevel=2)

    if isinstance(exp_conditions, str):
        exp_conditions = conditions.get_condition_df(exp_conditions)

    # import simulation file, if file was specified
    if sim_data is not None:
        if isinstance(sim_data, str):
            sim_data = core.get_simulation_df(sim_data)
        # check columns, and add non-mandatory default columns
        sim_data, _, _ = check_ex_exp_columns(sim_data,
                                              dataset_id_list,
                                              sim_cond_id_list,
                                              sim_cond_num_list,
                                              observable_id_list,
                                              observable_num_list,
                                              exp_conditions,
                                              sim=True)

    # import from file in case experimental data is provided in file
    if isinstance(exp_data, str):
        exp_data = measurements.get_measurement_df(exp_data)

    # check columns, and add non-mandatory default columns; the third return
    # value (a legend dictionary) is not needed here
    exp_data, dataset_id_list, _ = \
        check_ex_exp_columns(exp_data,
                             dataset_id_list,
                             sim_cond_id_list,
                             sim_cond_num_list,
                             observable_id_list,
                             observable_num_list,
                             exp_conditions)

    # import visualization specification, if file was specified
    if isinstance(vis_spec, str):
        vis_spec = core.get_visualization_df(vis_spec)

    exp_data, vis_spec = create_or_update_vis_spec(exp_data,
                                                   exp_conditions,
                                                   vis_spec,
                                                   dataset_id_list,
                                                   sim_cond_id_list,
                                                   sim_cond_num_list,
                                                   observable_id_list,
                                                   observable_num_list,
                                                   plotted_noise)

    # TODO: causing some problems?
    # NOTE(review): this column assignment is aligned on the row index, so
    # it presumably relies on sim_data and exp_data sharing the same index
    # -- confirm.
    if sim_data is not None:
        sim_data[DATASET_ID] = exp_data[DATASET_ID]

    # get unique plotIDs
    uni_plot_ids = np.unique(vis_spec[PLOT_ID])

    # Either save every plot to its own file, or draw all plots into one
    # figure whose axes are returned
    plots_to_file = subplot_file_path != ''
    if not plots_to_file:
        fig, axes = create_figure(uni_plot_ids, plots_to_file)

    # loop over unique plotIds
    for var_plot_id in uni_plot_ids:

        if plots_to_file:
            # a fresh figure per plot, since each one goes to its own file
            fig, axes = create_figure(uni_plot_ids, plots_to_file)
            ax = axes[0, 0]
        else:
            ax = axes[var_plot_id]

        # get indices for specific plotId
        ind_plot = (vis_spec[PLOT_ID] == var_plot_id)

        # loop over datasets
        for _, plot_spec in vis_spec[ind_plot].iterrows():
            # handle plot of current dataset
            handle_dataset_plot(plot_spec, ax, exp_data,
                                exp_conditions, sim_data)

        # bar plots get a fixed legend and one x tick per visualization row
        if vis_spec.loc[ind_plot, PLOT_TYPE_SIMULATION].isin([BAR_PLOT]).all():

            legend = ['measurement']

            if sim_data is not None:
                legend.append('simulation')

            ax.legend(legend)
            x_names = vis_spec.loc[ind_plot, LEGEND_ENTRY]
            ax.set_xticks(range(len(x_names)))
            ax.set_xticklabels(x_names)

            for label in ax.get_xmajorticklabels():
                label.set_rotation(30)
                label.set_horizontalalignment("right")

        if plots_to_file:
            # operate on the explicit figure handle rather than on pyplot's
            # implicit "current figure"
            fig.tight_layout()
            fig.savefig(f'{subplot_file_path}/{var_plot_id}.png')
            plt.close(fig)

    # finalize figure
    if not plots_to_file:
        fig.tight_layout()
        return axes

    return None


def plot_petab_problem(
        petab_problem: problem.Problem,
        sim_data: Optional[Union[str, pd.DataFrame]] = None,
        dataset_id_list: Optional[List[IdsList]] = None,
        sim_cond_id_list: Optional[List[IdsList]] = None,
        sim_cond_num_list: Optional[List[NumList]] = None,
        observable_id_list: Optional[List[IdsList]] = None,
        observable_num_list: Optional[List[NumList]] = None,
        plotted_noise: Optional[str] = MEAN_AND_SD
) -> Optional[Union[Dict[str, plt.Subplot], 'np.ndarray[plt.Subplot]']]:
    """
    Visualization using petab problem.

    .. deprecated::
        Use ``plot_problem`` instead.

    Thin wrapper that passes the measurement, condition and visualization
    tables of ``petab_problem`` to ``plot_data_and_simulation()``; see that
    function for the documentation of the remaining parameters.
    No ``subplot_file_path`` is forwarded, so plots are not saved to files.

    Parameters
    ----------
    petab_problem:
        PEtab problem providing ``measurement_df``, ``condition_df`` and
        ``visualization_df``.

    Returns
    -------
    Whatever ``plot_data_and_simulation()`` returns for these inputs.
    """
    # stacklevel=2 attributes the warning to the calling code
    warnings.warn("This function will be removed in future releases. "
                  "Please use plot_problem instead",
                  DeprecationWarning,
                  stacklevel=2)

    return plot_data_and_simulation(petab_problem.measurement_df,
                                    petab_problem.condition_df,
                                    petab_problem.visualization_df,
                                    sim_data,
                                    dataset_id_list,
                                    sim_cond_id_list,
                                    sim_cond_num_list,
                                    observable_id_list,
                                    observable_num_list,
                                    plotted_noise)


def plot_measurements_by_observable(
        data_file_path: str,
        condition_file_path: str,
        plotted_noise: Optional[str] = MEAN_AND_SD
) -> Optional[Union[Dict[str, plt.Subplot], 'np.ndarray[plt.Subplot]']]:
    """
    Plot measurement data grouped by observable ID.

    .. deprecated::
        Use ``plot_without_vis_spec`` without providing ``grouping_list``.

    A simple wrapper around the more complex function
    ``plot_data_and_simulation``: one plot is requested per unique
    observable ID found in the measurement file.

    Parameters
    ----------
    data_file_path:
        File path of measurement data
    condition_file_path:
        File path of condition file
    plotted_noise:
        String indicating how noise should be visualized:
        ['MeanAndSD' (default), 'MeanAndSEM', 'replicate', 'provided']

    Returns
    -------
    The axes returned by ``plot_data_and_simulation``.
    """
    # stacklevel=2 attributes the warning to the calling code
    warnings.warn("This function will be removed in future releases. "
                  "Please use plot_without_vis_spec without providing "
                  "grouping_list",
                  DeprecationWarning,
                  stacklevel=2)

    # import measurement data
    measurement_data = measurements.get_measurement_df(data_file_path)

    # one single-element group (i.e. one plot) per unique observable ID
    observable_id_list = [
        [str(obs_id)]
        for obs_id in np.unique(np.array(measurement_data.observableId))
    ]

    # use new routine now
    return plot_data_and_simulation(measurement_data, condition_file_path,
                                    observable_id_list=observable_id_list,
                                    plotted_noise=plotted_noise)


def save_vis_spec(
        exp_data: Union[str, pd.DataFrame],
        exp_conditions: Union[str, pd.DataFrame],
        vis_spec: Optional[Union[str, pd.DataFrame]] = None,
        dataset_id_list: Optional[List[IdsList]] = None,
        sim_cond_id_list: Optional[List[IdsList]] = None,
        sim_cond_num_list: Optional[List[NumList]] = None,
        observable_id_list: Optional[List[IdsList]] = None,
        observable_num_list: Optional[List[NumList]] = None,
        plotted_noise: Optional[str] = MEAN_AND_SD,
        output_file_path: str = 'visuSpec.tsv'
) -> None:
    """
    Generate and save visualization specification to a file.
    If vis_spec is provided, the missing columns will be added.

    .. deprecated::
        This function will be removed in future releases.

    Parameters
    ----------
    exp_data:
        Measurement DataFrame in the PEtab format or path to the data file.
    exp_conditions:
        Condition DataFrame in the PEtab format or path to the condition file.
    vis_spec:
        Visualization specification DataFrame in the PEtab format or path to
        visualization file.
    dataset_id_list:
        A list of lists. Each sublist corresponds to one plot and contains
        the datasetIds for this plot.
        Only to be used if no visualization file was available.
    sim_cond_id_list:
        A list of lists. Each sublist corresponds to one plot and contains
        the simulationConditionIds for this plot.
        Only to be used if no visualization file was available.
    sim_cond_num_list:
        A list of lists. Each sublist corresponds to one plot and contains
        the numbers corresponding to the simulationConditionIds for
        this plot.
        Only to be used if no visualization file was available.
    observable_id_list:
        A list of lists. Each sublist corresponds to one plot and contains
        the observableIds for this plot.
        Only to be used if no visualization file was available.
    observable_num_list:
        A list of lists. Each sublist corresponds to one plot and contains
        the numbers corresponding to the observableIds for this plot.
        Only to be used if no visualization file was available.
    plotted_noise:
        String indicating how noise should be visualized:
        ['MeanAndSD' (default), 'MeanAndSEM', 'replicate', 'provided']
    output_file_path:
        File path to which the generated visualization specification is
        saved (tab-separated, without the index column).
    """
    # stacklevel=2 attributes the warning to the calling code
    warnings.warn("This function will be removed in future releases. ",
                  DeprecationWarning,
                  stacklevel=2)

    # import from file in case experimental data is provided in file
    if isinstance(exp_data, str):
        exp_data = measurements.get_measurement_df(exp_data)

    if isinstance(exp_conditions, str):
        exp_conditions = conditions.get_condition_df(exp_conditions)

    # import visualization specification, if file was specified
    if isinstance(vis_spec, str):
        vis_spec = core.get_visualization_df(vis_spec)

    # only the (created or completed) specification is needed here; the
    # returned measurement table is discarded
    _, vis_spec = create_or_update_vis_spec(exp_data,
                                            exp_conditions,
                                            vis_spec,
                                            dataset_id_list,
                                            sim_cond_id_list,
                                            sim_cond_num_list,
                                            observable_id_list,
                                            observable_num_list,
                                            plotted_noise)

    vis_spec.to_csv(output_file_path, sep='\t', index=False)


def plot_with_vis_spec(
        vis_spec_df,
        conditions_df: Union[str, pd.DataFrame],
        measurements_df: Optional[Union[str, pd.DataFrame]] = None,
        simulations_df: Optional[Union[str, pd.DataFrame]] = None,
        subplot_dir: Optional[str] = None,
        plotter_type: str = 'mpl') -> Optional[Dict[str, plt.Subplot]]:
    """
    Plot measurements and/or simulations. Specification of the visualization
    routines is provided in visualization table.

    Parameters
    ----------
    vis_spec_df:
        Visualization table; passed unchanged to
        ``VisSpecParser.parse_from_vis_spec``.
    conditions_df:
        A condition DataFrame in the PEtab format or path to the condition
        file.
    measurements_df:
        A measurement DataFrame in the PEtab format or path to the data file.
    simulations_df:
        A simulation DataFrame in the PEtab format or path to the simulation
        output data file.
    subplot_dir:
        A path to the folder where single subplots should be saved.
        PlotIDs will be taken as file names.
    plotter_type:
        Specifies which library should be used for plot generation. Currently,
        only matplotlib (``'mpl'``) is supported.

    Returns
    -------
    ax: Axis object of the created plot.
    None: In case subplots are saved to a file.

    Raises
    ------
    TypeError
        If neither ``measurements_df`` nor ``simulations_df`` is provided.
    NotImplementedError
        If ``plotter_type`` is not ``'mpl'``.
    """

    if measurements_df is None and simulations_df is None:
        raise TypeError('Not enough arguments. Either measurements_df '
                        'or simulations_df should be provided.')

    # reject unsupported backends before doing any parsing work
    if plotter_type != 'mpl':
        raise NotImplementedError('Currently, only visualization with '
                                  'matplotlib is possible.')

    vis_spec_parser = VisSpecParser(conditions_df, measurements_df,
                                    simulations_df)
    figure, dataprovider = vis_spec_parser.parse_from_vis_spec(
        vis_spec_df)

    plotter = MPLPlotter(figure, dataprovider)
    return plotter.generate_figure(subplot_dir)


def plot_without_vis_spec(
        conditions_df: Union[str, pd.DataFrame],
        grouping_list: Optional[List[IdsList]] = None,
        group_by: str = 'observable',
        measurements_df: Optional[Union[str, pd.DataFrame]] = None,
        simulations_df: Optional[Union[str, pd.DataFrame]] = None,
        plotted_noise: str = MEAN_AND_SD,
        subplot_dir: Optional[str] = None,
        plotter_type: str = 'mpl'
) -> Optional[Dict[str, plt.Subplot]]:
    """
    Plot measurements and/or simulations. What exactly should be plotted is
    specified in a grouping_list.
    If grouping list is not provided, measurements (simulations) will be
    grouped by observable, i.e. all measurements for each observable will be
    visualized on one plot.

    Parameters
    ----------
    conditions_df:
        A condition DataFrame in the PEtab format or path to the condition
        file.
    grouping_list:
        A list of lists. Each sublist corresponds to one plot and contains
        the Ids of datasets or observables or simulation conditions
        for this plot.
    group_by:
        Grouping type.
        Possible values: 'dataset', 'observable', 'simulation'
    measurements_df:
        A measurement DataFrame in the PEtab format or path to the data file.
    simulations_df:
        A simulation DataFrame in the PEtab format or path to the simulation
        output data file.
    plotted_noise:
        A string indicating how noise should be visualized:
        ['MeanAndSD' (default), 'MeanAndSEM', 'replicate', 'provided']
    subplot_dir:
        A path to the folder where single subplots should be saved.
        PlotIDs will be taken as file names.
    plotter_type:
        Specifies which library should be used for plot generation. Currently,
        only matplotlib (``'mpl'``) is supported.

    Returns
    -------
    ax: Axis object of the created plot.
    None: In case subplots are saved to a file.

    Raises
    ------
    TypeError
        If neither ``measurements_df`` nor ``simulations_df`` is provided.
    NotImplementedError
        If ``plotter_type`` is not ``'mpl'``.
    """

    if measurements_df is None and simulations_df is None:
        raise TypeError('Not enough arguments. Either measurements_df '
                        'or simulations_df should be provided.')

    # reject unsupported backends before doing any parsing work
    if plotter_type != 'mpl':
        raise NotImplementedError('Currently, only visualization with '
                                  'matplotlib is possible.')

    vis_spec_parser = VisSpecParser(conditions_df, measurements_df,
                                    simulations_df)

    figure, dataprovider = vis_spec_parser.parse_from_id_list(
        grouping_list, group_by, plotted_noise)

    plotter = MPLPlotter(figure, dataprovider)
    return plotter.generate_figure(subplot_dir)


def plot_problem(petab_problem: problem.Problem,
                 simulations_df: Optional[Union[str, pd.DataFrame]] = None,
                 grouping_list: Optional[List[IdsList]] = None,
                 group_by: str = 'observable',
                 plotted_noise: str = MEAN_AND_SD,
                 subplot_dir: Optional[str] = None,
                 plotter_type: str = 'mpl'
                 ) -> Optional[Dict[str, plt.Subplot]]:
    """
    Visualization using petab problem.
    If Visualization table is part of the petab_problem, it will be used for
    visualization. Otherwise, grouping_list will be used.
    If neither Visualization table nor grouping_list are available,
    measurements (simulations) will be grouped by observable, i.e. all
    measurements for each observable will be visualized on one plot.

    Note that ``grouping_list``, ``group_by`` and ``plotted_noise`` are only
    forwarded when the problem has no visualization table.

    Parameters
    ----------
    petab_problem:
        A PEtab problem
    simulations_df:
        A simulation DataFrame in the PEtab format or path to the simulation
        output data file.
    grouping_list:
        A list of lists. Each sublist corresponds to one plot and contains
        the Ids of datasets or observables or simulation conditions
        for this plot.
    group_by:
        Possible values: 'dataset', 'observable', 'simulation'
    plotted_noise:
        A string indicating how noise should be visualized:
        ['MeanAndSD' (default), 'MeanAndSEM', 'replicate', 'provided']
    subplot_dir:
        A string which is taken as path to the folder where single subplots
        should be saved. PlotIDs will be taken as file names.
    plotter_type:
        Specifies which library should be used for plot generation. Currently,
        only matplotlib is supported

    Returns
    -------
    ax: Axis object of the created plot.
    None: In case subplots are saved to a file.
    """

    # Arguments are passed by keyword so that they cannot be silently
    # misrouted among the many same-typed parameters of the wrapped functions.
    if petab_problem.visualization_df is not None:
        return plot_with_vis_spec(
            vis_spec_df=petab_problem.visualization_df,
            conditions_df=petab_problem.condition_df,
            measurements_df=petab_problem.measurement_df,
            simulations_df=simulations_df,
            subplot_dir=subplot_dir,
            plotter_type=plotter_type)

    return plot_without_vis_spec(
        conditions_df=petab_problem.condition_df,
        grouping_list=grouping_list,
        group_by=group_by,
        measurements_df=petab_problem.measurement_df,
        simulations_df=simulations_df,
        plotted_noise=plotted_noise,
        subplot_dir=subplot_dir,
        plotter_type=plotter_type)