Source code for pymodulon.io

"""
Functions for reading and writing data into files.
"""

import gzip
import json

import pandas as pd

from pymodulon.core import IcaData


[docs]def save_to_json(data, filename, compress=False): """ Save :class:`~pymodulon.core.IcaData` object to a json file Parameters ---------- data: ~pymodulon.core.IcaData ICA dataset to be saved to json file filename: str Path to json file where the data will be saved compress: bool Indicates if the JSON file should be compressed into a gzip archive Returns ------- None: None """ # only keeps params that are used to initialize the data load_params = IcaData.__init__.__code__.co_varnames param_dict = { key: getattr(data, key) for key in vars(IcaData) if key in load_params } # serialize pandas DataFrames and change sets to lists for key, val in param_dict.items(): if isinstance(val, pd.DataFrame): # Replace string NaN with empty string str_cols = val.select_dtypes(object).columns val[str_cols] = val[str_cols].fillna("") param_dict[key] = val.astype(str).to_json() elif isinstance(val, pd.Series): if val.dtype == object: newval = val.fillna("") param_dict[key] = newval.astype(str).to_json() elif isinstance(val, set): param_dict[key] = list(val) # Encode MotifInfo objects as dictionaries elif key == "motif_info": new_val = {} for k1, v1 in data.motif_info.items(): new_v1 = {} for k2, v2 in v1.__dict__.items(): try: new_v1[k2[1:]] = v2.to_json(orient="table") except AttributeError: new_v1[k2[1:]] = v2 new_val[k1] = new_v1 param_dict[key] = new_val # Add _cutoff_optimized param_dict["_cutoff_optimized"] = data.cutoff_optimized param_dict["_dagostino_cutoff"] = data.dagostino_cutoff if filename.endswith(".gz") or compress: if not filename.endswith(".json.gz"): filename += ".json.gz" with gzip.open(filename, "wt", encoding="ascii") as zipfile: json.dump(param_dict, zipfile) else: if not filename.endswith(".json"): filename += ".json" with open(filename, "w") as fp: json.dump(param_dict, fp)
[docs]def load_json_model(filename): """ Load :class:`~pymodulon.core.IcaData` object from a file in JSON format. Parameters ---------- filename : str or ~io._io.StringIO File path or descriptor that contains the JSON document describing the ICA dataset. Returns ------- IcaData : ~pymodulon.core.IcaData The :class:`~pymodulon.core.IcaData` object as represented in the JSON document. """ if isinstance(filename, str): if filename.endswith(".gz"): with gzip.GzipFile(filename, "r") as zipfile: serial_data = json.loads(zipfile.read().decode("utf-8")) else: with open(filename, "r") as file_handle: serial_data = json.load(file_handle) else: serial_data = json.load(filename) # Add cutoff information try: cutoff_optimized = serial_data.pop("_cutoff_optimized") except KeyError: cutoff_optimized = False try: dagostino_cutoff = serial_data.pop("_dagostino_cutoff") except KeyError: dagostino_cutoff = None # Remove deprecated arguments deprecated_args = ["cog_colors", "dataset_table", "splash_table", "link_database"] for arg in deprecated_args: if arg in serial_data.keys(): serial_data.pop(arg) data = IcaData(**serial_data) data._cutoff_optimized = cutoff_optimized data._dagostino_cutoff = dagostino_cutoff return data