1
0
Fork 0

Refactor method import_data in JsonImporter

parallel_struct_est
philpMartin 4 years ago
parent 4efb8ba4fe
commit 1954487500
  1. 24
      PyCTBN/PyCTBN/abstract_importer.py
  2. 36
      PyCTBN/PyCTBN/json_importer.py
  3. 34
      PyCTBN/PyCTBN/original_ctpc_algorithm.py
  4. 13
      PyCTBN/PyCTBN/sample_path.py
  5. 2
      PyCTBN/PyCTBN/simple_cvs_importer.py
  6. 75
      PyCTBN/tests/test_json_importer.py
  7. 3
      PyCTBN/tests/test_networkgraph.py
  8. 6
      PyCTBN/tests/test_parameters_estimator.py
  9. 17
      PyCTBN/tests/test_sample_path.py
  10. 3
      PyCTBN/tests/test_structure_estimator.py
  11. 3
      basic_main.py
  12. 41
      documentation/rst/abstract_importer.rst

@ -13,6 +13,14 @@ class AbstractImporter(ABC):
:_df_structure: Dataframe containing the structure of the network (edges) :_df_structure: Dataframe containing the structure of the network (edges)
:_df_variables: Dataframe containing the nodes cardinalities :_df_variables: Dataframe containing the nodes cardinalities
:_sorter: A list containing the columns header (excluding the time column) of the `_concatenated_samples` :_sorter: A list containing the columns header (excluding the time column) of the `_concatenated_samples`
.. warning::
The class members ``_df_variables`` and ``_df_structure`` HAVE to be properly constructed
as Pandas Dataframes with the following structure:
Header of _df_structure = [From_Node | To_Node]
Header of _df_variables = [Variable_Label | Variable_Cardinality]
.. note::
See :class:`JsonImporter` for an example implementation
""" """
def __init__(self, file_path: str): def __init__(self, file_path: str):
@ -24,21 +32,7 @@ class AbstractImporter(ABC):
self._concatenated_samples = None self._concatenated_samples = None
self._sorter = None self._sorter = None
super().__init__() super().__init__()
@abstractmethod
def import_data(self) -> None:
"""Imports all the trajectories, variables cardinalities, and net edges.
.. warning::
The class members ``_df_variables`` and ``_df_structure`` HAVE to be properly constructed
as Pandas Dataframes with the following structure:
Header of _df_structure = [From_Node | To_Node]
Header of _df_variables = [Variable_Label | Variable_Cardinality]
.. note::
See :class:``JsonImporter`` for an example of implementation of this method.
"""
pass
@abstractmethod @abstractmethod
def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List: def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
"""Initializes the ``_sorter`` class member from a trajectory dataframe, exctracting the header of the frame """Initializes the ``_sorter`` class member from a trajectory dataframe, exctracting the header of the frame

@ -8,8 +8,8 @@ from .abstract_importer import AbstractImporter
class JsonImporter(AbstractImporter): class JsonImporter(AbstractImporter):
"""Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare the data in json ext. """Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare
with the following structure: the data in json extension with the following structure:
[0] [0]
|_ dyn.cims |_ dyn.cims
|_ dyn.str |_ dyn.str
@ -27,14 +27,20 @@ class JsonImporter(AbstractImporter):
:type time_key: string :type time_key: string
:param variables_key: the key used to identify the names of the variables in the net :param variables_key: the key used to identify the names of the variables in the net
:type variables_key: string :type variables_key: string
:param array_indx: the index of the outer JsonArray to exctract the data from :_array_indx: the index of the outer JsonArray to extract the data from
:type array_indx: int :type _array_indx: int
:_df_samples_list: a Dataframe list in which every dataframe contains a trajectory :_df_samples_list: a Dataframe list in which every dataframe contains a trajectory
:_raw_data: The raw contents of the json file to import
:type _raw_data: List
""" """
def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str, def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
variables_key: str, array_indx: int): variables_key: str):
"""Constructor method """Constructor method
.. note::
This constructor also calls the method ``read_json_file()``, so after the construction of the object
the class member ``_raw_data`` will contain the raw imported json data.
""" """
self._samples_label = samples_label self._samples_label = samples_label
self._structure_label = structure_label self._structure_label = structure_label
@ -42,19 +48,23 @@ class JsonImporter(AbstractImporter):
self._time_key = time_key self._time_key = time_key
self._variables_key = variables_key self._variables_key = variables_key
self._df_samples_list = None self._df_samples_list = None
self._array_indx = array_indx self._array_indx = None
super(JsonImporter, self).__init__(file_path) super(JsonImporter, self).__init__(file_path)
self._raw_data = self.read_json_file()
def import_data(self, indx: int) -> None:
"""Implements the abstract method of :class:`AbstractImporter`.
def import_data(self) -> None: :param indx: the index of the outer JsonArray to extract the data from
"""Implements the abstract method of :class:`AbstractImporter` :type indx: int
""" """
raw_data = self.read_json_file() self._array_indx = indx
self._df_samples_list = self.import_trajectories(raw_data) self._df_samples_list = self.import_trajectories(self._raw_data)
self._sorter = self.build_sorter(self._df_samples_list[0]) self._sorter = self.build_sorter(self._df_samples_list[0])
self.compute_row_delta_in_all_samples_frames(self._df_samples_list) self.compute_row_delta_in_all_samples_frames(self._df_samples_list)
self.clear_data_frame_list() self.clear_data_frame_list()
self._df_structure = self.import_structure(raw_data) self._df_structure = self.import_structure(self._raw_data)
self._df_variables = self.import_variables(raw_data) self._df_variables = self.import_variables(self._raw_data)
def import_trajectories(self, raw_data: typing.List) -> typing.List: def import_trajectories(self, raw_data: typing.List) -> typing.List:
"""Imports the trajectories from the list of dicts ``raw_data``. """Imports the trajectories from the list of dicts ``raw_data``.
@ -87,7 +97,7 @@ class JsonImporter(AbstractImporter):
return self.one_level_normalizing(raw_data, self._array_indx, self._variables_label) return self.one_level_normalizing(raw_data, self._array_indx, self._variables_label)
def read_json_file(self) -> typing.List: def read_json_file(self) -> typing.List:
"""Reads the JSON file in the path self.filePath """Reads the JSON file in the path self.filePath.
:return: The contents of the json file :return: The contents of the json file
:rtype: List :rtype: List

@ -1,12 +1,10 @@
import glob
import json import json
import os
from itertools import combinations from itertools import combinations
import typing import typing
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from line_profiler import LineProfiler
from scipy.stats import chi2 as chi2_dist from scipy.stats import chi2 as chi2_dist
from scipy.stats import f as f_dist from scipy.stats import f as f_dist
from tqdm import tqdm from tqdm import tqdm
@ -36,7 +34,7 @@ class OriginalCTPCAlgorithm(AbstractImporter):
pass pass
def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str, def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
variables_key: str, array_indx: int): variables_key: str, raw_data: typing.List):
""" """
Parameters: Parameters:
file_path: the path of the file that contains tha data to be imported file_path: the path of the file that contains tha data to be imported
@ -53,11 +51,12 @@ class OriginalCTPCAlgorithm(AbstractImporter):
self.variables_key = variables_key self.variables_key = variables_key
self.df_samples_list = None self.df_samples_list = None
self.trajectories = None self.trajectories = None
self._array_indx = array_indx self._array_indx = None
self.matrix = None self.matrix = None
super(OriginalCTPCAlgorithm, self).__init__(file_path) super(OriginalCTPCAlgorithm, self).__init__(file_path)
self._raw_data = raw_data
def import_data(self): def import_data(self, indx):
""" """
Imports and prepares all data present needed for subsequent processing. Imports and prepares all data present needed for subsequent processing.
Parameters: Parameters:
@ -65,14 +64,16 @@ class OriginalCTPCAlgorithm(AbstractImporter):
Returns: Returns:
_void _void
""" """
raw_data = self.read_json_file() self._array_indx = indx
self.df_samples_list = self.import_trajectories(raw_data) self.df_samples_list = self.import_trajectories(self._raw_data)
self._sorter = self.build_sorter(self.df_samples_list[0]) self._sorter = self.build_sorter(self.df_samples_list[0])
#self.compute_row_delta_in_all_samples_frames(self._df_samples_list) #self.compute_row_delta_in_all_samples_frames(self._df_samples_list)
#self.clear_data_frame_list() #self.clear_data_frame_list()
self._df_structure = self.import_structure(raw_data) self._df_structure = self.import_structure(self._raw_data)
self._df_variables = self.import_variables(raw_data, self._sorter) self._df_variables = self.import_variables(self._raw_data, self._sorter)
def datasets_numb(self) -> int:
    """Return the number of entries held in ``_raw_data``.

    ``_raw_data`` is the list handed to the constructor; ``import_data(indx)``
    selects one of its entries by index, so this value is the exclusive upper
    bound for ``indx``.

    :return: the length of ``_raw_data``
    :rtype: int
    """
    return len(self._raw_data)
def import_trajectories(self, raw_data: typing.List): def import_trajectories(self, raw_data: typing.List):
""" """
@ -107,15 +108,7 @@ class OriginalCTPCAlgorithm(AbstractImporter):
:Datframe containg the variables simbolic labels and their cardinalities :Datframe containg the variables simbolic labels and their cardinalities
""" """
return self.one_level_normalizing(raw_data, self._array_indx, self.variables_label) return self.one_level_normalizing(raw_data, self._array_indx, self.variables_label)
#TODO Usando come Pre-requisito l'ordinamento del frame _df_variables uguale a quello presente in
#TODO self _sorter questo codice risulta inutile
"""self._df_variables[self._variables_key] = self._df_variables[self._variables_key].astype("category")
self._df_variables[self._variables_key] = self._df_variables[self._variables_key].cat.set_categories(sorter)
self._df_variables = self._df_variables.sort_values([self._variables_key])
self._df_variables.reset_index(inplace=True)
self._df_variables.drop('index', axis=1, inplace=True)
#print("Var Frame", self._df_variables)
"""
def read_json_file(self) -> typing.List: def read_json_file(self) -> typing.List:
""" """
@ -160,9 +153,6 @@ class OriginalCTPCAlgorithm(AbstractImporter):
smps = raw_data[indx][trajectories_key] smps = raw_data[indx][trajectories_key]
df_samples_list = [dataframe(sample) for sample in smps] df_samples_list = [dataframe(sample) for sample in smps]
return df_samples_list return df_samples_list
#columns_header = list(self._df_samples_list[0].columns.values)
#columns_header.remove(self._time_key)
#self._sorter = columns_header
def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List: def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
""" """

@ -12,20 +12,25 @@ class SamplePath:
cardinalites. Has the task of creating the objects ``Trajectory`` and ``Structure`` that will cardinalites. Has the task of creating the objects ``Trajectory`` and ``Structure`` that will
contain the mentioned data. contain the mentioned data.
:param importer: the Importer objects that will import ad process data :param importer: the Importer object which contains the imported and processed data
:type importer: AbstractImporter :type importer: AbstractImporter
:_trajectories: the ``Trajectory`` object that will contain all the concatenated trajectories :_trajectories: the ``Trajectory`` object that will contain all the concatenated trajectories
:_structure: the ``Structure`` Object that will contain all the structurral infos about the net :_structure: the ``Structure`` Object that will contain all the structural infos about the net
:_total_variables_count: the number of variables in the net :_total_variables_count: the number of variables in the net
""" """
def __init__(self, importer: AbstractImporter): def __init__(self, importer: AbstractImporter):
"""Constructor Method """Constructor Method
""" """
self._importer = importer self._importer = importer
if (self._importer._df_variables is None or self._importer._df_structure is None
or self._importer._concatenated_samples is None):
raise RuntimeError('The importer object has to contain the all processed data!')
if(self._importer._df_variables.empty or self._importer._df_structure.empty
or self._importer._concatenated_samples.empty):
raise RuntimeError('The importer object has to contain the all processed data!')
self._trajectories = None self._trajectories = None
self._structure = None self._structure = None
self._total_variables_count = None self._total_variables_count = None
self._importer.import_data()
def build_trajectories(self) -> None: def build_trajectories(self) -> None:
"""Builds the Trajectory object that will contain all the trajectories. """Builds the Trajectory object that will contain all the trajectories.
@ -60,7 +65,7 @@ class SamplePath:
return self._structure return self._structure
@property @property
def total_variables_count(self): def total_variables_count(self) -> int:
return self._total_variables_count return self._total_variables_count

@ -4,8 +4,6 @@ import os
import typing import typing
#import abstract_importer as ai
#import sample_path as sp
from .abstract_importer import AbstractImporter from .abstract_importer import AbstractImporter
from .sample_path import SamplePath from .sample_path import SamplePath

@ -6,6 +6,7 @@ import numpy as np
import pandas as pd import pandas as pd
import json import json
from ..PyCTBN.json_importer import JsonImporter from ..PyCTBN.json_importer import JsonImporter
@ -14,10 +15,9 @@ class TestJsonImporter(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls) -> None: def setUpClass(cls) -> None:
cls.read_files = glob.glob(os.path.join('./data', "*.json")) cls.read_files = glob.glob(os.path.join('./data', "*.json"))
#print(os.path.join('../data'))
def test_init(self): def test_init(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0) j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
self.assertEqual(j1._samples_label, 'samples') self.assertEqual(j1._samples_label, 'samples')
self.assertEqual(j1._structure_label, 'dyn.str') self.assertEqual(j1._structure_label, 'dyn.str')
self.assertEqual(j1._variables_label, 'variables') self.assertEqual(j1._variables_label, 'variables')
@ -29,6 +29,8 @@ class TestJsonImporter(unittest.TestCase):
self.assertIsNone(j1.structure) self.assertIsNone(j1.structure)
self.assertIsNone(j1.concatenated_samples) self.assertIsNone(j1.concatenated_samples)
self.assertIsNone(j1.sorter) self.assertIsNone(j1.sorter)
self.assertIsNone(j1._array_indx)
self.assertIsInstance(j1._raw_data, list)
def test_read_json_file_found(self): def test_read_json_file_found(self):
data_set = {"key1": [1, 2, 3], "key2": [4, 5, 6]} data_set = {"key1": [1, 2, 3], "key2": [4, 5, 6]}
@ -36,39 +38,37 @@ class TestJsonImporter(unittest.TestCase):
json.dump(data_set, f) json.dump(data_set, f)
path = os.getcwd() path = os.getcwd()
path = path + '/data.json' path = path + '/data.json'
j1 = JsonImporter(path, '', '', '', '', '', 0) j1 = JsonImporter(path, '', '', '', '', '')
imported_data = j1.read_json_file() #imported_data = j1.read_json_file()
self.assertTrue(self.ordered(data_set) == self.ordered(imported_data)) self.assertTrue(self.ordered(data_set) == self.ordered(j1._raw_data))
os.remove('data.json') os.remove('data.json')
def test_read_json_file_not_found(self): def test_read_json_file_not_found(self):
path = os.getcwd() path = os.getcwd()
path = path + '/data.json' path = path + '/data.json'
j1 = JsonImporter(path, '', '', '', '', '', 0) #j1 = JsonImporter(path, '', '', '', '', '')
self.assertRaises(FileNotFoundError, j1.read_json_file) self.assertRaises(FileNotFoundError, JsonImporter, path, '', '', '', '', '')
def test_normalize_trajectories(self): def test_normalize_trajectories(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0) j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
raw_data = j1.read_json_file() #raw_data = j1.read_json_file()
#print(raw_data) #print(raw_data)
df_samples_list = j1.normalize_trajectories(raw_data, 0, j1._samples_label) df_samples_list = j1.normalize_trajectories(j1._raw_data, 0, j1._samples_label)
self.assertEqual(len(df_samples_list), len(raw_data[0][j1._samples_label])) self.assertEqual(len(df_samples_list), len(j1._raw_data[0][j1._samples_label]))
#self.assertEqual(list(j1._df_samples_list[0].columns.values)[1:], j1.sorter)
def test_normalize_trajectories_wrong_indx(self): def test_normalize_trajectories_wrong_indx(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0) j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
raw_data = j1.read_json_file() self.assertRaises(IndexError, j1.normalize_trajectories, j1._raw_data, 474, j1._samples_label)
self.assertRaises(IndexError, j1.normalize_trajectories, raw_data, 474, j1._samples_label)
def test_normalize_trajectories_wrong_key(self): def test_normalize_trajectories_wrong_key(self):
j1 = JsonImporter(self.read_files[0], 'sample', 'dyn.str', 'variables', 'Time', 'Name', 0) j1 = JsonImporter(self.read_files[0], 'sample', 'dyn.str', 'variables', 'Time', 'Name')
raw_data = j1.read_json_file() self.assertRaises(KeyError, j1.normalize_trajectories, j1._raw_data, 0, j1._samples_label)
self.assertRaises(KeyError, j1.normalize_trajectories, raw_data, 0, j1._samples_label)
def test_compute_row_delta_single_samples_frame(self): def test_compute_row_delta_single_samples_frame(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0) j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
raw_data = j1.read_json_file() #raw_data = j1.read_json_file()
j1._df_samples_list = j1.import_trajectories(raw_data) j1._array_indx = 0
j1._df_samples_list = j1.import_trajectories(j1._raw_data)
sample_frame = j1._df_samples_list[0] sample_frame = j1._df_samples_list[0]
original_copy = sample_frame.copy() original_copy = sample_frame.copy()
columns_header = list(sample_frame.columns.values) columns_header = list(sample_frame.columns.values)
@ -88,9 +88,10 @@ class TestJsonImporter(unittest.TestCase):
np.array(original_copy.iloc[indx + 1][columns_header[1:]], dtype=int)) np.array(original_copy.iloc[indx + 1][columns_header[1:]], dtype=int))
def test_compute_row_delta_in_all_frames(self): def test_compute_row_delta_in_all_frames(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0) j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
raw_data = j1.read_json_file() #raw_data = j1.read_json_file()
j1._df_samples_list = j1.import_trajectories(raw_data) j1._array_indx = 0
j1._df_samples_list = j1.import_trajectories(j1._raw_data)
j1._sorter = j1.build_sorter(j1._df_samples_list[0]) j1._sorter = j1.build_sorter(j1._df_samples_list[0])
j1.compute_row_delta_in_all_samples_frames(j1._df_samples_list) j1.compute_row_delta_in_all_samples_frames(j1._df_samples_list)
self.assertEqual(list(j1._df_samples_list[0].columns.values), self.assertEqual(list(j1._df_samples_list[0].columns.values),
@ -98,9 +99,10 @@ class TestJsonImporter(unittest.TestCase):
self.assertEqual(list(j1.concatenated_samples.columns.values)[0], j1._time_key) self.assertEqual(list(j1.concatenated_samples.columns.values)[0], j1._time_key)
def test_clear_data_frame_list(self): def test_clear_data_frame_list(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0) j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
raw_data = j1.read_json_file() #raw_data = j1.read_json_file()
j1._df_samples_list = j1.import_trajectories(raw_data) j1._array_indx = 0
j1._df_samples_list = j1.import_trajectories(j1._raw_data)
j1._sorter = j1.build_sorter(j1._df_samples_list[0]) j1._sorter = j1.build_sorter(j1._df_samples_list[0])
j1.compute_row_delta_in_all_samples_frames(j1._df_samples_list) j1.compute_row_delta_in_all_samples_frames(j1._df_samples_list)
j1.clear_data_frame_list() j1.clear_data_frame_list()
@ -108,8 +110,8 @@ class TestJsonImporter(unittest.TestCase):
self.assertTrue(df.empty) self.assertTrue(df.empty)
def test_clear_concatenated_frame(self): def test_clear_concatenated_frame(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0) j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
j1.import_data() j1.import_data(0)
j1.clear_concatenated_frame() j1.clear_concatenated_frame()
self.assertTrue(j1.concatenated_samples.empty) self.assertTrue(j1.concatenated_samples.empty)
@ -119,7 +121,7 @@ class TestJsonImporter(unittest.TestCase):
json.dump(data_set, f) json.dump(data_set, f)
path = os.getcwd() path = os.getcwd()
path = path + '/data.json' path = path + '/data.json'
j1 = JsonImporter(path, '', '', '', '', '', 0) j1 = JsonImporter(path, '', '', '', '', '')
raw_data = j1.read_json_file() raw_data = j1.read_json_file()
frame = pd.DataFrame(raw_data) frame = pd.DataFrame(raw_data)
col_list = j1.build_list_of_samples_array(frame) col_list = j1.build_list_of_samples_array(frame)
@ -131,22 +133,25 @@ class TestJsonImporter(unittest.TestCase):
os.remove('data.json') os.remove('data.json')
def test_import_variables(self): def test_import_variables(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0) j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
sorter = ['X', 'Y', 'Z'] sorter = ['X', 'Y', 'Z']
raw_data = [{'variables':{"Name": ['X', 'Y', 'Z'], "value": [3, 3, 3]}}] raw_data = [{'variables':{"Name": ['X', 'Y', 'Z'], "value": [3, 3, 3]}}]
j1._array_indx = 0
df_var = j1.import_variables(raw_data) df_var = j1.import_variables(raw_data)
self.assertEqual(list(df_var[j1._variables_key]), sorter) self.assertEqual(list(df_var[j1._variables_key]), sorter)
def test_import_structure(self): def test_import_structure(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0) j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
raw_data = [{"dyn.str":[{"From":"X","To":"Z"},{"From":"Y","To":"Z"},{"From":"Z","To":"Y"}]}] raw_data = [{"dyn.str":[{"From":"X","To":"Z"},{"From":"Y","To":"Z"},{"From":"Z","To":"Y"}]}]
j1._array_indx = 0
df_struct = j1.import_structure(raw_data) df_struct = j1.import_structure(raw_data)
#print(raw_data[0]['dyn.str'][0].items()) #print(raw_data[0]['dyn.str'][0].items())
self.assertIsInstance(df_struct, pd.DataFrame) self.assertIsInstance(df_struct, pd.DataFrame)
def test_import_sampled_cims(self): def test_import_sampled_cims(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0) j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
raw_data = j1.read_json_file() raw_data = j1.read_json_file()
j1._array_indx = 0
j1._df_samples_list = j1.import_trajectories(raw_data) j1._df_samples_list = j1.import_trajectories(raw_data)
j1._sorter = j1.build_sorter(j1._df_samples_list[0]) j1._sorter = j1.build_sorter(j1._df_samples_list[0])
cims = j1.import_sampled_cims(raw_data, 0, 'dyn.cims') cims = j1.import_sampled_cims(raw_data, 0, 'dyn.cims')
@ -154,8 +159,8 @@ class TestJsonImporter(unittest.TestCase):
self.assertEqual(list(cims.keys()), j1.sorter) self.assertEqual(list(cims.keys()), j1.sorter)
def test_import_data(self): def test_import_data(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 2) j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
j1.import_data() j1.import_data(0)
self.assertEqual(list(j1.variables[j1._variables_key]), self.assertEqual(list(j1.variables[j1._variables_key]),
list(j1.concatenated_samples.columns.values[1:len(j1.variables[j1._variables_key]) + 1])) list(j1.concatenated_samples.columns.values[1:len(j1.variables[j1._variables_key]) + 1]))
print(j1.variables) print(j1.variables)

@ -15,7 +15,8 @@ class TestNetworkGraph(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.read_files = glob.glob(os.path.join('./data', "*.json")) cls.read_files = glob.glob(os.path.join('./data', "*.json"))
cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0) cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
cls.importer.import_data(0)
cls.s1 = SamplePath(cls.importer) cls.s1 = SamplePath(cls.importer)
cls.s1.build_trajectories() cls.s1.build_trajectories()
cls.s1.build_structure() cls.s1.build_structure()

@ -17,8 +17,8 @@ class TestParametersEstimatior(unittest.TestCase):
def setUpClass(cls) -> None: def setUpClass(cls) -> None:
cls.read_files = glob.glob(os.path.join('./data', "*.json")) cls.read_files = glob.glob(os.path.join('./data', "*.json"))
cls.array_indx = 0 cls.array_indx = 0
cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
cls.array_indx) cls.importer.import_data(cls.array_indx)
cls.s1 = SamplePath(cls.importer) cls.s1 = SamplePath(cls.importer)
cls.s1.build_trajectories() cls.s1.build_trajectories()
cls.s1.build_structure() cls.s1.build_structure()
@ -58,7 +58,7 @@ class TestParametersEstimatior(unittest.TestCase):
self.assertTrue(np.all(np.isclose(r1, r2, 1e-01, 1e-01) == True)) self.assertTrue(np.all(np.isclose(r1, r2, 1e-01, 1e-01) == True))
def aux_import_sampled_cims(self, cims_label): def aux_import_sampled_cims(self, cims_label):
i1 = JsonImporter(self.read_files[0], '', '', '', '', '', self.array_indx) i1 = JsonImporter(self.read_files[0], '', '', '', '', '')
raw_data = i1.read_json_file() raw_data = i1.read_json_file()
return i1.import_sampled_cims(raw_data, self.array_indx, cims_label) return i1.import_sampled_cims(raw_data, self.array_indx, cims_label)

@ -14,22 +14,31 @@ class TestSamplePath(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls) -> None: def setUpClass(cls) -> None:
cls.read_files = glob.glob(os.path.join('./data', "*.json")) cls.read_files = glob.glob(os.path.join('./data', "*.json"))
cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0)
def test_init_not_initialized_importer(self):
    """SamplePath must reject an importer on which ``import_data`` was never called.

    A freshly constructed JsonImporter has not processed any data yet, and the
    SamplePath constructor raises RuntimeError when the importer's dataframes
    are still ``None``.
    """
    importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
    self.assertRaises(RuntimeError, SamplePath, importer)
def test_init(self): def test_init(self):
s1 = SamplePath(self.importer) importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
importer.import_data(0)
s1 = SamplePath(importer)
self.assertIsNone(s1.trajectories) self.assertIsNone(s1.trajectories)
self.assertIsNone(s1.structure) self.assertIsNone(s1.structure)
self.assertFalse(s1._importer.concatenated_samples.empty) self.assertFalse(s1._importer.concatenated_samples.empty)
self.assertIsNone(s1._total_variables_count) self.assertIsNone(s1._total_variables_count)
def test_build_trajectories(self): def test_build_trajectories(self):
s1 = SamplePath(self.importer) importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
importer.import_data(0)
s1 = SamplePath(importer)
s1.build_trajectories() s1.build_trajectories()
self.assertIsInstance(s1.trajectories, Trajectory) self.assertIsInstance(s1.trajectories, Trajectory)
def test_build_structure(self): def test_build_structure(self):
s1 = SamplePath(self.importer) importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
importer.import_data(0)
s1 = SamplePath(importer)
s1.build_structure() s1.build_structure()
self.assertIsInstance(s1.structure, Structure) self.assertIsInstance(s1.structure, Structure)
self.assertEqual(s1._total_variables_count, len(s1._importer.sorter)) self.assertEqual(s1._total_variables_count, len(s1._importer.sorter))

@ -21,7 +21,8 @@ class TestStructureEstimator(unittest.TestCase):
@classmethod @classmethod
def setUpClass(cls): def setUpClass(cls):
cls.read_files = glob.glob(os.path.join('./data', "*.json")) cls.read_files = glob.glob(os.path.join('./data', "*.json"))
cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0) cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
cls.importer.import_data(0)
cls.s1 = SamplePath(cls.importer) cls.s1 = SamplePath(cls.importer)
cls.s1.build_trajectories() cls.s1.build_trajectories()
cls.s1.build_structure() cls.s1.build_structure()

@ -12,7 +12,8 @@ from PyCTBN.parameters_estimator import ParametersEstimator
def main(): def main():
read_files = glob.glob(os.path.join('./data', "*.json")) #Take all json files in this dir read_files = glob.glob(os.path.join('./data', "*.json")) #Take all json files in this dir
#import data #import data
importer = JsonImporter(read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 1) importer = JsonImporter(read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
importer.import_data(0)
#Create a SamplePath Obj #Create a SamplePath Obj
s1 = SamplePath(importer) s1 = SamplePath(importer)
#Build The trajectries and the structural infos #Build The trajectries and the structural infos

@ -5,3 +5,44 @@ abstract\_importer module
:members: :members:
:undoc-members: :undoc-members:
:show-inheritance: :show-inheritance:
An example of a simple CSV Importer
===================================
Suppose you have a csv dataset containing only the trajectories, three variable labels and cardinalities.
Then the resulting importer that inherits from and extends AbstractImporter would be:
.. code-block:: python
class CSVImporter(AbstractImporter):
    """Example importer for a CSV file that contains only trajectories.

    The variable cardinalities and the network structure are not read from
    the file; they are hard-coded in ``import_variables`` and
    ``import_structure``.
    """

    def __init__(self, file_path):
        """Forward ``file_path`` to the AbstractImporter constructor.

        No data is read here; call ``import_data`` to load and process the file.
        """
        self._df_samples_list = None
        super(CSVImporter, self).__init__(file_path)

    def import_data(self):
        """Read the CSV file and fill the importer's class members.

        The sorter is built before the variables are imported because
        ``import_variables`` derives the variable labels from ``_sorter``.
        """
        self.read_csv_file()
        self._sorter = self.build_sorter(self._df_samples_list[0])
        self.import_variables()
        self.import_structure()
        self.compute_row_delta_in_all_samples_frames(self._df_samples_list)

    def read_csv_file(self):
        """Load the CSV at ``_file_path`` as a single-trajectory samples list."""
        df = pd.read_csv(self._file_path)
        # Drops the first column of the file -- presumably a saved row index;
        # verify against the actual dataset.
        df.drop(df.columns[[0]], axis=1, inplace=True)
        self._df_samples_list = [df]

    def import_variables(self):
        """Build ``_df_variables`` with one row per label in ``_sorter``."""
        # Every variable is given the fixed cardinality 3 in this example.
        values_list = [3 for var in self._sorter]
        # initialize dict of lists
        data = {'Name': self._sorter, 'Value': values_list}
        # Create the pandas DataFrame
        self._df_variables = pd.DataFrame(data)

    def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
        """Return every column label of ``sample_frame`` except the first one.

        The first column is presumably the time column -- confirm for your data.
        """
        return list(sample_frame.columns)[1:]

    def import_structure(self):
        """Build ``_df_structure`` from the fixed edges X->Z, Y->Z and Z->Y."""
        data = {'From': ['X', 'Y', 'Z'], 'To': ['Z', 'Z', 'Y']}
        self._df_structure = pd.DataFrame(data)

    def dataset_id(self) -> object:
        """Left unimplemented in this example; returns ``None``."""
        pass