
Refactor method import_data in JsonImporter

parallel_struct_est
philpMartin 4 years ago
parent 4efb8ba4fe
commit 1954487500
 PyCTBN/PyCTBN/abstract_importer.py        | 24
 PyCTBN/PyCTBN/json_importer.py            | 36
 PyCTBN/PyCTBN/original_ctpc_algorithm.py  | 34
 PyCTBN/PyCTBN/sample_path.py              | 13
 PyCTBN/PyCTBN/simple_cvs_importer.py      |  2
 PyCTBN/tests/test_json_importer.py        | 75
 PyCTBN/tests/test_networkgraph.py         |  3
 PyCTBN/tests/test_parameters_estimator.py |  6
 PyCTBN/tests/test_sample_path.py          | 17
 PyCTBN/tests/test_structure_estimator.py  |  3
 basic_main.py                             |  3
 documentation/rst/abstract_importer.rst   | 41

@@ -13,6 +13,14 @@ class AbstractImporter(ABC):
:_df_structure: Dataframe containing the structure of the network (edges)
:_df_variables: Dataframe containing the nodes cardinalities
:_sorter: A list containing the columns header (excluding the time column) of the `_concatenated_samples`
.. warning::
The class members ``_df_variables`` and ``_df_structure`` HAVE to be properly constructed
as Pandas Dataframes with the following structure:
Header of _df_structure = [From_Node | To_Node]
Header of _df_variables = [Variable_Label | Variable_Cardinality]
.. note::
See :class:`JsonImporter` for an example implementation
"""
def __init__(self, file_path: str):
@@ -24,21 +32,7 @@ class AbstractImporter(ABC):
self._concatenated_samples = None
self._sorter = None
super().__init__()
@abstractmethod
def import_data(self) -> None:
"""Imports all the trajectories, variables cardinalities, and net edges.
.. warning::
The class members ``_df_variables`` and ``_df_structure`` HAVE to be properly constructed
as Pandas Dataframes with the following structure:
Header of _df_structure = [From_Node | To_Node]
Header of _df_variables = [Variable_Label | Variable_Cardinality]
.. note::
See :class:`JsonImporter` for an example implementation of this method.
"""
pass
@abstractmethod
def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
"""Initializes the ``_sorter`` class member from a trajectory dataframe, exctracting the header of the frame

@@ -8,8 +8,8 @@ from .abstract_importer import AbstractImporter
class JsonImporter(AbstractImporter):
"""Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare the data in json ext.
with the following structure:
"""Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare
the data in json extension with the following structure:
[0]
|_ dyn.cims
|_ dyn.str
@@ -27,14 +27,20 @@ class JsonImporter(AbstractImporter):
:type time_key: string
:param variables_key: the key used to identify the names of the variables in the net
:type variables_key: string
:param array_indx: the index of the outer JsonArray to exctract the data from
:type array_indx: int
:_array_indx: the index of the outer JsonArray to extract the data from
:type _array_indx: int
:_df_samples_list: a Dataframe list in which every dataframe contains a trajectory
:_raw_data: The raw contents of the json file to import
:type _raw_data: List
"""
def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
variables_key: str, array_indx: int):
variables_key: str):
"""Constructor method
.. note::
This constructor calls also the method ``read_json_file()``, so after the construction of the object
the class member ``_raw_data`` will contain the raw imported json data.
"""
self._samples_label = samples_label
self._structure_label = structure_label
@@ -42,19 +48,23 @@ class JsonImporter(AbstractImporter):
self._time_key = time_key
self._variables_key = variables_key
self._df_samples_list = None
self._array_indx = array_indx
self._array_indx = None
super(JsonImporter, self).__init__(file_path)
self._raw_data = self.read_json_file()
def import_data(self, indx: int) -> None:
"""Implements the abstract method of :class:`AbstractImporter`.
def import_data(self) -> None:
"""Implements the abstract method of :class:`AbstractImporter`
:param indx: the index of the outer JsonArray to extract the data from
:type indx: int
"""
raw_data = self.read_json_file()
self._df_samples_list = self.import_trajectories(raw_data)
self._array_indx = indx
self._df_samples_list = self.import_trajectories(self._raw_data)
self._sorter = self.build_sorter(self._df_samples_list[0])
self.compute_row_delta_in_all_samples_frames(self._df_samples_list)
self.clear_data_frame_list()
self._df_structure = self.import_structure(raw_data)
self._df_variables = self.import_variables(raw_data)
self._df_structure = self.import_structure(self._raw_data)
self._df_variables = self.import_variables(self._raw_data)
def import_trajectories(self, raw_data: typing.List) -> typing.List:
"""Imports the trajectories from the list of dicts ``raw_data``.
@@ -87,7 +97,7 @@ class JsonImporter(AbstractImporter):
return self.one_level_normalizing(raw_data, self._array_indx, self._variables_label)
def read_json_file(self) -> typing.List:
"""Reads the JSON file in the path self.filePath
"""Reads the JSON file in the path self.filePath.
:return: The contents of the json file
:rtype: List
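Taken together, the hunks above change the public API of JsonImporter: the raw JSON is now read once in the constructor, and the dataset index moves from the constructor to ``import_data``. A minimal usage sketch under the new signature; the file path is hypothetical and the labels are the ones used in the tests below:

from PyCTBN.json_importer import JsonImporter  # import path as in basic_main.py

importer = JsonImporter('./data/example.json', 'samples', 'dyn.str', 'variables',
                        'Time', 'Name')  # read_json_file() runs here, filling _raw_data
importer.import_data(0)                  # the outer JsonArray entry is chosen per call
print(importer.variables)                # the variables dataframe is now populated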

@@ -1,12 +1,10 @@
import glob
import json
import os
from itertools import combinations
import typing
import numpy as np
import pandas as pd
from line_profiler import LineProfiler
from scipy.stats import chi2 as chi2_dist
from scipy.stats import f as f_dist
from tqdm import tqdm
@@ -36,7 +34,7 @@ class OriginalCTPCAlgorithm(AbstractImporter):
pass
def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
variables_key: str, array_indx: int):
variables_key: str, raw_data: typing.List):
"""
Parameters:
file_path: the path of the file that contains the data to be imported
@@ -53,11 +51,12 @@ class OriginalCTPCAlgorithm(AbstractImporter):
self.variables_key = variables_key
self.df_samples_list = None
self.trajectories = None
self._array_indx = array_indx
self._array_indx = None
self.matrix = None
super(OriginalCTPCAlgorithm, self).__init__(file_path)
self._raw_data = raw_data
def import_data(self):
def import_data(self, indx):
"""
Imports and prepares all the data needed for subsequent processing.
Parameters:
@@ -65,14 +64,16 @@ class OriginalCTPCAlgorithm(AbstractImporter):
Returns:
_void
"""
raw_data = self.read_json_file()
self.df_samples_list = self.import_trajectories(raw_data)
self._array_indx = indx
self.df_samples_list = self.import_trajectories(self._raw_data)
self._sorter = self.build_sorter(self.df_samples_list[0])
#self.compute_row_delta_in_all_samples_frames(self._df_samples_list)
#self.clear_data_frame_list()
self._df_structure = self.import_structure(raw_data)
self._df_variables = self.import_variables(raw_data, self._sorter)
self._df_structure = self.import_structure(self._raw_data)
self._df_variables = self.import_variables(self._raw_data, self._sorter)
def datasets_numb(self):
return len(self._raw_data)
def import_trajectories(self, raw_data: typing.List):
"""
@@ -107,15 +108,7 @@ class OriginalCTPCAlgorithm(AbstractImporter):
:Dataframe containing the variables' symbolic labels and their cardinalities
"""
return self.one_level_normalizing(raw_data, self._array_indx, self.variables_label)
#TODO Since, as a precondition, the ordering of the _df_variables frame is assumed to match the one in
#TODO self._sorter, this code is useless
"""self._df_variables[self._variables_key] = self._df_variables[self._variables_key].astype("category")
self._df_variables[self._variables_key] = self._df_variables[self._variables_key].cat.set_categories(sorter)
self._df_variables = self._df_variables.sort_values([self._variables_key])
self._df_variables.reset_index(inplace=True)
self._df_variables.drop('index', axis=1, inplace=True)
#print("Var Frame", self._df_variables)
"""
def read_json_file(self) -> typing.List:
"""
@@ -160,9 +153,6 @@ class OriginalCTPCAlgorithm(AbstractImporter):
smps = raw_data[indx][trajectories_key]
df_samples_list = [dataframe(sample) for sample in smps]
return df_samples_list
#columns_header = list(self._df_samples_list[0].columns.values)
#columns_header.remove(self._time_key)
#self._sorter = columns_header
def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
"""

@@ -12,20 +12,25 @@ class SamplePath:
cardinalities. Has the task of creating the objects ``Trajectory`` and ``Structure`` that will
contain the mentioned data.
:param importer: the Importer objects that will import ad process data
:param importer: the Importer object which contains the imported and processed data
:type importer: AbstractImporter
:_trajectories: the ``Trajectory`` object that will contain all the concatenated trajectories
:_structure: the ``Structure`` Object that will contain all the structurral infos about the net
:_structure: the ``Structure`` Object that will contain all the structural infos about the net
:_total_variables_count: the number of variables in the net
"""
def __init__(self, importer: AbstractImporter):
"""Constructor Method
"""
self._importer = importer
if (self._importer._df_variables is None or self._importer._df_structure is None
or self._importer._concatenated_samples is None):
raise RuntimeError('The importer object has to contain all the processed data!')
if (self._importer._df_variables.empty or self._importer._df_structure.empty
or self._importer._concatenated_samples.empty):
raise RuntimeError('The importer object has to contain all the processed data!')
self._trajectories = None
self._structure = None
self._total_variables_count = None
self._importer.import_data()
def build_trajectories(self) -> None:
"""Builds the Trajectory object that will contain all the trajectories.
@@ -60,7 +65,7 @@ class SamplePath:
return self._structure
@property
def total_variables_count(self):
def total_variables_count(self) -> int:
return self._total_variables_count
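``SamplePath`` no longer triggers the import itself; its constructor only validates that the importer already holds fully processed data and raises ``RuntimeError`` otherwise. The required order of operations, sketched with a hypothetical path:

from PyCTBN.json_importer import JsonImporter
from PyCTBN.sample_path import SamplePath

importer = JsonImporter('./data/example.json', 'samples', 'dyn.str', 'variables', 'Time', 'Name')
importer.import_data(0)  # must run before SamplePath, or the constructor raises RuntimeError
s1 = SamplePath(importer)
s1.build_trajectories()
s1.build_structure()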

@@ -4,8 +4,6 @@ import os
import typing
#import abstract_importer as ai
#import sample_path as sp
from .abstract_importer import AbstractImporter
from .sample_path import SamplePath

@@ -6,6 +6,7 @@ import numpy as np
import pandas as pd
import json
from ..PyCTBN.json_importer import JsonImporter
@@ -14,10 +15,9 @@ class TestJsonImporter(unittest.TestCase):
@classmethod
def setUpClass(cls) -> None:
cls.read_files = glob.glob(os.path.join('./data', "*.json"))
#print(os.path.join('../data'))
def test_init(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0)
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
self.assertEqual(j1._samples_label, 'samples')
self.assertEqual(j1._structure_label, 'dyn.str')
self.assertEqual(j1._variables_label, 'variables')
@@ -29,6 +29,8 @@ class TestJsonImporter(unittest.TestCase):
self.assertIsNone(j1.structure)
self.assertIsNone(j1.concatenated_samples)
self.assertIsNone(j1.sorter)
self.assertIsNone(j1._array_indx)
self.assertIsInstance(j1._raw_data, list)
def test_read_json_file_found(self):
data_set = {"key1": [1, 2, 3], "key2": [4, 5, 6]}
@@ -36,39 +38,37 @@ class TestJsonImporter(unittest.TestCase):
json.dump(data_set, f)
path = os.getcwd()
path = path + '/data.json'
j1 = JsonImporter(path, '', '', '', '', '', 0)
imported_data = j1.read_json_file()
self.assertTrue(self.ordered(data_set) == self.ordered(imported_data))
j1 = JsonImporter(path, '', '', '', '', '')
#imported_data = j1.read_json_file()
self.assertTrue(self.ordered(data_set) == self.ordered(j1._raw_data))
os.remove('data.json')
def test_read_json_file_not_found(self):
path = os.getcwd()
path = path + '/data.json'
j1 = JsonImporter(path, '', '', '', '', '', 0)
self.assertRaises(FileNotFoundError, j1.read_json_file)
#j1 = JsonImporter(path, '', '', '', '', '')
self.assertRaises(FileNotFoundError, JsonImporter, path, '', '', '', '', '')
def test_normalize_trajectories(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0)
raw_data = j1.read_json_file()
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
#raw_data = j1.read_json_file()
#print(raw_data)
df_samples_list = j1.normalize_trajectories(raw_data, 0, j1._samples_label)
self.assertEqual(len(df_samples_list), len(raw_data[0][j1._samples_label]))
#self.assertEqual(list(j1._df_samples_list[0].columns.values)[1:], j1.sorter)
df_samples_list = j1.normalize_trajectories(j1._raw_data, 0, j1._samples_label)
self.assertEqual(len(df_samples_list), len(j1._raw_data[0][j1._samples_label]))
def test_normalize_trajectories_wrong_indx(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0)
raw_data = j1.read_json_file()
self.assertRaises(IndexError, j1.normalize_trajectories, raw_data, 474, j1._samples_label)
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
self.assertRaises(IndexError, j1.normalize_trajectories, j1._raw_data, 474, j1._samples_label)
def test_normalize_trajectories_wrong_key(self):
j1 = JsonImporter(self.read_files[0], 'sample', 'dyn.str', 'variables', 'Time', 'Name', 0)
raw_data = j1.read_json_file()
self.assertRaises(KeyError, j1.normalize_trajectories, raw_data, 0, j1._samples_label)
j1 = JsonImporter(self.read_files[0], 'sample', 'dyn.str', 'variables', 'Time', 'Name')
self.assertRaises(KeyError, j1.normalize_trajectories, j1._raw_data, 0, j1._samples_label)
def test_compute_row_delta_single_samples_frame(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0)
raw_data = j1.read_json_file()
j1._df_samples_list = j1.import_trajectories(raw_data)
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
#raw_data = j1.read_json_file()
j1._array_indx = 0
j1._df_samples_list = j1.import_trajectories(j1._raw_data)
sample_frame = j1._df_samples_list[0]
original_copy = sample_frame.copy()
columns_header = list(sample_frame.columns.values)
@@ -88,9 +88,10 @@ class TestJsonImporter(unittest.TestCase):
np.array(original_copy.iloc[indx + 1][columns_header[1:]], dtype=int))
def test_compute_row_delta_in_all_frames(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0)
raw_data = j1.read_json_file()
j1._df_samples_list = j1.import_trajectories(raw_data)
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
#raw_data = j1.read_json_file()
j1._array_indx = 0
j1._df_samples_list = j1.import_trajectories(j1._raw_data)
j1._sorter = j1.build_sorter(j1._df_samples_list[0])
j1.compute_row_delta_in_all_samples_frames(j1._df_samples_list)
self.assertEqual(list(j1._df_samples_list[0].columns.values),
@@ -98,9 +99,10 @@ class TestJsonImporter(unittest.TestCase):
self.assertEqual(list(j1.concatenated_samples.columns.values)[0], j1._time_key)
def test_clear_data_frame_list(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0)
raw_data = j1.read_json_file()
j1._df_samples_list = j1.import_trajectories(raw_data)
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
#raw_data = j1.read_json_file()
j1._array_indx = 0
j1._df_samples_list = j1.import_trajectories(j1._raw_data)
j1._sorter = j1.build_sorter(j1._df_samples_list[0])
j1.compute_row_delta_in_all_samples_frames(j1._df_samples_list)
j1.clear_data_frame_list()
@@ -108,8 +110,8 @@ class TestJsonImporter(unittest.TestCase):
self.assertTrue(df.empty)
def test_clear_concatenated_frame(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0)
j1.import_data()
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
j1.import_data(0)
j1.clear_concatenated_frame()
self.assertTrue(j1.concatenated_samples.empty)
@@ -119,7 +121,7 @@ class TestJsonImporter(unittest.TestCase):
json.dump(data_set, f)
path = os.getcwd()
path = path + '/data.json'
j1 = JsonImporter(path, '', '', '', '', '', 0)
j1 = JsonImporter(path, '', '', '', '', '')
raw_data = j1.read_json_file()
frame = pd.DataFrame(raw_data)
col_list = j1.build_list_of_samples_array(frame)
@@ -131,22 +133,25 @@ class TestJsonImporter(unittest.TestCase):
os.remove('data.json')
def test_import_variables(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0)
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
sorter = ['X', 'Y', 'Z']
raw_data = [{'variables':{"Name": ['X', 'Y', 'Z'], "value": [3, 3, 3]}}]
j1._array_indx = 0
df_var = j1.import_variables(raw_data)
self.assertEqual(list(df_var[j1._variables_key]), sorter)
def test_import_structure(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0)
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
raw_data = [{"dyn.str":[{"From":"X","To":"Z"},{"From":"Y","To":"Z"},{"From":"Z","To":"Y"}]}]
j1._array_indx = 0
df_struct = j1.import_structure(raw_data)
#print(raw_data[0]['dyn.str'][0].items())
self.assertIsInstance(df_struct, pd.DataFrame)
def test_import_sampled_cims(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0)
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
raw_data = j1.read_json_file()
j1._array_indx = 0
j1._df_samples_list = j1.import_trajectories(raw_data)
j1._sorter = j1.build_sorter(j1._df_samples_list[0])
cims = j1.import_sampled_cims(raw_data, 0, 'dyn.cims')
@@ -154,8 +159,8 @@ class TestJsonImporter(unittest.TestCase):
self.assertEqual(list(cims.keys()), j1.sorter)
def test_import_data(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 2)
j1.import_data()
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
j1.import_data(0)
self.assertEqual(list(j1.variables[j1._variables_key]),
list(j1.concatenated_samples.columns.values[1:len(j1.variables[j1._variables_key]) + 1]))
print(j1.variables)

@@ -15,7 +15,8 @@ class TestNetworkGraph(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.read_files = glob.glob(os.path.join('./data', "*.json"))
cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0)
cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
cls.importer.import_data(0)
cls.s1 = SamplePath(cls.importer)
cls.s1.build_trajectories()
cls.s1.build_structure()

@@ -17,8 +17,8 @@ class TestParametersEstimatior(unittest.TestCase):
def setUpClass(cls) -> None:
cls.read_files = glob.glob(os.path.join('./data', "*.json"))
cls.array_indx = 0
cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name',
cls.array_indx)
cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
cls.importer.import_data(cls.array_indx)
cls.s1 = SamplePath(cls.importer)
cls.s1.build_trajectories()
cls.s1.build_structure()
@@ -58,7 +58,7 @@ class TestParametersEstimatior(unittest.TestCase):
self.assertTrue(np.all(np.isclose(r1, r2, 1e-01, 1e-01) == True))
def aux_import_sampled_cims(self, cims_label):
i1 = JsonImporter(self.read_files[0], '', '', '', '', '', self.array_indx)
i1 = JsonImporter(self.read_files[0], '', '', '', '', '')
raw_data = i1.read_json_file()
return i1.import_sampled_cims(raw_data, self.array_indx, cims_label)

@@ -14,22 +14,31 @@ class TestSamplePath(unittest.TestCase):
@classmethod
def setUpClass(cls) -> None:
cls.read_files = glob.glob(os.path.join('./data', "*.json"))
cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0)
def test_init_not_initialized_importer(self):
importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
self.assertRaises(RuntimeError, SamplePath, importer)
def test_init(self):
s1 = SamplePath(self.importer)
importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
importer.import_data(0)
s1 = SamplePath(importer)
self.assertIsNone(s1.trajectories)
self.assertIsNone(s1.structure)
self.assertFalse(s1._importer.concatenated_samples.empty)
self.assertIsNone(s1._total_variables_count)
def test_build_trajectories(self):
s1 = SamplePath(self.importer)
importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
importer.import_data(0)
s1 = SamplePath(importer)
s1.build_trajectories()
self.assertIsInstance(s1.trajectories, Trajectory)
def test_build_structure(self):
s1 = SamplePath(self.importer)
importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
importer.import_data(0)
s1 = SamplePath(importer)
s1.build_structure()
self.assertIsInstance(s1.structure, Structure)
self.assertEqual(s1._total_variables_count, len(s1._importer.sorter))

@@ -21,7 +21,8 @@ class TestStructureEstimator(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.read_files = glob.glob(os.path.join('./data', "*.json"))
cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0)
cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
cls.importer.import_data(0)
cls.s1 = SamplePath(cls.importer)
cls.s1.build_trajectories()
cls.s1.build_structure()

@@ -12,7 +12,8 @@ from PyCTBN.parameters_estimator import ParametersEstimator
def main():
read_files = glob.glob(os.path.join('./data', "*.json")) #Take all json files in this dir
#import data
importer = JsonImporter(read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 1)
importer = JsonImporter(read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
importer.import_data(0)
#Create a SamplePath Obj
s1 = SamplePath(importer)
#Build the trajectories and the structural infos

@@ -5,3 +5,44 @@ abstract\_importer module
:members:
:undoc-members:
:show-inheritance:
An example of a simple CSV Importer
===================================
Suppose you have a CSV dataset containing only the trajectories of three variables with known labels and cardinalities.
Then the resulting importer that inherits from and extends AbstractImporter would be:
.. code-block:: python
import typing
import pandas as pd
from .abstract_importer import AbstractImporter

class CSVImporter(AbstractImporter):
def __init__(self, file_path):
self._df_samples_list = None
super(CSVImporter, self).__init__(file_path)
def import_data(self):
self.read_csv_file()
self._sorter = self.build_sorter(self._df_samples_list[0])
self.import_variables()
self.import_structure()
self.compute_row_delta_in_all_samples_frames(self._df_samples_list)
def read_csv_file(self):
df = pd.read_csv(self._file_path)
df.drop(df.columns[[0]], axis=1, inplace=True)
self._df_samples_list = [df]
def import_variables(self):
values_list = [3 for var in self._sorter]
# initialize dict of lists
data = {'Name':self._sorter, 'Value':values_list}
# Create the pandas DataFrame
self._df_variables = pd.DataFrame(data)
def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
return list(sample_frame.columns)[1:]
def import_structure(self):
data = {'From':['X','Y','Z'], 'To':['Z','Z','Y']}
self._df_structure = pd.DataFrame(data)
def dataset_id(self) -> object:
pass
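Once defined, the importer could be used like any other AbstractImporter subclass, e.g. to feed a SamplePath; the CSV path below is hypothetical:

from PyCTBN.sample_path import SamplePath

importer = CSVImporter('./data/trajectories.csv')
importer.import_data()  # fills _df_samples_list, _sorter, variables, structure and the concatenated samples
s1 = SamplePath(importer)
s1.build_trajectories()
s1.build_structure()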