
Refactor method import_data in JsonImporter

parallel_struct_est
philpMartin 4 years ago
parent 4efb8ba4fe
commit 1954487500
 PyCTBN/PyCTBN/abstract_importer.py        | 24
 PyCTBN/PyCTBN/json_importer.py            | 36
 PyCTBN/PyCTBN/original_ctpc_algorithm.py  | 34
 PyCTBN/PyCTBN/sample_path.py              | 13
 PyCTBN/PyCTBN/simple_cvs_importer.py      |  2
 PyCTBN/tests/test_json_importer.py        | 75
 PyCTBN/tests/test_networkgraph.py         |  3
 PyCTBN/tests/test_parameters_estimator.py |  6
 PyCTBN/tests/test_sample_path.py          | 17
 PyCTBN/tests/test_structure_estimator.py  |  3
 basic_main.py                             |  3
 documentation/rst/abstract_importer.rst   | 41

@@ -13,6 +13,14 @@ class AbstractImporter(ABC):
:_df_structure: Dataframe containing the structure of the network (edges)
:_df_variables: Dataframe containing the nodes cardinalities
:_sorter: A list containing the columns header (excluding the time column) of the `_concatenated_samples`
.. warning::
The class members ``_df_variables`` and ``_df_structure`` HAVE to be properly constructed
as Pandas Dataframes with the following structure:
Header of _df_structure = [From_Node | To_Node]
Header of _df_variables = [Variable_Label | Variable_Cardinality]
.. note::
See :class:`JsonImporter` for an example implementation
"""
def __init__(self, file_path: str):
@@ -24,21 +32,7 @@ class AbstractImporter(ABC):
self._concatenated_samples = None
self._sorter = None
super().__init__()
@abstractmethod
def import_data(self) -> None:
"""Imports all the trajectories, variables cardinalities, and net edges.
.. warning::
The class members ``_df_variables`` and ``_df_structure`` HAVE to be properly constructed
as Pandas Dataframes with the following structure:
Header of _df_structure = [From_Node | To_Node]
Header of _df_variables = [Variable_Label | Variable_Cardinality]
.. note::
See :class:`JsonImporter` for an example implementation of this method.
"""
pass
@abstractmethod
def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
"""Initializes the ``_sorter`` class member from a trajectory dataframe, exctracting the header of the frame

@@ -8,8 +8,8 @@ from .abstract_importer import AbstractImporter
class JsonImporter(AbstractImporter):
"""Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare the data in json ext.
with the following structure:
"""Implements the abstracts methods of AbstractImporter and adds all the necessary methods to process and prepare
the data in json extension with the following structure:
[0]
|_ dyn.cims
|_ dyn.str
@@ -27,14 +27,20 @@ class JsonImporter(AbstractImporter):
:type time_key: string
:param variables_key: the key used to identify the names of the variables in the net
:type variables_key: string
:param array_indx: the index of the outer JsonArray to exctract the data from
:type array_indx: int
:_array_indx: the index of the outer JsonArray to extract the data from
:type _array_indx: int
:_df_samples_list: a Dataframe list in which every dataframe contains a trajectory
:_raw_data: The raw contents of the json file to import
:type _raw_data: List
"""
def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
variables_key: str, array_indx: int):
variables_key: str):
"""Constructor method
.. note::
This constructor calls also the method ``read_json_file()``, so after the construction of the object
the class member ``_raw_data`` will contain the raw imported json data.
"""
self._samples_label = samples_label
self._structure_label = structure_label
@@ -42,19 +48,23 @@ class JsonImporter(AbstractImporter):
self._time_key = time_key
self._variables_key = variables_key
self._df_samples_list = None
self._array_indx = array_indx
self._array_indx = None
super(JsonImporter, self).__init__(file_path)
self._raw_data = self.read_json_file()
def import_data(self, indx: int) -> None:
"""Implements the abstract method of :class:`AbstractImporter`.
def import_data(self) -> None:
"""Implements the abstract method of :class:`AbstractImporter`
:param indx: the index of the outer JsonArray to extract the data from
:type indx: int
"""
raw_data = self.read_json_file()
self._df_samples_list = self.import_trajectories(raw_data)
self._array_indx = indx
self._df_samples_list = self.import_trajectories(self._raw_data)
self._sorter = self.build_sorter(self._df_samples_list[0])
self.compute_row_delta_in_all_samples_frames(self._df_samples_list)
self.clear_data_frame_list()
self._df_structure = self.import_structure(raw_data)
self._df_variables = self.import_variables(raw_data)
self._df_structure = self.import_structure(self._raw_data)
self._df_variables = self.import_variables(self._raw_data)
def import_trajectories(self, raw_data: typing.List) -> typing.List:
"""Imports the trajectories from the list of dicts ``raw_data``.
@@ -87,7 +97,7 @@ class JsonImporter(AbstractImporter):
return self.one_level_normalizing(raw_data, self._array_indx, self._variables_label)
def read_json_file(self) -> typing.List:
"""Reads the JSON file in the path self.filePath
"""Reads the JSON file in the path self.filePath.
:return: The contents of the json file
:rtype: List
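Taken together, the hunks above change the public API of JsonImporter: the raw JSON is now read once in the constructor, and the dataset index moves from the constructor to ``import_data``. A minimal usage sketch under the new signature; the file path is hypothetical and the labels are the ones used in the tests below:

from PyCTBN.json_importer import JsonImporter  # import path as in basic_main.py

importer = JsonImporter('./data/example.json', 'samples', 'dyn.str', 'variables',
                        'Time', 'Name')  # read_json_file() runs here, filling _raw_data
importer.import_data(0)                  # the outer JsonArray entry is chosen per call
print(importer.variables)                # the variables dataframe is now populated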

@@ -1,12 +1,10 @@
import glob
import json
import os
from itertools import combinations
import typing
import numpy as np
import pandas as pd
from line_profiler import LineProfiler
from scipy.stats import chi2 as chi2_dist
from scipy.stats import f as f_dist
from tqdm import tqdm
@@ -36,7 +34,7 @@ class OriginalCTPCAlgorithm(AbstractImporter):
pass
def __init__(self, file_path: str, samples_label: str, structure_label: str, variables_label: str, time_key: str,
variables_key: str, array_indx: int):
variables_key: str, raw_data: typing.List):
"""
Parameters:
file_path: the path of the file that contains the data to be imported
@@ -53,11 +51,12 @@ class OriginalCTPCAlgorithm(AbstractImporter):
self.variables_key = variables_key
self.df_samples_list = None
self.trajectories = None
self._array_indx = array_indx
self._array_indx = None
self.matrix = None
super(OriginalCTPCAlgorithm, self).__init__(file_path)
self._raw_data = raw_data
def import_data(self):
def import_data(self, indx):
"""
Imports and prepares all the data needed for subsequent processing.
Parameters:
@@ -65,14 +64,16 @@ class OriginalCTPCAlgorithm(AbstractImporter):
Returns:
_void
"""
raw_data = self.read_json_file()
self.df_samples_list = self.import_trajectories(raw_data)
self._array_indx = indx
self.df_samples_list = self.import_trajectories(self._raw_data)
self._sorter = self.build_sorter(self.df_samples_list[0])
#self.compute_row_delta_in_all_samples_frames(self._df_samples_list)
#self.clear_data_frame_list()
self._df_structure = self.import_structure(raw_data)
self._df_variables = self.import_variables(raw_data, self._sorter)
self._df_structure = self.import_structure(self._raw_data)
self._df_variables = self.import_variables(self._raw_data, self._sorter)
def datasets_numb(self):
return len(self._raw_data)
def import_trajectories(self, raw_data: typing.List):
"""
@@ -107,15 +108,7 @@ class OriginalCTPCAlgorithm(AbstractImporter):
:Dataframe containing the variables' symbolic labels and their cardinalities
"""
return self.one_level_normalizing(raw_data, self._array_indx, self.variables_label)
#TODO Since, as a precondition, the ordering of the _df_variables frame is assumed to match the one in
#TODO self._sorter, this code is useless
"""self._df_variables[self._variables_key] = self._df_variables[self._variables_key].astype("category")
self._df_variables[self._variables_key] = self._df_variables[self._variables_key].cat.set_categories(sorter)
self._df_variables = self._df_variables.sort_values([self._variables_key])
self._df_variables.reset_index(inplace=True)
self._df_variables.drop('index', axis=1, inplace=True)
#print("Var Frame", self._df_variables)
"""
def read_json_file(self) -> typing.List:
"""
@@ -160,9 +153,6 @@ class OriginalCTPCAlgorithm(AbstractImporter):
smps = raw_data[indx][trajectories_key]
df_samples_list = [dataframe(sample) for sample in smps]
return df_samples_list
#columns_header = list(self._df_samples_list[0].columns.values)
#columns_header.remove(self._time_key)
#self._sorter = columns_header
def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
"""

@@ -12,20 +12,25 @@ class SamplePath:
cardinalities. Has the task of creating the objects ``Trajectory`` and ``Structure`` that will
contain the mentioned data.
:param importer: the Importer objects that will import ad process data
:param importer: the Importer object which contains the imported and processed data
:type importer: AbstractImporter
:_trajectories: the ``Trajectory`` object that will contain all the concatenated trajectories
:_structure: the ``Structure`` Object that will contain all the structurral infos about the net
:_structure: the ``Structure`` Object that will contain all the structural infos about the net
:_total_variables_count: the number of variables in the net
"""
def __init__(self, importer: AbstractImporter):
"""Constructor Method
"""
self._importer = importer
if (self._importer._df_variables is None or self._importer._df_structure is None
or self._importer._concatenated_samples is None):
raise RuntimeError('The importer object has to contain all the processed data!')
if (self._importer._df_variables.empty or self._importer._df_structure.empty
or self._importer._concatenated_samples.empty):
raise RuntimeError('The importer object has to contain all the processed data!')
self._trajectories = None
self._structure = None
self._total_variables_count = None
self._importer.import_data()
def build_trajectories(self) -> None:
"""Builds the Trajectory object that will contain all the trajectories.
@@ -60,7 +65,7 @@ class SamplePath:
return self._structure
@property
def total_variables_count(self):
def total_variables_count(self) -> int:
return self._total_variables_count
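``SamplePath`` no longer triggers the import itself; its constructor only validates that the importer already holds fully processed data and raises ``RuntimeError`` otherwise. The required order of operations, sketched with a hypothetical path:

from PyCTBN.json_importer import JsonImporter
from PyCTBN.sample_path import SamplePath

importer = JsonImporter('./data/example.json', 'samples', 'dyn.str', 'variables', 'Time', 'Name')
importer.import_data(0)  # must run before SamplePath, or the constructor raises RuntimeError
s1 = SamplePath(importer)
s1.build_trajectories()
s1.build_structure()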

@@ -4,8 +4,6 @@ import os
import typing
#import abstract_importer as ai
#import sample_path as sp
from .abstract_importer import AbstractImporter
from .sample_path import SamplePath

@@ -6,6 +6,7 @@ import numpy as np
import pandas as pd
import json
from ..PyCTBN.json_importer import JsonImporter
@@ -14,10 +15,9 @@ class TestJsonImporter(unittest.TestCase):
@classmethod
def setUpClass(cls) -> None:
cls.read_files = glob.glob(os.path.join('./data', "*.json"))
#print(os.path.join('../data'))
def test_init(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0)
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
self.assertEqual(j1._samples_label, 'samples')
self.assertEqual(j1._structure_label, 'dyn.str')
self.assertEqual(j1._variables_label, 'variables')
@@ -29,6 +29,8 @@ class TestJsonImporter(unittest.TestCase):
self.assertIsNone(j1.structure)
self.assertIsNone(j1.concatenated_samples)
self.assertIsNone(j1.sorter)
self.assertIsNone(j1._array_indx)
self.assertIsInstance(j1._raw_data, list)
def test_read_json_file_found(self):
data_set = {"key1": [1, 2, 3], "key2": [4, 5, 6]}
@@ -36,39 +38,37 @@ class TestJsonImporter(unittest.TestCase):
json.dump(data_set, f)
path = os.getcwd()
path = path + '/data.json'
j1 = JsonImporter(path, '', '', '', '', '', 0)
imported_data = j1.read_json_file()
self.assertTrue(self.ordered(data_set) == self.ordered(imported_data))
j1 = JsonImporter(path, '', '', '', '', '')
#imported_data = j1.read_json_file()
self.assertTrue(self.ordered(data_set) == self.ordered(j1._raw_data))
os.remove('data.json')
def test_read_json_file_not_found(self):
path = os.getcwd()
path = path + '/data.json'
j1 = JsonImporter(path, '', '', '', '', '', 0)
self.assertRaises(FileNotFoundError, j1.read_json_file)
#j1 = JsonImporter(path, '', '', '', '', '')
self.assertRaises(FileNotFoundError, JsonImporter, path, '', '', '', '', '')
def test_normalize_trajectories(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0)
raw_data = j1.read_json_file()
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
#raw_data = j1.read_json_file()
#print(raw_data)
df_samples_list = j1.normalize_trajectories(raw_data, 0, j1._samples_label)
self.assertEqual(len(df_samples_list), len(raw_data[0][j1._samples_label]))
#self.assertEqual(list(j1._df_samples_list[0].columns.values)[1:], j1.sorter)
df_samples_list = j1.normalize_trajectories(j1._raw_data, 0, j1._samples_label)
self.assertEqual(len(df_samples_list), len(j1._raw_data[0][j1._samples_label]))
def test_normalize_trajectories_wrong_indx(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0)
raw_data = j1.read_json_file()
self.assertRaises(IndexError, j1.normalize_trajectories, raw_data, 474, j1._samples_label)
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
self.assertRaises(IndexError, j1.normalize_trajectories, j1._raw_data, 474, j1._samples_label)
def test_normalize_trajectories_wrong_key(self):
j1 = JsonImporter(self.read_files[0], 'sample', 'dyn.str', 'variables', 'Time', 'Name', 0)
raw_data = j1.read_json_file()
self.assertRaises(KeyError, j1.normalize_trajectories, raw_data, 0, j1._samples_label)
j1 = JsonImporter(self.read_files[0], 'sample', 'dyn.str', 'variables', 'Time', 'Name')
self.assertRaises(KeyError, j1.normalize_trajectories, j1._raw_data, 0, j1._samples_label)
def test_compute_row_delta_single_samples_frame(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0)
raw_data = j1.read_json_file()
j1._df_samples_list = j1.import_trajectories(raw_data)
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
#raw_data = j1.read_json_file()
j1._array_indx = 0
j1._df_samples_list = j1.import_trajectories(j1._raw_data)
sample_frame = j1._df_samples_list[0]
original_copy = sample_frame.copy()
columns_header = list(sample_frame.columns.values)
@@ -88,9 +88,10 @@ class TestJsonImporter(unittest.TestCase):
np.array(original_copy.iloc[indx + 1][columns_header[1:]], dtype=int))
def test_compute_row_delta_in_all_frames(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0)
raw_data = j1.read_json_file()
j1._df_samples_list = j1.import_trajectories(raw_data)
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
#raw_data = j1.read_json_file()
j1._array_indx = 0
j1._df_samples_list = j1.import_trajectories(j1._raw_data)
j1._sorter = j1.build_sorter(j1._df_samples_list[0])
j1.compute_row_delta_in_all_samples_frames(j1._df_samples_list)
self.assertEqual(list(j1._df_samples_list[0].columns.values),
@@ -98,9 +99,10 @@ class TestJsonImporter(unittest.TestCase):
self.assertEqual(list(j1.concatenated_samples.columns.values)[0], j1._time_key)
def test_clear_data_frame_list(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0)
raw_data = j1.read_json_file()
j1._df_samples_list = j1.import_trajectories(raw_data)
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
#raw_data = j1.read_json_file()
j1._array_indx = 0
j1._df_samples_list = j1.import_trajectories(j1._raw_data)
j1._sorter = j1.build_sorter(j1._df_samples_list[0])
j1.compute_row_delta_in_all_samples_frames(j1._df_samples_list)
j1.clear_data_frame_list()
@@ -108,8 +110,8 @@ class TestJsonImporter(unittest.TestCase):
self.assertTrue(df.empty)
def test_clear_concatenated_frame(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0)
j1.import_data()
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
j1.import_data(0)
j1.clear_concatenated_frame()
self.assertTrue(j1.concatenated_samples.empty)
@@ -119,7 +121,7 @@ class TestJsonImporter(unittest.TestCase):
json.dump(data_set, f)
path = os.getcwd()
path = path + '/data.json'
j1 = JsonImporter(path, '', '', '', '', '', 0)
j1 = JsonImporter(path, '', '', '', '', '')
raw_data = j1.read_json_file()
frame = pd.DataFrame(raw_data)
col_list = j1.build_list_of_samples_array(frame)
@@ -131,22 +133,25 @@ class TestJsonImporter(unittest.TestCase):
os.remove('data.json')
def test_import_variables(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0)
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
sorter = ['X', 'Y', 'Z']
raw_data = [{'variables':{"Name": ['X', 'Y', 'Z'], "value": [3, 3, 3]}}]
j1._array_indx = 0
df_var = j1.import_variables(raw_data)
self.assertEqual(list(df_var[j1._variables_key]), sorter)
def test_import_structure(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0)
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
raw_data = [{"dyn.str":[{"From":"X","To":"Z"},{"From":"Y","To":"Z"},{"From":"Z","To":"Y"}]}]
j1._array_indx = 0
df_struct = j1.import_structure(raw_data)
#print(raw_data[0]['dyn.str'][0].items())
self.assertIsInstance(df_struct, pd.DataFrame)
def test_import_sampled_cims(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0)
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
raw_data = j1.read_json_file()
j1._array_indx = 0
j1._df_samples_list = j1.import_trajectories(raw_data)
j1._sorter = j1.build_sorter(j1._df_samples_list[0])
cims = j1.import_sampled_cims(raw_data, 0, 'dyn.cims')
@@ -154,8 +159,8 @@ class TestJsonImporter(unittest.TestCase):
self.assertEqual(list(cims.keys()), j1.sorter)
def test_import_data(self):
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 2)
j1.import_data()
j1 = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
j1.import_data(0)
self.assertEqual(list(j1.variables[j1._variables_key]),
list(j1.concatenated_samples.columns.values[1:len(j1.variables[j1._variables_key]) + 1]))
print(j1.variables)

@@ -15,7 +15,8 @@ class TestNetworkGraph(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.read_files = glob.glob(os.path.join('./data', "*.json"))
cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0)
cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
cls.importer.import_data(0)
cls.s1 = SamplePath(cls.importer)
cls.s1.build_trajectories()
cls.s1.build_structure()

@@ -17,8 +17,8 @@ class TestParametersEstimatior(unittest.TestCase):
def setUpClass(cls) -> None:
cls.read_files = glob.glob(os.path.join('./data', "*.json"))
cls.array_indx = 0
cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name',
cls.array_indx)
cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
cls.importer.import_data(cls.array_indx)
cls.s1 = SamplePath(cls.importer)
cls.s1.build_trajectories()
cls.s1.build_structure()
@@ -58,7 +58,7 @@ class TestParametersEstimatior(unittest.TestCase):
self.assertTrue(np.all(np.isclose(r1, r2, 1e-01, 1e-01) == True))
def aux_import_sampled_cims(self, cims_label):
i1 = JsonImporter(self.read_files[0], '', '', '', '', '', self.array_indx)
i1 = JsonImporter(self.read_files[0], '', '', '', '', '')
raw_data = i1.read_json_file()
return i1.import_sampled_cims(raw_data, self.array_indx, cims_label)

@@ -14,22 +14,31 @@ class TestSamplePath(unittest.TestCase):
@classmethod
def setUpClass(cls) -> None:
cls.read_files = glob.glob(os.path.join('./data', "*.json"))
cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0)
def test_init_not_initialized_importer(self):
importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
self.assertRaises(RuntimeError, SamplePath, importer)
def test_init(self):
s1 = SamplePath(self.importer)
importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
importer.import_data(0)
s1 = SamplePath(importer)
self.assertIsNone(s1.trajectories)
self.assertIsNone(s1.structure)
self.assertFalse(s1._importer.concatenated_samples.empty)
self.assertIsNone(s1._total_variables_count)
def test_build_trajectories(self):
s1 = SamplePath(self.importer)
importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
importer.import_data(0)
s1 = SamplePath(importer)
s1.build_trajectories()
self.assertIsInstance(s1.trajectories, Trajectory)
def test_build_structure(self):
s1 = SamplePath(self.importer)
importer = JsonImporter(self.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
importer.import_data(0)
s1 = SamplePath(importer)
s1.build_structure()
self.assertIsInstance(s1.structure, Structure)
self.assertEqual(s1._total_variables_count, len(s1._importer.sorter))

@@ -21,7 +21,8 @@ class TestStructureEstimator(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.read_files = glob.glob(os.path.join('./data', "*.json"))
cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 0)
cls.importer = JsonImporter(cls.read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
cls.importer.import_data(0)
cls.s1 = SamplePath(cls.importer)
cls.s1.build_trajectories()
cls.s1.build_structure()

@@ -12,7 +12,8 @@ from PyCTBN.parameters_estimator import ParametersEstimator
def main():
read_files = glob.glob(os.path.join('./data', "*.json")) #Take all json files in this dir
#import data
importer = JsonImporter(read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name', 1)
importer = JsonImporter(read_files[0], 'samples', 'dyn.str', 'variables', 'Time', 'Name')
importer.import_data(0)
#Create a SamplePath Obj
s1 = SamplePath(importer)
#Build the trajectories and the structural infos

@@ -5,3 +5,44 @@ abstract\_importer module
:members:
:undoc-members:
:show-inheritance:
An example of a simple CSV Importer
===================================
Suppose you have a CSV dataset containing only the trajectories of three variables with known labels and cardinalities.
Then the resulting importer that inherits from and extends AbstractImporter would be:
.. code-block:: python
import typing
import pandas as pd
from .abstract_importer import AbstractImporter

class CSVImporter(AbstractImporter):
def __init__(self, file_path):
self._df_samples_list = None
super(CSVImporter, self).__init__(file_path)
def import_data(self):
self.read_csv_file()
self._sorter = self.build_sorter(self._df_samples_list[0])
self.import_variables()
self.import_structure()
self.compute_row_delta_in_all_samples_frames(self._df_samples_list)
def read_csv_file(self):
df = pd.read_csv(self._file_path)
df.drop(df.columns[[0]], axis=1, inplace=True)
self._df_samples_list = [df]
def import_variables(self):
values_list = [3 for var in self._sorter]
# initialize dict of lists
data = {'Name':self._sorter, 'Value':values_list}
# Create the pandas DataFrame
self._df_variables = pd.DataFrame(data)
def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
return list(sample_frame.columns)[1:]
def import_structure(self):
data = {'From':['X','Y','Z'], 'To':['Z','Z','Y']}
self._df_structure = pd.DataFrame(data)
def dataset_id(self) -> object:
pass
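Once defined, the importer could be used like any other AbstractImporter subclass, e.g. to feed a SamplePath; the CSV path below is hypothetical:

from PyCTBN.sample_path import SamplePath

importer = CSVImporter('./data/trajectories.csv')
importer.import_data()  # fills _df_samples_list, _sorter, variables, structure and the concatenated samples
s1 = SamplePath(importer)
s1.build_trajectories()
s1.build_structure()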