diff --git a/.gitattributes b/.gitattributes
deleted file mode 100644
index 7c40d77..0000000
--- a/.gitattributes
+++ /dev/null
@@ -1,3 +0,0 @@
-*.js linguist-vendored
-*.html linguist-vendored
-*.css linguist-vendored
diff --git a/.gitignore b/.gitignore
index 6ab54bc..c74793f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,21 +1,3 @@
-__pycache__
-.vscode
-**/__pycache__
-**/data
-**/PyCTBN.egg-info
-**/results_data
-**/.scannerwork
-**/build
-test.py
-test_1.py
-test1.json
-test2.json
-test3.json
-result0.png
-example.json
-test_time.py
-.idea
-
# Virtual Environment
venv/
.venv/
diff --git a/CTBN_Diagramma_Dominio.pdf b/CTBN_Diagramma_Dominio.pdf
deleted file mode 100644
index e6e1c83..0000000
Binary files a/CTBN_Diagramma_Dominio.pdf and /dev/null differ
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..4c1d33d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,51 @@
+
+
+# PyCTBN
+
+[![coverage](https://codecov.io/gh/madlabunimib/PyCTBN/branch/master/graph/badge.svg)](https://codecov.io/gh/madlabunimib/PyCTBN)
+
+A Continuous Time Bayesian Networks Library
+
+
+
+## Installation from Source
+
+### Prerequisites
+
++ `python3` (_>=3.6_)
++ `python3-setuptools`
++ `python3-pip`
+
+### Install
+
+At the _root_ of the project's folder launch:
+
+```sh
+pip3 install .
+```
+
+### Uninstall
+
+Anywhere in the system launch:
+
+```sh
+pip3 uninstall PyCTBN
+```
+
+### Upgrade
+
+At the _root_ of the project's folder launch:
+
+```sh
+pip3 install --upgrade .
+```
+
+### Installation via wheel package
+
+**Pip installation**
+
+Download the latest release in .tar.gz or .whl format and simply use pip install to install it:
+
+```sh
+pip install PyCTBN-2.2.tar.gz
+```
diff --git a/README.rst b/README.rst
deleted file mode 100644
index f8f9660..0000000
--- a/README.rst
+++ /dev/null
@@ -1,399 +0,0 @@
-PyCTBN
-======
-
-.. image:: https://codecov.io/gh/madlabunimib/PyCTBN/branch/master/graph/badge.svg
- :target: https://codecov.io/gh/madlabunimib/PyCTBN
-
-
-
-
-A Continuous Time Bayesian Networks Library
-
-Installation/Usage
-*******************
-
-The library has been tested on Linux and Windows with Python 3.8 and it relies on the following Python modules:
-
-- numpy
-- pandas
-- networkx
-- scipy
-- matplotlib
-- tqdm
-
-**Pip installation**
-
-Download the latest release in .tar.gz or .whl format and simply use pip install to install it:
-
- $ pip install PyCTBN-2.2.tar.gz
-
-Documentation
-*************
-Please refer to https://madlabunimib.github.io/PyCTBN/ for the full project documentation.
-
-Implementing your own data importer
-***********************************
-| This example demonstrates the implementation of a simple data importer the extends the class AbstractImporter
-| to import data in csv format. The net in exam has three ternary nodes and no prior net structure.
-| Suppose the trajectories that have to be inported have this structure:
-
-.. image:: docs-out/esempio_dataset.png
- :width: 600
- :alt: An example trajectory to be imported.
-
-| In the read_csv_file method the data are imported in memory, put in a list and assigned to the _df_samples_list class
-| member, so that it contains all the trajectories to be processed.
-| In the import_variables method the dataframe containing the nodes labels and the cardinalities of the nodes
-| is assigned to the _df_variables class member.
-| The class member _sorter has to contain the nodes labels in the same order of the trajectory columns,
-| just override the build_sorter method to do that.
-| If your datasets names have particular id, you can keep it using the dataset_id method to assign the id to a new class member.
-| Finally the import_data method call all the previously implemented methods and calls the compute_row_delta_in_all_samples_frames
-| to process all the trajectories in _df_samples_list.
-| For more information about the class memebers and methods of AbstractImporter please refer to the documentation.
-
-.. code-block:: python
-
- import pandas as pd
- import typing
-
- from PyCTBN import AbstractImporter
- from PyCTBN import SamplePath
-
- class CSVImporter(AbstractImporter):
-
- def __init__(self, file_path):
- self._df_samples_list = None
- super(CSVImporter, self).__init__(file_path)
-
- def import_data(self):
- self.read_csv_file()
- self._sorter = self.build_sorter(self._df_samples_list[0])
- self.import_variables()
- self.compute_row_delta_in_all_samples_frames(self._df_samples_list)
-
- def read_csv_file(self):
- df = pd.read_csv(self._file_path)
- df.drop(df.columns[[0]], axis=1, inplace=True)
- self._df_samples_list = [df]
-
- def import_variables(self):
- values_list = [3 for var in self._sorter]
- # initialize dict of lists
- data = {'Name':self._sorter, 'Value':values_list}
- # Create the pandas DataFrame
- self._df_variables = pd.DataFrame(data)
-
- def build_sorter(self, sample_frame: pd.DataFrame) -> typing.List:
- return list(sample_frame.columns)[1:]
-
- def dataset_id(self) -> object:
- pass
-
- def main():
- # create the importer object
- csvimp = CSVImporter('/dataset_example.csv')
- # call the wrapping method that wil import and process the data
- csvimp.import_data()
- # pass the AbstractImporter object to the SamplePath constructor
- s1 = SamplePath(csvimp)
- # SamplePath will contain the Trajecotry object...
- s1.build_trajectories()
- #...and the Structure object with all the process data
- s1.build_structure()
-
-
-Structure Estimation Examples
-##############################
-
-| In this section some examples will be shown in order to provide some useful information about the usage of the library
-
-
-Constraint based estimation
-****************************
-| This example shows how to estimate the structure given a series of trajectories using a constraint based approach.
-| The first three instructions import all the necessary data (trajectories, nodes cardinalities, nodes labels),
-| and are contextual to the dataset that is been used, in the code comments are marked as optional <>.
-| If your data has a different structure or format you should implement your own importer
-| (see Implementing your own importer example).
-| The other instructions are not optional and should follow the same order.
-| A SamplePath object is been created, passing an AbstractImporter object that contains the correct class members
-| filled with the data that are necessary to estimate the structure.
-| Next the build_trajectories and build_structure methods are called to instantiate the objects that will contain
-| the processed trajectories and all the net information.
-| Then an estimator object is created, in this case a constraint based estimator,
-| it necessary to pass a SamplePath object where build_trajectories and build_structure methods have already been called.
-| If you have prior knowledge about the net structure pass it to the constructor with the known_edges parameter.
-| The other three parameters are contextual to the StructureConstraintBasedEstimator, see the documentation for more details.
-| To estimate the structure simply call the estimate_structure method.
-| You can obtain the estimated structure as a boolean adjacency matrix with the method adjacency_matrix,
-| or save it as a json file that contains all the nodes labels, and obviously the estimated edges.
-| You can also save a graphical model representation of the estimated structure
-| with the save_plot_estimated_structure_graph.
-
-.. code-block:: python
-
- import glob
- import os
-
- from PyCTBN import JsonImporter
- from PyCTBN import SamplePath
- from PyCTBN import StructureConstraintBasedEstimator
-
-
- def structure_constraint_based_estimation_example():
- #
- read_files = glob.glob(os.path.join('./data', "*.json"))
- #
- importer = JsonImporter(file_path=read_files[0], samples_label='samples',
- structure_label='dyn.str', variables_label='variables',
- time_key='Time', variables_key='Name')
- #
- importer.import_data(0)
- # construct a SamplePath Object passing a filled AbstractImporter object
- s1 = SamplePath(importer=importer)
- # build the trajectories
- s1.build_trajectories()
- # build the information about the net
- s1.build_structure()
- # construct a StructureEstimator object passing a correctly build SamplePath object
- # and the independence tests significance, if you have prior knowledge about
- # the net structure create a list of tuples
- # that contains them and pass it as known_edges parameter
- se1 = StructureConstraintBasedEstimator(sample_path=s1, exp_test_alfa=0.1, chi_test_alfa=0.1,
- known_edges=[], thumb_threshold=25)
- # call the algorithm to estimate the structure
- se1.estimate_structure()
- # obtain the adjacency matrix of the estimated structure
- print(se1.adjacency_matrix())
- # save the estimated structure to a json file
- # (remember to specify the path AND the .json extension)....
- se1.save_results('./results0.json')
- # ...or save it also in a graphical model fashion
- # (remember to specify the path AND the .png extension)
- se1.save_plot_estimated_structure_graph('./result0.png')
-
-
-
-Score based estimation with Hill Climbing
-*****************************************
-
-| This example shows how to estimate the structure given a series of trajectories using a score based approach
-| and the Hill Climbing algorithm as optimization strategy.
-| The structure of the code is the same as the previus example, but an explanation of the Structure score based estimator
-| will be provided.
-| Then an estimator object is created, in this case a score based estimator,
-| it necessary to pass a SamplePath object where build_trajectories and build_structure methods have already been called.
-| If you have prior knowledge about the net structure pass it to the constructor with the known_edges parameter.
-| The other parameters are contextual to the StructureScoreBasedEstimator, see the documentation for more details.
-| To estimate the structure simply call the estimate_structure method passing the desidered parameters, such as the
-| optimization strategy, or simply use the default configuration.
-| In this case an Hill Climbing approch is choosen.
-
-.. code-block:: python
-
- import glob
- import os
-
- from PyCTBN import JsonImporter
- from PyCTBN import SamplePath
- from PyCTBN import StructureScoreBasedEstimator
-
-
- def structure_constraint_based_estimation_example():
- #
- read_files = glob.glob(os.path.join('./data', "*.json"))
- #
- importer = JsonImporter(file_path=read_files[0], samples_label='samples',
- structure_label='dyn.str', variables_label='variables',
- time_key='Time', variables_key='Name')
- #
- importer.import_data(0)
- # construct a SamplePath Object passing a filled AbstractImporter object
- s1 = SamplePath(importer=importer)
- # build the trajectories
- s1.build_trajectories()
- # build the information about the net
- s1.build_structure()
- # construct a StructureEstimator object passing a correctly build SamplePath object
- # and hyperparameters tau and alpha, if you have prior knowledge about
- # the net structure create a list of tuples
- # that contains them and pass it as known_edges parameter
- se1 = StructureScoreBasedEstimator(sample_path=s1, tau_xu = 0.1, alpha_xu = 1,
- known_edges=[])
- # call the algorithm to estimate the structure
- # and pass all the desidered parameters, in this case an Hill Climbing approach
- # will be selected as optimization strategy.
- se1.estimate_structure(
- max_parents = None,
- iterations_number = 40,
- patience = None,
- optimizer = 'hill'
- )
- # obtain the adjacency matrix of the estimated structure
- print(se1.adjacency_matrix())
- # save the estimated structure to a json file
- # (remember to specify the path AND the .json extension)....
- se1.save_results('./results0.json')
- # ...or save it also in a graphical model fashion
- # (remember to specify the path AND the .png extension)
- se1.save_plot_estimated_structure_graph('./result0.png')
-
-
-Score based estimation with Tabu Search and Data Augmentation
-**************************************************************
-
-| This example shows how to estimate the structure given a series of trajectories using a score based approach
-| and the Tabu Search algorithm as optimization strategy and how to use a data augmentation strategy to increase the
-| number of data available.
-| The structure of the code is the same as the previus example, but an explanation of the data augmentation technique
-| will be provided.
-| In this case a SampleImporter is used to import the data instead of a JsonImporter.
-| Using a SampleImporter requires the user to read the data and put it into different lists or DataFrames before to
-| inizialize the SampleImporter instance.
-| Then it is possible to increase the amount of data by using one of the external libraries who provide data augmentation
-| approaches, in this example sklearn is used.
-| Then all the information can be passed to the SampleImporter constructor and the import_data method can be used to provide
-| the preprossing operations of the PyCTBN library.
-| Then an estimator object is created, in this case a score based estimator,
-| it necessary to pass a SamplePath object where build_trajectories and build_structure methods have already been called.
-| If you have prior knowledge about the net structure pass it to the constructor with the known_edges parameter.
-| The other parameters are contextual to the StructureScoreBasedEstimator, see the documentation for more details.
-| To estimate the structure simply call the estimate_structure method passing the desidered parameters, such as the
-| optimization strategy, or simply use the default configuration.
-| In this case an Hill Climbing approch is choosen.
-
-
-.. code-block:: python
-
- import glob
- import os
-
- from sklearn.utils import resample
-
- from PyCTBN import SampleImporter
- from PyCTBN import SamplePath
- from PyCTBN import StructureScoreBasedEstimator
-
-
- def structure_constraint_based_estimation_example():
- #
- read_files = glob.glob(os.path.join('./data', "*.json"))
-
- # read the first file in the directory (or pass the file path)
- with open(file_path=read_files[0]) as f:
- raw_data = json.load(f)
-
- # read the variables information
- variables= pd.DataFrame(raw_data[0]["variables"])
-
- # read the prior information if they are given
- prior_net_structure = pd.DataFrame(raw_data[0]["dyn.str"])
-
- #read the samples
- trajectory_list_raw= raw_data[0]["samples"]
-
- #convert them in DataFrame
- trajectory_list = [pd.DataFrame(sample) for sample in trajectory_list_raw]
-
- # use an external library in order to provide the data augmentation operations, in this case
- # sklearn.utils is used
- augmented_trajectory_list = resample (trajectory_list, replace = True, n_samples = 300 )
-
-
- #
- importer = SampleImporter(
- trajectory_list = augmented_trajectory_list,
- variables=variables,
- prior_net_structure=prior_net_structure
- )
-
- #
- importer.import_data()
- # construct a SamplePath Object passing a filled AbstractImporter object
-
- s1 = SamplePath(importer=importer)
- # build the trajectories
- s1.build_trajectories()
- # build the information about the net
- s1.build_structure()
- # construct a StructureEstimator object passing a correctly build SamplePath object
- # and hyperparameters tau and alpha, if you have prior knowledge about
- # the net structure create a list of tuples
- # that contains them and pass it as known_edges parameter
- se1 = StructureScoreBasedEstimator(sample_path=s1, tau_xu = 0.1, alpha_xu = 1,
- known_edges=[])
- # call the algorithm to estimate the structure
- # and pass all the desidered parameters, in this case a Tabu Search approach
- # will be selected as optimization strategy. It is possible to select the tabu list length and
- # the tabu rules duration, and the other parameters as in the previus example.
- se1.estimate_structure(
- max_parents = None,
- iterations_number = 100,
- patience = 20,
- optimizer = 'tabu',
- tabu_length = 10,
- tabu_rules_duration = 10
- )
- # obtain the adjacency matrix of the estimated structure
- print(se1.adjacency_matrix())
- # save the estimated structure to a json file
- # (remember to specify the path AND the .json extension)....
- se1.save_results('./results0.json')
- # ...or save it also in a graphical model fashion
- # (remember to specify the path AND the .png extension)
- se1.save_plot_estimated_structure_graph('./result0.png')
-
-Network graph and parameters generation, trajectory sampling, data export
-**************************************************************
-
-| This example shows how to randomically generate a CTBN, that means both the graph and the CIMS, taking as input
-| the list of variables labels and their related cardinality. The whole procedure is managed by NetworkGenerator,
-| respectively with the generate_graph method, that allows to define the expected density of the graph, and
-| generate_cims method, that takes as input the range in which the parameters must be included.
-| Afterwards, the example shows how to sample a trajectory over the previously generated network, through the
-| CTBN_Sample method and setting a fixed number of transitions equal to 30000.
-| The output data, made up by network structure, cims and trajectory, are then saved on a JSON file by
-| exploiting the functions of JSONExporter class.
-| To prove the simplicity of interaction among the modules, the example eventually reads the file and computes
-| the estimation of the structure by using a ConstraintBased approach.
-
-.. code-block:: python
-
- from pyctbn.legacy.structure_graph.trajectory_generator import TrajectoryGenerator
- from pyctbn.legacy.structure_graph.network_generator import NetworkGenerator
- from pyctbn.legacy.utility.json_importer import JsonImporter
- from pyctbn.legacy.utility.json_exporter import JsonExporter
- from pyctbn.legacy.structure_graph.sample_path import SamplePath
- from pyctbn.legacy.estimators.structure_constraint_based_estimator import StructureConstraintBasedEstimator
-
- def main():
- # Network Generation
- labels = ["X", "Y", "Z"]
- card = 3
- vals = [card for l in labels]
- cim_min = 1
- cim_max = 3
- ng = NetworkGenerator(labels, vals)
- ng.generate_graph(0.3)
- ng.generate_cims(cim_min, cim_max)
-
- # Trajectory Generation
- e1 = JsonExporter(ng.variables, ng.dyn_str, ng.cims)
- tg = TrajectoryGenerator(variables = ng.variables, dyn_str = ng.dyn_str, dyn_cims = ng.cims)
- sigma = tg.CTBN_Sample(max_tr = 30000)
- e1.add_trajectory(sigma)
- e1.out_file("example.json")
-
- # Network Estimation (Constraint Based)
- importer = JsonImporter(file_path = "example.json", samples_label = "samples",
- structure_label = "dyn.str", variables_label = "variables",
- cims_label = "dyn.cims", time_key = "Time",
- variables_key = "Name")
- importer.import_data(0)
- s1 = SamplePath(importer=importer)
- s1.build_trajectories()
- s1.build_structure()
- se1 = StructureConstraintBasedEstimator(sample_path=s1, exp_test_alfa=0.1, chi_test_alfa=0.1,
- known_edges=[], thumb_threshold=25)
- edges = se1.estimate_structure(True)
diff --git a/setup.cfg b/setup.cfg
index bbe8f47..5ffea09 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -7,7 +7,6 @@ filename = */src/*/*.py,*/tests/*.py,*/setup.py
max-line-length = 127
#ignore = E501
select=E9,F63,F7,F82
-# E501 line too long (xxx > 79 characters)
[pylint]
output-format = text
diff --git a/tests/data/networks_and_trajectories_binary_data_01_3.json b/tests/data/networks_and_trajectories_binary_data_01_3.json
new file mode 100644
index 0000000..a287826
Binary files /dev/null and b/tests/data/networks_and_trajectories_binary_data_01_3.json differ
diff --git a/tests/data/networks_and_trajectories_binary_data_02_10_1.json b/tests/data/networks_and_trajectories_binary_data_02_10_1.json
new file mode 100644
index 0000000..1fb0d3e
Binary files /dev/null and b/tests/data/networks_and_trajectories_binary_data_02_10_1.json differ
diff --git a/tests/data/networks_and_trajectories_ternary_data_01_6_1.json b/tests/data/networks_and_trajectories_ternary_data_01_6_1.json
new file mode 100644
index 0000000..efda696
Binary files /dev/null and b/tests/data/networks_and_trajectories_ternary_data_01_6_1.json differ