From 09a9d6b221d499e5293ec190c1df9c02bd70acc3 Mon Sep 17 00:00:00 2001
From: meliurwen
Date: Tue, 18 Apr 2023 15:12:57 +0200
Subject: [PATCH] Added requirements file, added batch benchmarking script,
 added README, added ability to set number of processes to use

---
 .gitignore       |  3 +++
 README.md        | 51 +++++++++++++++++++++++++++++++++++++++
 benchmark.py     | 63 ++++++++++++++++++++++++++++++++++++++++--------
 requirements.txt |  2 ++
 run.sh           | 24 ++++++++++++++++++
 5 files changed, 133 insertions(+), 10 deletions(-)
 create mode 100644 README.md
 create mode 100644 requirements.txt
 create mode 100755 run.sh

diff --git a/.gitignore b/.gitignore
index 1d0e276..c27f516 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,6 @@
 venv/
 *.json
 *.dat
+
+networks.d/
+*.tmp
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..39a5c35
--- /dev/null
+++ b/README.md
@@ -0,0 +1,51 @@
+# pyCTBN - Benchmarks
+
+This benchmark is tailored to work with the "modernized" and polished version
+of `pyCTBN`.
+
+## Preparation
+
+To clone with all submodules:
+
+```sh
+git clone --recursive git@git-service.tld:user/repo.git
+```
+
+If you cloned without `--recursive`, you probably want to initialize the
+submodules:
+
+```sh
+git submodule update --init --recursive
+```
+
+Create a virtual environment:
+
+```sh
+python3 -m venv .venv && source .venv/bin/activate
+```
+
+Install the dependencies:
+
+```sh
+pip3 install -r requirements.txt
+```
+
+## Usage
+
+Create a `networks.d` directory in this project workspace.
+
+Move the `json` files of the networks into the newly created `networks.d`
+directory.
+
+Execute the shell script `./run.sh` to run the tests contained in the
+`networks.d` directory:
+
+```sh
+./run.sh
+```
+
+To learn how to use the benchmark program directly:
+
+```sh
+./benchmark.py --help
+```
diff --git a/benchmark.py b/benchmark.py
index e8b36b3..9bf6b3a 100755
--- a/benchmark.py
+++ b/benchmark.py
@@ -10,10 +10,8 @@ from pyctbn.legacy import JsonImporter
 from pyctbn.legacy import SamplePath
 from pyctbn.legacy import StructureConstraintBasedEstimator
 
-def structure_constraint_based_estimation_example(network_file_path):
-    Path("./data").mkdir(parents=True, exist_ok=True)
-    #
-    read_files = glob.glob(os.path.join("./data/", "*.json"))
+def structure_constraint_based_estimation_example(network_file_path, jobs):
+    print("Importing %s..." % (network_file_path))
     #
     importer = JsonImporter(
         file_path=network_file_path,
@@ -26,11 +24,14 @@ def structure_constraint_based_estimation_example(network_file_path):
     start_time = time.time()
     #
     importer.import_data(0)
+    print("Data imported in %d seconds." % (time.time() - start_time))
     # construct a SamplePath Object passing a filled AbstractImporter object
     s1 = SamplePath(importer=importer)
     # build the trajectories
+    print("Building trajectories...")
     s1.build_trajectories()
     # build the information about the net
+    print("Building structure...")
     s1.build_structure()
     # construct a StructureEstimator object passing a correctly build SamplePath object
     # and the independence tests significance, if you have prior knowledge about
@@ -44,21 +45,63 @@
         thumb_threshold=25
     )
     # call the algorithm to estimate the structure
-    se1.estimate_structure()
+    print("Estimating structure...")
+    start_estimating_time = time.time()
+    if jobs == 0:
+        disable_multiprocessing = False
+        processes_number = None
+    elif jobs == 1:
+        disable_multiprocessing = True
+        processes_number = None
+    else:
+        disable_multiprocessing = False
+        processes_number = jobs
+    se1.estimate_structure(
+        disable_multiprocessing=disable_multiprocessing,
+        processes_number=processes_number
+    )
+    print("Structure estimated in %d seconds." % (time.time() - start_estimating_time))
     end_time = time.time()
-    print("Elaspsed time: %d seconds" % (end_time - start_time))
+    print("Total elapsed time for %s: %d seconds" % (network_file_path, end_time - start_time))
     # obtain the adjacency matrix of the estimated structure
     #print(se1.adjacency_matrix())
     Path("./res").mkdir(parents=True, exist_ok=True)
     # save the estimated structure to a json file
     # (remember to specify the path AND the .json extension)....
-    se1.save_results("./res/results0.json")
+    se1.save_results("./res/results_%s" % (os.path.basename(network_file_path)))
     # ...or save it also in a graphical model fashion
     # (remember to specify the path AND the .png extension)
     #se1.save_plot_estimated_structure_graph("./res/result0.png")
 
 
-parser = argparse.ArgumentParser()
-parser.add_argument('filename', help="path of the network in json format")
+
+class JobsAction(argparse.Action):
+
+    def __call__(self, parser, namespace, values, option_string=None):
+        if values < 0:
+            parser.error(
+                "Minimum jobs for {0} is 0 (default), which means to "
+                "use all cores available.".format(option_string)
+            )
+        setattr(namespace, self.dest, values)
+
+parser = argparse.ArgumentParser(
+    prog='pyCTBN - Benchmark',
+    description="This benchmark program is tailored to work with the "
+                "\"modernized\" and polished version of pyCTBN."
+)
+parser.add_argument(
+    'network_path',
+    help="path of the network file in json format"
+)
+parser.add_argument(
+    '-j',
+    '--jobs',
+    action=JobsAction,
+    type=int,
+    default=0,
+    required=False,
+    help="number of jobs (processes) to use (0 by default, it uses all available cores)"
+)
 args = parser.parse_args()
-structure_constraint_based_estimation_example(args.filename)
+structure_constraint_based_estimation_example(args.network_path, args.jobs)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..4a8daa5
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+./deps/PyCTBN
+memory_profiler
diff --git a/run.sh b/run.sh
new file mode 100755
index 0000000..6378a9f
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+set -e
+
+_jobs=0
+
+mkdir -p "./networks.d"
+mkdir -p "./profiling"
+
+find "./networks.d/" ! -name "$(printf "*\n*")" -name '*.json' > bench_nets.tmp
+
+while IFS= read -r _file; do
+    printf "Benchmarking '%s' file...\n" "$_file"
+    mprof run \
+        --output "profiling/mprofile_$(basename "$_file").dat" \
+        --backend psutil_pss \
+        --python \
+        --include-children \
+        ./benchmark.py \
+        -j $_jobs \
+        "$_file"
+    printf "Benchmark '%s' file completed.\n" "$_file"
+done < bench_nets.tmp
+rm bench_nets.tmp