From 09a9d6b221d499e5293ec190c1df9c02bd70acc3 Mon Sep 17 00:00:00 2001
From: meliurwen
Date: Tue, 18 Apr 2023 15:12:57 +0200
Subject: [PATCH] Added requirements file, added batch benchmarking script,
 added README, added ability to set number of processes to use

---
 .gitignore       |  3 +++
 README.md        | 51 +++++++++++++++++++++++++++++++++++++++
 benchmark.py     | 63 ++++++++++++++++++++++++++++++++++++++++--------
 requirements.txt |  2 ++
 run.sh           | 24 ++++++++++++++++++
 5 files changed, 133 insertions(+), 10 deletions(-)
 create mode 100644 README.md
 create mode 100644 requirements.txt
 create mode 100755 run.sh

diff --git a/.gitignore b/.gitignore
index 1d0e276..c27f516 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,6 @@
 venv/
 *.json
 *.dat
+
+networks.d/
+*.tmp
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..39a5c35
--- /dev/null
+++ b/README.md
@@ -0,0 +1,51 @@
+# pyCTBN - Benchmarks
+
+This benchmark is tailored to work with the "modernized" and polished version
+of `pyCTBN`.
+
+## Preparation
+
+To clone with all submodules:
+
+```sh
+git clone --recursive git@git-service.tld:user/repo.git
+```
+
+If you cloned without `--recursive`, you probably want to initialize the
+submodules:
+
+```sh
+git submodule update --init --recursive
+```
+
+Create a virtual environment:
+
+```sh
+python3 -m venv .venv && source .venv/bin/activate
+```
+
+Install the dependencies:
+
+```sh
+pip3 install -r requirements.txt
+```
+
+## Usage
+
+Create a `networks.d` directory in this project workspace.
+
+Move the `json` files of the networks into the newly created `networks.d`
+directory.
+
+Execute the shell script `./run.sh` to run the tests contained in the
+`networks.d` directory:
+
+```sh
+./run.sh
+```
+
+To learn how to use the benchmark program directly:
+
+```sh
+./benchmark.py --help
+```
diff --git a/benchmark.py b/benchmark.py
index e8b36b3..9bf6b3a 100755
--- a/benchmark.py
+++ b/benchmark.py
@@ -10,10 +10,8 @@ from pyctbn.legacy import JsonImporter
 from pyctbn.legacy import SamplePath
 from pyctbn.legacy import StructureConstraintBasedEstimator
 
-def structure_constraint_based_estimation_example(network_file_path):
-    Path("./data").mkdir(parents=True, exist_ok=True)
-    #
-    read_files = glob.glob(os.path.join("./data/", "*.json"))
+def structure_constraint_based_estimation_example(network_file_path, jobs):
+    print("Importing %s..." % (network_file_path))
     #
     importer = JsonImporter(
         file_path=network_file_path,
@@ -26,11 +24,14 @@ def structure_constraint_based_estimation_example(network_file_path):
     start_time = time.time()
     #
     importer.import_data(0)
+    print("Data imported in %d seconds." % (time.time() - start_time))
     # construct a SamplePath Object passing a filled AbstractImporter object
     s1 = SamplePath(importer=importer)
     # build the trajectories
+    print("Building trajectories...")
     s1.build_trajectories()
     # build the information about the net
+    print("Building structure...")
     s1.build_structure()
     # construct a StructureEstimator object passing a correctly build SamplePath object
     # and the independence tests significance, if you have prior knowledge about
@@ -44,21 +45,63 @@
         thumb_threshold=25
     )
     # call the algorithm to estimate the structure
-    se1.estimate_structure()
+    print("Estimating structure...")
+    start_estimating_time = time.time()
+    if jobs == 0:
+        disable_multiprocessing = False
+        processes_number = None
+    elif jobs == 1:
+        disable_multiprocessing = True
+        processes_number = None
+    else:
+        disable_multiprocessing = False
+        processes_number = jobs
+    se1.estimate_structure(
+        disable_multiprocessing=disable_multiprocessing,
+        processes_number=processes_number
+    )
+    print("Structure estimated in %d seconds." % (time.time() - start_estimating_time))
     end_time = time.time()
-    print("Elaspsed time: %d seconds" % (end_time - start_time))
+    print("Total elapsed time for %s: %d seconds" % (network_file_path, end_time - start_time))
     # obtain the adjacency matrix of the estimated structure
     #print(se1.adjacency_matrix())
     Path("./res").mkdir(parents=True, exist_ok=True)
     # save the estimated structure to a json file
     # (remember to specify the path AND the .json extension)....
-    se1.save_results("./res/results0.json")
+    se1.save_results("./res/results_%s" % (os.path.basename(network_file_path)))
     # ...or save it also in a graphical model fashion
     # (remember to specify the path AND the .png extension)
     #se1.save_plot_estimated_structure_graph("./res/result0.png")
 
 
-parser = argparse.ArgumentParser()
-parser.add_argument('filename', help="path of the network in json format")
+
+class JobsAction(argparse.Action):
+
+    def __call__(self, parser, namespace, values, option_string=None):
+        if values < 0:
+            parser.error(
+                "Minimum jobs for {0} is 0 (default), which means to "
+                "use all cores available.".format(option_string)
+            )
+        setattr(namespace, self.dest, values)
+
+parser = argparse.ArgumentParser(
+    prog='pyCTBN - Benchmark',
+    description="This benchmark program is tailored to work with the "
+                "\"modernized\" and polished version of pyCTBN."
+)
+parser.add_argument(
+    'network_path',
+    help="path of the network file in json format"
+)
+parser.add_argument(
+    '-j',
+    '--jobs',
+    action=JobsAction,
+    type=int,
+    default=0,
+    required=False,
+    help="number of jobs (processes) to use (0 by default, it uses all available cores)"
+)
 args = parser.parse_args()
-structure_constraint_based_estimation_example(args.filename)
+structure_constraint_based_estimation_example(args.network_path, args.jobs)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..4a8daa5
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+./deps/PyCTBN
+memory_profiler
diff --git a/run.sh b/run.sh
new file mode 100755
index 0000000..6378a9f
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+set -e
+
+_jobs=0
+
+mkdir -p "./networks.d"
+mkdir -p "./profiling"
+
+find "./networks.d/" ! -name "$(printf "*\n*")" -name '*.json' > bench_nets.tmp
+
+while IFS= read -r _file; do
+    printf "Benchmarking '%s' file...\n" "$_file"
+    mprof run \
+        --output "profiling/mprofile_$(basename "$_file").dat" \
+        --backend psutil_pss \
+        --python \
+        --include-children \
+        ./benchmark.py \
+        -j $_jobs \
+        "$_file"
+    printf "Benchmark '%s' file completed.\n" "$_file"
+done < bench_nets.tmp
+rm bench_nets.tmp