Added requirements file, added batch benchmarking script, added README, added ability to set number of processes to use

master
Meliurwen 2 years ago
parent e62be8f201
commit 09a9d6b221
Signed by: meliurwen
GPG Key ID: 818A8B35E9F1CE10
  1. 3
      .gitignore
  2. 51
      README.md
  3. 63
      benchmark.py
  4. 2
      requirements.txt
  5. 24
      run.sh

3
.gitignore vendored

@ -5,3 +5,6 @@ venv/
*.json *.json
*.dat *.dat
networks.d/
*.tmp

@ -0,0 +1,51 @@
# pyCTBN - Benchmarks
This benchmark is tailored to work with the "modernized" and polished version
of `pyCTBN`.
## Preparation
To clone with all submodules:
```sh
git clone --recursive git@git-service.tld:user/repo.git
```
If you cloned without `--recursive`, you probably want to initialize the
submodules:
```sh
git submodule update --init --recursive
```
Create a virtual environment:
```sh
python3 -m venv .venv && source .venv/bin/activate
```
Install the dependencies:
```sh
pip3 install -r requirements.txt
```
## Usage
Create a `networks.d` directory in this project workspace.
Move the networks' `json` files into the newly created `networks.d`
directory.
Execute the shell script `./run.sh` to run the tests contained in the
`networks.d` directory:
```sh
./run.sh
```
To learn how to use the benchmark program directly:
```sh
./benchmark.py --help
```

@ -10,10 +10,8 @@ from pyctbn.legacy import JsonImporter
from pyctbn.legacy import SamplePath from pyctbn.legacy import SamplePath
from pyctbn.legacy import StructureConstraintBasedEstimator from pyctbn.legacy import StructureConstraintBasedEstimator
def structure_constraint_based_estimation_example(network_file_path): def structure_constraint_based_estimation_example(network_file_path, jobs):
Path("./data").mkdir(parents=True, exist_ok=True) print("Importing %s..." % (network_file_path))
# <read the json files in ./data path>
read_files = glob.glob(os.path.join("./data/", "*.json"))
# <initialize a JsonImporter object for the first file> # <initialize a JsonImporter object for the first file>
importer = JsonImporter( importer = JsonImporter(
file_path=network_file_path, file_path=network_file_path,
@ -26,11 +24,14 @@ def structure_constraint_based_estimation_example(network_file_path):
start_time = time.time() start_time = time.time()
# <import the data at index 0 of the outer json array> # <import the data at index 0 of the outer json array>
importer.import_data(0) importer.import_data(0)
print("Data imported in %d seconds." % (time.time() - start_time))
# construct a SamplePath Object passing a filled AbstractImporter object # construct a SamplePath Object passing a filled AbstractImporter object
s1 = SamplePath(importer=importer) s1 = SamplePath(importer=importer)
# build the trajectories # build the trajectories
print("Building trajectories...")
s1.build_trajectories() s1.build_trajectories()
# build the information about the net # build the information about the net
print("Building structure...")
s1.build_structure() s1.build_structure()
# construct a StructureEstimator object passing a correctly build SamplePath object # construct a StructureEstimator object passing a correctly build SamplePath object
# and the independence tests significance, if you have prior knowledge about # and the independence tests significance, if you have prior knowledge about
@ -44,21 +45,63 @@ def structure_constraint_based_estimation_example(network_file_path):
thumb_threshold=25 thumb_threshold=25
) )
# call the algorithm to estimate the structure # call the algorithm to estimate the structure
se1.estimate_structure() print("Estimating structure...")
start_estimating_time = time.time()
if jobs == 0:
disable_multiprocessing=False
processes_number = None
elif jobs == 1:
disable_multiprocessing = True
processes_number = None
else:
disable_multiprocessing = False
processes_number = jobs
se1.estimate_structure(
disable_multiprocessing=disable_multiprocessing,
processes_number=processes_number
)
print("Structure estimated in %d seconds." % (time.time() - start_estimating_time))
end_time = time.time() end_time = time.time()
print("Elaspsed time: %d seconds" % (end_time - start_time)) print("Total elaspsed time for %s: %d seconds" % (network_file_path, end_time - start_time))
# obtain the adjacency matrix of the estimated structure # obtain the adjacency matrix of the estimated structure
#print(se1.adjacency_matrix()) #print(se1.adjacency_matrix())
Path("./res").mkdir(parents=True, exist_ok=True) Path("./res").mkdir(parents=True, exist_ok=True)
# save the estimated structure to a json file # save the estimated structure to a json file
# (remember to specify the path AND the .json extension).... # (remember to specify the path AND the .json extension)....
se1.save_results("./res/results0.json") se1.save_results("./res/results_%s" % (os.path.basename(network_file_path)))
# ...or save it also in a graphical model fashion # ...or save it also in a graphical model fashion
# (remember to specify the path AND the .png extension) # (remember to specify the path AND the .png extension)
#se1.save_plot_estimated_structure_graph("./res/result0.png") #se1.save_plot_estimated_structure_graph("./res/result0.png")
parser = argparse.ArgumentParser()
parser.add_argument('filename', help="path of the network in json format") class JobsAction(argparse.Action):
def __call__(self, parser, namespace, values, option_string=None):
    """Validate and store the value given to the jobs option.

    A negative value is reported through ``parser.error``; otherwise the
    value is stored on ``namespace`` under ``self.dest``.
    """
    is_negative = values < 0
    if is_negative:
        message = (
            "Minimum jobs for {0} is 0 (default), which means to "
            "use all cores available."
        ).format(option_string)
        parser.error(message)
    # Reached for every accepted value, including 0.
    setattr(namespace, self.dest, values)
# Build the command-line parser for the benchmark program.
parser = argparse.ArgumentParser(
    prog="pyCTBN - Benchmark",
    description=(
        'This benchmark program is tailored to work with the "modernized" '
        "and polished version of pyCTBN."
    ),
)
# Positional argument: the network file to benchmark.
parser.add_argument("network_path", help="path of the network file in json format")
# Optional argument: parsed as int, handled by JobsAction, defaults to 0.
parser.add_argument(
    "-j", "--jobs",
    action=JobsAction, type=int, default=0, required=False,
    help=(
        "number of jobs (processes) to use "
        "(0 by default, it uses all available cores)"
    ),
)
args = parser.parse_args() args = parser.parse_args()
structure_constraint_based_estimation_example(args.filename) structure_constraint_based_estimation_example(args.network_path, args.jobs)

@ -0,0 +1,2 @@
./deps/PyCTBN
memory_profiler

@ -0,0 +1,24 @@
#!/bin/sh
# Benchmark every *.json network found in ./networks.d with benchmark.py,
# running each one under `mprof run` and writing one profile per network
# into ./profiling.

# Abort at the first failing command.
set -e

# Value passed to benchmark.py's -j option.
_jobs=0
# Temporary list of the network files to benchmark, one path per line.
_list="bench_nets.tmp"

mkdir -p "./networks.d"
mkdir -p "./profiling"

# Because of `set -e`, a failing benchmark ends the script before the end of
# the loop; remove the temporary list from an EXIT trap so it is not left
# behind in that case.
trap 'rm -f "$_list"' EXIT

# Collect the *.json files. Names containing a newline are excluded because
# the list is read back line by line.
find "./networks.d/" ! -name "$(printf "*\n*")" -name '*.json' > "$_list"

# NOTE(review): commands in the loop inherit the list file as stdin; if
# benchmark.py ever reads stdin, redirect it from /dev/null — confirm.
while IFS= read -r _file; do
    printf "Benchmarking '%s' file...\n" "$_file"
    mprof run \
        --output "profiling/mprofile_$(basename "$_file").dat" \
        --backend psutil_pss \
        --python \
        --include-children \
        ./benchmark.py \
        -j "$_jobs" \
        "$_file"
    printf "Benchmark '%s' file completed.\n" "$_file"
done < "$_list"
Loading…
Cancel
Save