Old engine for Continuous Time Bayesian Networks. Superseded by reCTBN. 🐍
https://github.com/madlabunimib/PyCTBN
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
187 lines
6.2 KiB
187 lines
6.2 KiB
4 years ago
|
import os
|
||
|
import subprocess
|
||
|
import sys
|
||
|
import textwrap
|
||
|
|
||
|
import pytest
|
||
|
|
||
|
import pandas as pd
|
||
|
import pandas._testing as tm
|
||
|
|
||
|
import pandas.io.common as icom
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
"obj",
|
||
|
[
|
||
|
pd.DataFrame(
|
||
|
100 * [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
|
||
|
columns=["X", "Y", "Z"],
|
||
|
),
|
||
|
pd.Series(100 * [0.123456, 0.234567, 0.567567], name="X"),
|
||
|
],
|
||
|
)
|
||
|
@pytest.mark.parametrize("method", ["to_pickle", "to_json", "to_csv"])
|
||
|
def test_compression_size(obj, method, compression_only):
|
||
|
with tm.ensure_clean() as path:
|
||
|
getattr(obj, method)(path, compression=compression_only)
|
||
|
compressed_size = os.path.getsize(path)
|
||
|
getattr(obj, method)(path, compression=None)
|
||
|
uncompressed_size = os.path.getsize(path)
|
||
|
assert uncompressed_size > compressed_size
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
"obj",
|
||
|
[
|
||
|
pd.DataFrame(
|
||
|
100 * [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
|
||
|
columns=["X", "Y", "Z"],
|
||
|
),
|
||
|
pd.Series(100 * [0.123456, 0.234567, 0.567567], name="X"),
|
||
|
],
|
||
|
)
|
||
|
@pytest.mark.parametrize("method", ["to_csv", "to_json"])
|
||
|
def test_compression_size_fh(obj, method, compression_only):
|
||
|
with tm.ensure_clean() as path:
|
||
|
f, handles = icom.get_handle(path, "w", compression=compression_only)
|
||
|
with f:
|
||
|
getattr(obj, method)(f)
|
||
|
assert not f.closed
|
||
|
assert f.closed
|
||
|
compressed_size = os.path.getsize(path)
|
||
|
with tm.ensure_clean() as path:
|
||
|
f, handles = icom.get_handle(path, "w", compression=None)
|
||
|
with f:
|
||
|
getattr(obj, method)(f)
|
||
|
assert not f.closed
|
||
|
assert f.closed
|
||
|
uncompressed_size = os.path.getsize(path)
|
||
|
assert uncompressed_size > compressed_size
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
"write_method, write_kwargs, read_method",
|
||
|
[
|
||
|
("to_csv", {"index": False}, pd.read_csv),
|
||
|
("to_json", {}, pd.read_json),
|
||
|
("to_pickle", {}, pd.read_pickle),
|
||
|
],
|
||
|
)
|
||
|
def test_dataframe_compression_defaults_to_infer(
|
||
|
write_method, write_kwargs, read_method, compression_only
|
||
|
):
|
||
|
# GH22004
|
||
|
input = pd.DataFrame([[1.0, 0, -4], [3.4, 5, 2]], columns=["X", "Y", "Z"])
|
||
|
extension = icom._compression_to_extension[compression_only]
|
||
|
with tm.ensure_clean("compressed" + extension) as path:
|
||
|
getattr(input, write_method)(path, **write_kwargs)
|
||
|
output = read_method(path, compression=compression_only)
|
||
|
tm.assert_frame_equal(output, input)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
"write_method,write_kwargs,read_method,read_kwargs",
|
||
|
[
|
||
|
("to_csv", {"index": False, "header": True}, pd.read_csv, {"squeeze": True}),
|
||
|
("to_json", {}, pd.read_json, {"typ": "series"}),
|
||
|
("to_pickle", {}, pd.read_pickle, {}),
|
||
|
],
|
||
|
)
|
||
|
def test_series_compression_defaults_to_infer(
|
||
|
write_method, write_kwargs, read_method, read_kwargs, compression_only
|
||
|
):
|
||
|
# GH22004
|
||
|
input = pd.Series([0, 5, -2, 10], name="X")
|
||
|
extension = icom._compression_to_extension[compression_only]
|
||
|
with tm.ensure_clean("compressed" + extension) as path:
|
||
|
getattr(input, write_method)(path, **write_kwargs)
|
||
|
output = read_method(path, compression=compression_only, **read_kwargs)
|
||
|
tm.assert_series_equal(output, input, check_names=False)
|
||
|
|
||
|
|
||
|
def test_compression_warning(compression_only):
|
||
|
# Assert that passing a file object to to_csv while explicitly specifying a
|
||
|
# compression protocol triggers a RuntimeWarning, as per GH21227.
|
||
|
df = pd.DataFrame(
|
||
|
100 * [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
|
||
|
columns=["X", "Y", "Z"],
|
||
|
)
|
||
|
with tm.ensure_clean() as path:
|
||
|
f, handles = icom.get_handle(path, "w", compression=compression_only)
|
||
|
with tm.assert_produces_warning(RuntimeWarning, check_stacklevel=False):
|
||
|
with f:
|
||
|
df.to_csv(f, compression=compression_only)
|
||
|
|
||
|
|
||
|
def test_with_missing_lzma():
|
||
|
"""Tests if import pandas works when lzma is not present."""
|
||
|
# https://github.com/pandas-dev/pandas/issues/27575
|
||
|
code = textwrap.dedent(
|
||
|
"""\
|
||
|
import sys
|
||
|
sys.modules['lzma'] = None
|
||
|
import pandas
|
||
|
"""
|
||
|
)
|
||
|
subprocess.check_output([sys.executable, "-c", code], stderr=subprocess.PIPE)
|
||
|
|
||
|
|
||
|
def test_with_missing_lzma_runtime():
|
||
|
"""Tests if RuntimeError is hit when calling lzma without
|
||
|
having the module available.
|
||
|
"""
|
||
|
code = textwrap.dedent(
|
||
|
"""
|
||
|
import sys
|
||
|
import pytest
|
||
|
sys.modules['lzma'] = None
|
||
|
import pandas
|
||
|
df = pandas.DataFrame()
|
||
|
with pytest.raises(RuntimeError, match='lzma module'):
|
||
|
df.to_csv('foo.csv', compression='xz')
|
||
|
"""
|
||
|
)
|
||
|
subprocess.check_output([sys.executable, "-c", code], stderr=subprocess.PIPE)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
"obj",
|
||
|
[
|
||
|
pd.DataFrame(
|
||
|
100 * [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
|
||
|
columns=["X", "Y", "Z"],
|
||
|
),
|
||
|
pd.Series(100 * [0.123456, 0.234567, 0.567567], name="X"),
|
||
|
],
|
||
|
)
|
||
|
@pytest.mark.parametrize("method", ["to_pickle", "to_json", "to_csv"])
|
||
|
def test_gzip_compression_level(obj, method):
|
||
|
# GH33196
|
||
|
with tm.ensure_clean() as path:
|
||
|
getattr(obj, method)(path, compression="gzip")
|
||
|
compressed_size_default = os.path.getsize(path)
|
||
|
getattr(obj, method)(path, compression={"method": "gzip", "compresslevel": 1})
|
||
|
compressed_size_fast = os.path.getsize(path)
|
||
|
assert compressed_size_default < compressed_size_fast
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
"obj",
|
||
|
[
|
||
|
pd.DataFrame(
|
||
|
100 * [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
|
||
|
columns=["X", "Y", "Z"],
|
||
|
),
|
||
|
pd.Series(100 * [0.123456, 0.234567, 0.567567], name="X"),
|
||
|
],
|
||
|
)
|
||
|
@pytest.mark.parametrize("method", ["to_pickle", "to_json", "to_csv"])
|
||
|
def test_bzip_compression_level(obj, method):
|
||
|
"""GH33196 bzip needs file size > 100k to show a size difference between
|
||
|
compression levels, so here we just check if the call works when
|
||
|
compression is passed as a dict.
|
||
|
"""
|
||
|
with tm.ensure_clean() as path:
|
||
|
getattr(obj, method)(path, compression={"method": "bz2", "compresslevel": 1})
|