Old engine for Continuous Time Bayesian Networks. Superseded by reCTBN. 🐍
https://github.com/madlabunimib/PyCTBN
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
260 lines
7.8 KiB
260 lines
7.8 KiB
4 years ago
|
from textwrap import dedent
|
||
|
|
||
|
import numpy as np
|
||
|
from numpy.random import randint
|
||
|
import pytest
|
||
|
|
||
|
import pandas as pd
|
||
|
from pandas import DataFrame, get_option, read_clipboard
|
||
|
import pandas._testing as tm
|
||
|
|
||
|
from pandas.io.clipboard import clipboard_get, clipboard_set
|
||
|
|
||
|
|
||
|
def build_kwargs(sep, excel):
    """Build the keyword arguments to forward to ``DataFrame.to_clipboard``.

    The string ``"default"`` is a sentinel meaning "do not pass this
    argument at all", which lets a parametrized test exercise the callee's
    own default separately from an explicit ``None``.

    Parameters
    ----------
    sep : object
        Separator to forward, or ``"default"`` to omit the ``sep`` key.
    excel : object
        Excel flag to forward, or ``"default"`` to omit the ``excel`` key.

    Returns
    -------
    dict
        Contains ``"excel"`` and/or ``"sep"`` only for non-sentinel values.
    """
    kwargs = {}
    if excel != "default":
        kwargs["excel"] = excel
    if sep != "default":
        kwargs["sep"] = sep
    return kwargs
|
||
|
|
||
|
|
||
|
@pytest.fixture(
    params=[
        "delims",
        "utf8",
        "utf16",
        "string",
        "long",
        "nonascii",
        "colwidth",
        "mixed",
        "float",
        "int",
    ]
)
def df(request):
    """Parametrized fixture yielding one sample DataFrame per data flavour.

    ``request.param`` selects which frame is built; each branch below
    constructs the frame for one of the names listed in ``params``.

    Raises
    ------
    ValueError
        If ``request.param`` is not one of the names handled below.
    """
    data_type = request.param

    if data_type == "delims":
        # Cell values contain quotes, commas, tabs and pipes.
        return pd.DataFrame({"a": ['"a,\t"b|c', "d\tef´"], "b": ["hi'j", "k''lm"]})
    elif data_type == "utf8":
        return pd.DataFrame({"a": ["µasd", "Ωœ∑´"], "b": ["øπ∆˚¬", "œ∑´®"]})
    elif data_type == "utf16":
        return pd.DataFrame(
            {"a": ["\U0001f44d\U0001f44d", "\U0001f44d\U0001f44d"], "b": ["abc", "def"]}
        )
    elif data_type == "string":
        return tm.makeCustomDataframe(
            5, 3, c_idx_type="s", r_idx_type="i", c_idx_names=[None], r_idx_names=[None]
        )
    elif data_type == "long":
        # One row more than the "display.max_rows" option.
        max_rows = get_option("display.max_rows")
        return tm.makeCustomDataframe(
            max_rows + 1,
            3,
            data_gen_f=lambda *args: randint(2),
            c_idx_type="s",
            r_idx_type="i",
            c_idx_names=[None],
            r_idx_names=[None],
        )
    elif data_type == "nonascii":
        return pd.DataFrame({"en": "in English".split(), "es": "en español".split()})
    elif data_type == "colwidth":
        # Every cell is one character longer than "display.max_colwidth".
        _cw = get_option("display.max_colwidth") + 1
        return tm.makeCustomDataframe(
            5,
            3,
            data_gen_f=lambda *args: "x" * _cw,
            c_idx_type="s",
            r_idx_type="i",
            c_idx_names=[None],
            r_idx_names=[None],
        )
    elif data_type == "mixed":
        return DataFrame(
            {
                "a": np.arange(1.0, 6.0) + 0.01,
                "b": np.arange(1, 6).astype(np.int64),
                "c": list("abcde"),
            }
        )
    elif data_type == "float":
        return tm.makeCustomDataframe(
            5,
            3,
            data_gen_f=lambda r, c: float(r) + 0.01,
            c_idx_type="s",
            r_idx_type="i",
            c_idx_names=[None],
            r_idx_names=[None],
        )
    elif data_type == "int":
        return tm.makeCustomDataframe(
            5,
            3,
            data_gen_f=lambda *args: randint(2),
            c_idx_type="s",
            r_idx_type="i",
            c_idx_names=[None],
            r_idx_names=[None],
        )
    else:
        # Name the offending parameter so a typo in ``params`` is obvious.
        raise ValueError(f"unknown data_type for df fixture: {data_type!r}")
|
||
|
|
||
|
|
||
|
@pytest.fixture
def mock_clipboard(monkeypatch, request):
    """Fixture mocking clipboard IO.

    This mocks pandas.io.clipboard.clipboard_get and
    pandas.io.clipboard.clipboard_set.

    This uses a local dict for storing data. The dictionary
    key used is the test ID, available with ``request.node.name``.

    This returns the local dictionary, for direct manipulation by
    tests.
    """
    # our local clipboard for tests
    _mock_data = {}

    def _mock_set(data):
        # Store the "copied" payload under the current test's ID.
        _mock_data[request.node.name] = data

    def _mock_get():
        # Raises KeyError if nothing was stored for the current test.
        return _mock_data[request.node.name]

    monkeypatch.setattr("pandas.io.clipboard.clipboard_set", _mock_set)
    monkeypatch.setattr("pandas.io.clipboard.clipboard_get", _mock_get)

    yield _mock_data
|
||
|
|
||
|
|
||
|
@pytest.mark.clipboard
def test_mock_clipboard(mock_clipboard):
    """Check that the ``mock_clipboard`` fixture intercepts set and get."""
    import pandas.io.clipboard

    pandas.io.clipboard.clipboard_set("abc")
    # The fixture's backing dict must now hold the value that was set.
    assert "abc" in mock_clipboard.values()
    result = pandas.io.clipboard.clipboard_get()
    assert result == "abc"
|
||
|
|
||
|
|
||
|
@pytest.mark.single
@pytest.mark.clipboard
@pytest.mark.usefixtures("mock_clipboard")
class TestClipboard:
    """Round-trip and argument-handling tests for the clipboard IO functions.

    The ``mock_clipboard`` fixture is applied to every test in the class.
    """

    def check_round_trip_frame(self, data, excel=None, sep=None, encoding=None):
        """Copy ``data`` to the clipboard, read it back and compare.

        The frame is read back with ``sep`` (falling back to a tab when
        ``sep`` is falsy) and with the first column as the index.
        """
        data.to_clipboard(excel=excel, sep=sep, encoding=encoding)
        result = read_clipboard(sep=sep or "\t", index_col=0, encoding=encoding)
        tm.assert_frame_equal(data, result)

    # Test that default arguments copy as tab delimited
    def test_round_trip_frame(self, df):
        self.check_round_trip_frame(df)

    # Test that explicit delimiters are respected
    @pytest.mark.parametrize("sep", ["\t", ",", "|"])
    def test_round_trip_frame_sep(self, df, sep):
        self.check_round_trip_frame(df, sep=sep)

    # Test white space separator
    def test_round_trip_frame_string(self, df):
        df.to_clipboard(excel=False, sep=None)
        result = read_clipboard()
        assert df.to_string() == result.to_string()
        assert df.shape == result.shape

    # Two character separator is not supported in to_clipboard
    # Test that multi-character separators are not silently passed
    def test_excel_sep_warning(self, df):
        with tm.assert_produces_warning():
            # r"\t" is a backslash followed by "t", i.e. two characters.
            df.to_clipboard(excel=True, sep=r"\t")

    # Separator is ignored when excel=False and should produce a warning
    def test_copy_delim_warning(self, df):
        with tm.assert_produces_warning():
            df.to_clipboard(excel=False, sep="\t")

    # Tests that the default behavior of to_clipboard is tab
    # delimited and excel="True"
    @pytest.mark.parametrize("sep", ["\t", None, "default"])
    @pytest.mark.parametrize("excel", [True, None, "default"])
    def test_clipboard_copy_tabs_default(self, sep, excel, df, request, mock_clipboard):
        kwargs = build_kwargs(sep, excel)
        df.to_clipboard(**kwargs)
        assert mock_clipboard[request.node.name] == df.to_csv(sep="\t")

    # Tests reading of white space separated tables
    @pytest.mark.parametrize("sep", [None, "default"])
    @pytest.mark.parametrize("excel", [False])
    def test_clipboard_copy_strings(self, sep, excel, df):
        kwargs = build_kwargs(sep, excel)
        df.to_clipboard(**kwargs)
        result = read_clipboard(sep=r"\s+")
        assert result.to_string() == df.to_string()
        assert df.shape == result.shape

    def test_read_clipboard_infer_excel(self, request, mock_clipboard):
        # gh-19010: avoid warnings
        clip_kwargs = {"engine": "python"}

        # Tabs are written as explicit escapes so they survive editors and
        # copy/paste; dedent runs before strip so the text does not depend
        # on how deeply this method is indented.
        text = dedent(
            """
            John James\tCharlie Mingus
            1\t2
            4\tHarry Carney
            """
        ).strip()
        mock_clipboard[request.node.name] = text
        df = pd.read_clipboard(**clip_kwargs)

        # excel data is parsed correctly
        assert df.iloc[1, 1] == "Harry Carney"

        # having diff tab counts doesn't trigger it
        text = dedent(
            """
            a\t b
            1  2
            3  4
            """
        ).strip()
        mock_clipboard[request.node.name] = text
        res = pd.read_clipboard(**clip_kwargs)

        text = dedent(
            """
            a  b
            1  2
            3  4
            """
        ).strip()
        mock_clipboard[request.node.name] = text
        exp = pd.read_clipboard(**clip_kwargs)

        tm.assert_frame_equal(res, exp)

    def test_invalid_encoding(self, df):
        # test case for testing invalid encoding
        with pytest.raises(ValueError):
            df.to_clipboard(encoding="ascii")
        with pytest.raises(NotImplementedError):
            pd.read_clipboard(encoding="ascii")

    @pytest.mark.parametrize("enc", ["UTF-8", "utf-8", "utf8"])
    def test_round_trip_valid_encodings(self, enc, df):
        self.check_round_trip_frame(df, encoding=enc)
|
||
|
|
||
|
|
||
|
@pytest.mark.single
@pytest.mark.clipboard
@pytest.mark.parametrize("data", ["\U0001f44d...", "Ωœ∑´...", "abcd..."])
def test_raw_roundtrip(data):
    """Check that a raw string survives ``clipboard_set`` / ``clipboard_get``.

    This test does not request the ``mock_clipboard`` fixture.
    """
    # PR #25040 wide unicode wasn't copied correctly on PY3 on windows
    clipboard_set(data)
    assert data == clipboard_get()
|