Old engine for Continuous Time Bayesian Networks. Superseded by reCTBN. 🐍
https://github.com/madlabunimib/PyCTBN
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
347 lines
9.1 KiB
347 lines
9.1 KiB
4 years ago
|
import numpy as np
|
||
|
import pytest
|
||
|
|
||
|
from pandas._libs import join as _join
|
||
|
|
||
|
from pandas import Categorical, DataFrame, Index, merge
|
||
|
import pandas._testing as tm
|
||
|
|
||
|
|
||
|
class TestIndexer:
    """Dtype-parametrized checks for ``_join.outer_join_indexer``."""

    @pytest.mark.parametrize(
        "dtype", ["int32", "int64", "float32", "float64", "object"]
    )
    def test_outer_join_indexer(self, dtype):
        """Outer-join overlapping, left-empty and right-empty inputs.

        For each ``dtype`` the joined values and both indexers are compared
        against hand-written expectations. In the expected indexers ``-1``
        appears wherever the joined value is absent from that side.
        """
        indexer = _join.outer_join_indexer

        left = np.arange(3, dtype=dtype)  # 0, 1, 2
        right = np.arange(2, 5, dtype=dtype)  # 2, 3, 4
        empty = np.array([], dtype=dtype)

        # Overlapping inputs: only the value 2 is present on both sides.
        result, lindexer, rindexer = indexer(left, right)
        assert isinstance(result, np.ndarray)
        assert isinstance(lindexer, np.ndarray)
        assert isinstance(rindexer, np.ndarray)
        tm.assert_numpy_array_equal(result, np.arange(5, dtype=dtype))
        exp = np.array([0, 1, 2, -1, -1], dtype=np.int64)
        tm.assert_numpy_array_equal(lindexer, exp)
        exp = np.array([-1, -1, 0, 1, 2], dtype=np.int64)
        tm.assert_numpy_array_equal(rindexer, exp)

        # Empty left side: the result is exactly ``right``.
        result, lindexer, rindexer = indexer(empty, right)
        tm.assert_numpy_array_equal(result, right)
        exp = np.array([-1, -1, -1], dtype=np.int64)
        tm.assert_numpy_array_equal(lindexer, exp)
        exp = np.array([0, 1, 2], dtype=np.int64)
        tm.assert_numpy_array_equal(rindexer, exp)

        # Empty right side: the result is exactly ``left``.
        result, lindexer, rindexer = indexer(left, empty)
        tm.assert_numpy_array_equal(result, left)
        exp = np.array([0, 1, 2], dtype=np.int64)
        tm.assert_numpy_array_equal(lindexer, exp)
        exp = np.array([-1, -1, -1], dtype=np.int64)
        tm.assert_numpy_array_equal(rindexer, exp)


def test_left_join_indexer_unique():
    """Check ``left_join_indexer_unique`` when the left array repeats values."""
    a = np.array([1, 2, 3, 4, 5], dtype=np.int64)
    b = np.array([2, 2, 3, 4, 4], dtype=np.int64)

    # ``b`` is passed as the left array. Each expected entry is the position
    # in ``a`` of the corresponding value of ``b``.
    result = _join.left_join_indexer_unique(b, a)
    expected = np.array([1, 1, 2, 3, 3], dtype=np.int64)
    tm.assert_numpy_array_equal(result, expected)


def test_left_outer_join_bug():
    """Check ``left_outer_join`` with ``sort=False`` on a fixed 100-key input.

    The expected left indexer is ``0..len(left)-1`` and the expected right
    indexer holds the position of each left key in ``right`` (``-1`` for the
    keys 0 and 2, which ``right`` does not contain).
    """
    left = np.array(
        [
            0, 1, 0, 1, 1, 2, 3, 1, 0, 2,
            1, 2, 0, 1, 1, 2, 3, 2, 3, 2,
            1, 1, 3, 0, 3, 2, 3, 0, 0, 2,
            3, 2, 0, 3, 1, 3, 0, 1, 3, 0,
            0, 1, 0, 3, 1, 0, 1, 0, 1, 1,
            0, 2, 2, 2, 2, 2, 0, 3, 1, 2,
            0, 0, 3, 1, 3, 2, 2, 0, 1, 3,
            0, 2, 3, 2, 3, 3, 2, 3, 3, 1,
            3, 2, 0, 0, 3, 1, 1, 1, 0, 2,
            3, 3, 1, 2, 0, 3, 1, 2, 0, 2,
        ],
        dtype=np.int64,
    )

    right = np.array([3, 1], dtype=np.int64)
    max_groups = 4

    lidx, ridx = _join.left_outer_join(left, right, max_groups, sort=False)

    exp_lidx = np.arange(len(left), dtype=np.int64)
    exp_ridx = -np.ones(len(left), dtype=np.int64)

    # ``right`` is [3, 1]: key 1 sits at position 1 and key 3 at position 0.
    exp_ridx[left == 1] = 1
    exp_ridx[left == 3] = 0

    tm.assert_numpy_array_equal(lidx, exp_lidx)
    tm.assert_numpy_array_equal(ridx, exp_ridx)


def test_inner_join_indexer():
    """Check ``inner_join_indexer`` on five-element and one-element inputs."""
    a = np.array([1, 2, 3, 4, 5], dtype=np.int64)
    b = np.array([0, 3, 5, 7, 9], dtype=np.int64)

    index, ares, bres = _join.inner_join_indexer(a, b)

    # Only 3 and 5 occur in both arrays.
    index_exp = np.array([3, 5], dtype=np.int64)
    tm.assert_almost_equal(index, index_exp)

    # Positions of 3 and 5 within ``a`` and within ``b``.
    aexp = np.array([2, 4], dtype=np.int64)
    bexp = np.array([1, 2], dtype=np.int64)
    tm.assert_almost_equal(ares, aexp)
    tm.assert_almost_equal(bres, bexp)

    # Single matching element on each side.
    a = np.array([5], dtype=np.int64)
    b = np.array([5], dtype=np.int64)

    index, ares, bres = _join.inner_join_indexer(a, b)
    tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64))
    tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.int64))
    tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.int64))


def test_outer_join_indexer():
    """Check ``outer_join_indexer`` on five-element and one-element inputs."""
    a = np.array([1, 2, 3, 4, 5], dtype=np.int64)
    b = np.array([0, 3, 5, 7, 9], dtype=np.int64)

    index, ares, bres = _join.outer_join_indexer(a, b)

    # Every distinct value from either array, in ascending order.
    index_exp = np.array([0, 1, 2, 3, 4, 5, 7, 9], dtype=np.int64)
    tm.assert_almost_equal(index, index_exp)

    # -1 is expected where the joined value is absent from that side.
    aexp = np.array([-1, 0, 1, 2, 3, 4, -1, -1], dtype=np.int64)
    bexp = np.array([0, -1, -1, 1, -1, 2, 3, 4], dtype=np.int64)
    tm.assert_almost_equal(ares, aexp)
    tm.assert_almost_equal(bres, bexp)

    # Single matching element on each side.
    a = np.array([5], dtype=np.int64)
    b = np.array([5], dtype=np.int64)

    index, ares, bres = _join.outer_join_indexer(a, b)
    tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64))
    tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.int64))
    tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.int64))


def test_left_join_indexer():
    """Check ``left_join_indexer`` on five-element and one-element inputs."""
    a = np.array([1, 2, 3, 4, 5], dtype=np.int64)
    b = np.array([0, 3, 5, 7, 9], dtype=np.int64)

    index, ares, bres = _join.left_join_indexer(a, b)

    # The joined values are expected to equal the left array.
    tm.assert_almost_equal(index, a)

    # -1 is expected in ``bres`` for left values that ``b`` does not contain.
    aexp = np.array([0, 1, 2, 3, 4], dtype=np.int64)
    bexp = np.array([-1, -1, 1, -1, 2], dtype=np.int64)
    tm.assert_almost_equal(ares, aexp)
    tm.assert_almost_equal(bres, bexp)

    # Single matching element on each side.
    a = np.array([5], dtype=np.int64)
    b = np.array([5], dtype=np.int64)

    index, ares, bres = _join.left_join_indexer(a, b)
    tm.assert_numpy_array_equal(index, np.array([5], dtype=np.int64))
    tm.assert_numpy_array_equal(ares, np.array([0], dtype=np.int64))
    tm.assert_numpy_array_equal(bres, np.array([0], dtype=np.int64))


def test_left_join_indexer2():
    """Check ``left_join_indexer`` when the right-hand array repeats a value."""
    idx = Index([1, 1, 2, 5])
    idx2 = Index([1, 2, 5, 7, 9])

    # ``idx2`` is the left operand; ``idx`` (with the repeated 1) is the right.
    res, lidx, ridx = _join.left_join_indexer(idx2.values, idx.values)

    exp_res = np.array([1, 1, 2, 5, 7, 9], dtype=np.int64)
    tm.assert_almost_equal(res, exp_res)

    # Left position 0 is expected twice, once per matching 1 on the right.
    exp_lidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.int64)
    tm.assert_almost_equal(lidx, exp_lidx)

    # 7 and 9 do not occur in ``idx``, hence the trailing -1 entries.
    exp_ridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.int64)
    tm.assert_almost_equal(ridx, exp_ridx)


def test_outer_join_indexer2():
    """Check ``outer_join_indexer`` when the right-hand array repeats a value."""
    idx = Index([1, 1, 2, 5])
    idx2 = Index([1, 2, 5, 7, 9])

    # ``idx2`` is the left operand; ``idx`` (with the repeated 1) is the right.
    res, lidx, ridx = _join.outer_join_indexer(idx2.values, idx.values)

    exp_res = np.array([1, 1, 2, 5, 7, 9], dtype=np.int64)
    tm.assert_almost_equal(res, exp_res)

    # Left position 0 is expected twice, once per matching 1 on the right.
    exp_lidx = np.array([0, 0, 1, 2, 3, 4], dtype=np.int64)
    tm.assert_almost_equal(lidx, exp_lidx)

    # 7 and 9 do not occur in ``idx``, hence the trailing -1 entries.
    exp_ridx = np.array([0, 1, 2, 3, -1, -1], dtype=np.int64)
    tm.assert_almost_equal(ridx, exp_ridx)


def test_inner_join_indexer2():
    """Check ``inner_join_indexer`` when the right-hand array repeats a value."""
    idx = Index([1, 1, 2, 5])
    idx2 = Index([1, 2, 5, 7, 9])

    # ``idx2`` is the left operand; ``idx`` (with the repeated 1) is the right.
    res, lidx, ridx = _join.inner_join_indexer(idx2.values, idx.values)

    # Only values present on both sides are expected; 1 appears twice.
    exp_res = np.array([1, 1, 2, 5], dtype=np.int64)
    tm.assert_almost_equal(res, exp_res)

    exp_lidx = np.array([0, 0, 1, 2], dtype=np.int64)
    tm.assert_almost_equal(lidx, exp_lidx)

    exp_ridx = np.array([0, 1, 2, 3], dtype=np.int64)
    tm.assert_almost_equal(ridx, exp_ridx)


def test_merge_join_categorical_multiindex():
    """``DataFrame.join`` on a categorical + integer key pair matches ``merge``.

    The same scenario is run twice: once with the categories given as
    ``["a", "b", "c"]`` and no ``ordered`` flag, and once with an ordered
    categorical whose categories are ``["b", "a", "c"]``.
    """
    # From issue 16627
    _check_categorical_multiindex_join(["a", "b", "c"])

    # Same test, but with ordered categorical
    _check_categorical_multiindex_join(["b", "a", "c"], ordered=True)


def _check_categorical_multiindex_join(categories, ordered=None):
    """Assert that joining on two key columns equals the equivalent left merge.

    Parameters
    ----------
    categories : list of str
        Categories passed to both ``Categorical`` key columns.
    ordered : bool or None, default None
        Forwarded to ``Categorical``; ``None`` matches omitting the argument.
    """
    a = DataFrame(
        {
            "Cat1": Categorical(
                ["a", "b", "a", "c", "a", "b"], categories, ordered=ordered
            ),
            "Int1": [0, 1, 0, 1, 0, 0],
        }
    )

    b = DataFrame(
        {
            "Cat": Categorical(
                ["a", "b", "c", "a", "b", "c"], categories, ordered=ordered
            ),
            "Int": [0, 0, 0, 1, 1, 1],
            "Factor": [1.1, 1.2, 1.3, 1.4, 1.5, 1.6],
        }
    )
    # Right-hand side is a Series indexed by the (Cat, Int) pair.
    b = b.set_index(["Cat", "Int"])["Factor"]

    expected = merge(
        a,
        b.reset_index(),
        left_on=["Cat1", "Int1"],
        right_on=["Cat", "Int"],
        how="left",
    )
    result = a.join(b, on=["Cat1", "Int1"])
    # ``merge`` keeps the right-hand key columns; drop them before comparing.
    expected = expected.drop(["Cat", "Int"], axis=1)
    tm.assert_frame_equal(expected, result)