Skip to content

Commit

Permalink
fix test
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewgazelka committed Oct 28, 2024
1 parent 9457144 commit 23daae6
Showing 1 changed file with 8 additions and 17 deletions.
25 changes: 8 additions & 17 deletions tests/series/test_minhash.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
from __future__ import annotations

from typing import Literal

import pytest

from daft import DataType, Series
from daft.daft import HashFunctionKind


def minhash_none(
series: Series,
num_hashes: int,
ngram_size: int,
seed: int | None,
hash_function: HashFunctionKind,
hash_function: Literal["murmurhash3", "xxhash", "sha1"],
) -> list[list[int] | None]:
if seed is None:
return series.minhash(num_hashes, ngram_size, hash_function=hash_function).to_pylist()
Expand Down Expand Up @@ -106,9 +107,7 @@ def test_minhash_exact_values(num_hashes, ngram_size, seed, expected):
# hash_function is parametrized exactly once, by string name. Stacking a second
# parametrize for the same argument (the former HashFunctionKind enum list)
# makes pytest fail at collection time with a duplicate-parametrization error.
@pytest.mark.parametrize("num_hashes", [0, -1, -100])
@pytest.mark.parametrize("ngram_size", [1, 2, 4, 5, 100])
@pytest.mark.parametrize("seed", [1, -1, 123, None])
@pytest.mark.parametrize("hash_function", ["murmurhash3", "xxhash", "sha1"])
def test_minhash_fails_nonpositive_num_hashes(num_hashes, ngram_size, seed, hash_function):
    """Check that zero or negative ``num_hashes`` raises ``ValueError``.

    Runs for every combination of the parametrized ngram sizes, seeds
    (including ``None``) and hash-function names; the error message must
    match "num_hashes must be positive".
    """
    with pytest.raises(ValueError, match="num_hashes must be positive"):
        minhash_none(test_series, num_hashes, ngram_size, seed, hash_function)
Expand All @@ -117,9 +116,7 @@ def test_minhash_fails_nonpositive_num_hashes(num_hashes, ngram_size, seed, hash
# hash_function is parametrized exactly once, by string name. Stacking a second
# parametrize for the same argument (the former HashFunctionKind enum list)
# makes pytest fail at collection time with a duplicate-parametrization error.
@pytest.mark.parametrize("num_hashes", [1, 2, 16, 128])
@pytest.mark.parametrize("ngram_size", [0, -1, -100])
@pytest.mark.parametrize("seed", [1, -1, 123, None])
@pytest.mark.parametrize("hash_function", ["murmurhash3", "xxhash", "sha1"])
def test_minhash_fails_nonpositive_ngram_size(num_hashes, ngram_size, seed, hash_function):
    """Check that zero or negative ``ngram_size`` raises ``ValueError``.

    Runs for every combination of the parametrized hash counts, seeds
    (including ``None``) and hash-function names; the error message must
    match "ngram_size must be positive".
    """
    with pytest.raises(ValueError, match="ngram_size must be positive"):
        minhash_none(test_series, num_hashes, ngram_size, seed, hash_function)
Expand All @@ -128,9 +125,7 @@ def test_minhash_fails_nonpositive_ngram_size(num_hashes, ngram_size, seed, hash
@pytest.mark.parametrize("num_hashes", [1, 2, 16, 128])
@pytest.mark.parametrize("ngram_size", [1, 2, 4, 5, 100])
@pytest.mark.parametrize("seed", [1, -1, 123, None])
@pytest.mark.parametrize(
"hash_function", [HashFunctionKind.MurmurHash3, HashFunctionKind.XxHash, HashFunctionKind.Sha1]
)
@pytest.mark.parametrize("hash_function", ["murmurhash3", "xxhash", "sha1"])
def test_minhash_empty_series(num_hashes, ngram_size, seed, hash_function):
series = Series.from_pylist([]).cast(DataType.string())

Expand All @@ -141,9 +136,7 @@ def test_minhash_empty_series(num_hashes, ngram_size, seed, hash_function):
@pytest.mark.parametrize("num_hashes", [1, 2, 16, 128])
@pytest.mark.parametrize("ngram_size", [1, 2, 4, 5, 100])
@pytest.mark.parametrize("seed", [1, -1, 123, None])
@pytest.mark.parametrize(
"hash_function", [HashFunctionKind.MurmurHash3, HashFunctionKind.XxHash, HashFunctionKind.Sha1]
)
@pytest.mark.parametrize("hash_function", ["murmurhash3", "xxhash", "sha1"])
def test_minhash_seed_consistency(num_hashes, ngram_size, seed, hash_function):
minhash1 = minhash_none(test_series, num_hashes, ngram_size, seed, hash_function)
minhash2 = minhash_none(test_series, num_hashes, ngram_size, seed, hash_function)
Expand All @@ -153,9 +146,7 @@ def test_minhash_seed_consistency(num_hashes, ngram_size, seed, hash_function):
@pytest.mark.parametrize("num_hashes", [1, 2, 16, 128])
@pytest.mark.parametrize("ngram_size", [1, 2, 4, 5, 100])
@pytest.mark.parametrize("seed_pair", [[1, 2], [1, 5], [None, 2], [123, 234]])
@pytest.mark.parametrize(
"hash_function", [HashFunctionKind.MurmurHash3, HashFunctionKind.XxHash, HashFunctionKind.Sha1]
)
@pytest.mark.parametrize("hash_function", ["murmurhash3", "xxhash", "sha1"])
def test_minhash_seed_differences(num_hashes, ngram_size, seed_pair, hash_function):
minhash1 = minhash_none(test_series, num_hashes, ngram_size, seed_pair[0], hash_function)
minhash2 = minhash_none(test_series, num_hashes, ngram_size, seed_pair[1], hash_function)
Expand Down

0 comments on commit 23daae6

Please sign in to comment.