Skip to content

Commit

Permalink
fix: Wrong total documents calculation
Browse files (browse the repository at this point in the history)
  • Loading branch information
ppodolsky committed Jun 14, 2023
1 parent 7b144a8 commit c78b3c4
Show file tree
Hide file tree
Showing 10 changed files with 296 additions and 285 deletions.
2 changes: 1 addition & 1 deletion summa-core/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "summa-core"
version = "0.16.0"
version = "0.17.0"
authors = ["Pasha Podolsky <[email protected]>"]
edition = "2021"
license-file = "LICENSE"
Expand Down
1 change: 1 addition & 0 deletions summa-core/src/components/index_registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ impl IndexRegistry {
snippet_generators_futures.push(async move { (doc_references.index_alias.as_str(), sg_config.as_tantivy_async().await) });
}
}
has_next |= total_documents > limit + offset;

trace!(action = "generate_snippets");
let snippet_generators: HashMap<&str, Vec<(String, SnippetGenerator)>> =
Expand Down
4 changes: 2 additions & 2 deletions summa-embed-py/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "summa-embed-py"
version = "0.16.0"
version = "0.17.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
Expand All @@ -15,7 +15,7 @@ pyo3 = { version = "0.18", features = ["serde"] }
pyo3-asyncio = { version = "0.18", features = ["attributes", "tokio-runtime"] }
pyo3-log = "0.8"
serde_json = "1.0"
summa-core = { version = "0.16.0", path = "../summa-core", default_features = false, features = ["fs", "hyper-external-request", "tokio-rt"] }
summa-core = { version = "0.17.0", path = "../summa-core", default_features = false, features = ["fs", "hyper-external-request", "tokio-rt"] }
summa-proto = { workspace = true }
tantivy = { workspace = true }
tokio = { workspace = true }
2 changes: 1 addition & 1 deletion summa-embed-py/summa_embed/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class IndexRegistry:
def __init__(self):
self.index_registry = IndexRegistryBin()

async def add(self, index_config, index_name: Optional[str] = None) -> index_service_pb2.IndexAttributes:
async def add(self, index_config, index_name: str) -> index_service_pb2.IndexAttributes:
parsed_index_config = index_service_pb2.IndexEngineConfig()
ParseDict(index_config, parsed_index_config)
index_attributes_bytes = await self.index_registry.add(
Expand Down
172 changes: 86 additions & 86 deletions summa-embed-py/summa_embed/proto/index_service_pb2.py

Large diffs are not rendered by default.

8 changes: 2 additions & 6 deletions summa-embed-py/summa_embed/proto/index_service_pb2.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -235,22 +235,18 @@ class GetIndicesResponse(_message.Message):
def __init__(self, index_names: _Optional[_Iterable[str]] = ...) -> None: ...

class IndexAttributes(_message.Message):
__slots__ = ["conflict_strategy", "created_at", "default_index_name", "default_snippets", "description", "multi_fields", "unique_fields"]
__slots__ = ["conflict_strategy", "created_at", "description", "multi_fields", "unique_fields"]
CONFLICT_STRATEGY_FIELD_NUMBER: _ClassVar[int]
CREATED_AT_FIELD_NUMBER: _ClassVar[int]
DEFAULT_INDEX_NAME_FIELD_NUMBER: _ClassVar[int]
DEFAULT_SNIPPETS_FIELD_NUMBER: _ClassVar[int]
DESCRIPTION_FIELD_NUMBER: _ClassVar[int]
MULTI_FIELDS_FIELD_NUMBER: _ClassVar[int]
UNIQUE_FIELDS_FIELD_NUMBER: _ClassVar[int]
conflict_strategy: ConflictStrategy
created_at: int
default_index_name: str
default_snippets: _containers.RepeatedScalarFieldContainer[str]
description: str
multi_fields: _containers.RepeatedScalarFieldContainer[str]
unique_fields: _containers.RepeatedScalarFieldContainer[str]
def __init__(self, created_at: _Optional[int] = ..., unique_fields: _Optional[_Iterable[str]] = ..., multi_fields: _Optional[_Iterable[str]] = ..., default_index_name: _Optional[str] = ..., description: _Optional[str] = ..., default_snippets: _Optional[_Iterable[str]] = ..., conflict_strategy: _Optional[_Union[ConflictStrategy, str]] = ...) -> None: ...
def __init__(self, created_at: _Optional[int] = ..., unique_fields: _Optional[_Iterable[str]] = ..., multi_fields: _Optional[_Iterable[str]] = ..., description: _Optional[str] = ..., conflict_strategy: _Optional[_Union[ConflictStrategy, str]] = ...) -> None: ...

class IndexDescription(_message.Message):
__slots__ = ["compression", "index_aliases", "index_attributes", "index_engine", "index_name", "num_docs"]
Expand Down
316 changes: 159 additions & 157 deletions summa-embed-py/summa_embed/proto/query_pb2.py

Large diffs are not rendered by default.

68 changes: 40 additions & 28 deletions summa-embed-py/summa_embed/proto/query_pb2.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -207,12 +207,14 @@ class EmptyQuery(_message.Message):
def __init__(self) -> None: ...

class ExactMatchesPromoter(_message.Message):
__slots__ = ["boost", "slop"]
__slots__ = ["boost", "fields", "slop"]
BOOST_FIELD_NUMBER: _ClassVar[int]
FIELDS_FIELD_NUMBER: _ClassVar[int]
SLOP_FIELD_NUMBER: _ClassVar[int]
boost: float
fields: _containers.RepeatedScalarFieldContainer[str]
slop: int
def __init__(self, slop: _Optional[int] = ..., boost: _Optional[float] = ...) -> None: ...
def __init__(self, slop: _Optional[int] = ..., boost: _Optional[float] = ..., fields: _Optional[_Iterable[str]] = ...) -> None: ...

class ExistsQuery(_message.Message):
__slots__ = ["field"]
Expand Down Expand Up @@ -241,14 +243,6 @@ class FacetCollectorOutput(_message.Message):
facet_counts: _containers.ScalarMap[str, int]
def __init__(self, facet_counts: _Optional[_Mapping[str, int]] = ...) -> None: ...

class FieldMapper(_message.Message):
__slots__ = ["fields", "mapper_name"]
FIELDS_FIELD_NUMBER: _ClassVar[int]
MAPPER_NAME_FIELD_NUMBER: _ClassVar[int]
fields: _containers.RepeatedScalarFieldContainer[str]
mapper_name: str
def __init__(self, mapper_name: _Optional[str] = ..., fields: _Optional[_Iterable[str]] = ...) -> None: ...

class Highlight(_message.Message):
__slots__ = ["to"]
FROM_FIELD_NUMBER: _ClassVar[int]
Expand Down Expand Up @@ -286,12 +280,6 @@ class HistogramResult(_message.Message):
buckets: _containers.RepeatedCompositeFieldContainer[BucketEntry]
def __init__(self, buckets: _Optional[_Iterable[_Union[BucketEntry, _Mapping]]] = ...) -> None: ...

class InflectionConfig(_message.Message):
__slots__ = ["derive_plural"]
DERIVE_PLURAL_FIELD_NUMBER: _ClassVar[int]
derive_plural: bool
def __init__(self, derive_plural: bool = ...) -> None: ...

class Key(_message.Message):
__slots__ = ["f64", "str"]
F64_FIELD_NUMBER: _ClassVar[int]
Expand Down Expand Up @@ -356,6 +344,20 @@ class MoreLikeThisQuery(_message.Message):
stop_words: _containers.RepeatedScalarFieldContainer[str]
def __init__(self, document: _Optional[str] = ..., min_doc_frequency: _Optional[int] = ..., max_doc_frequency: _Optional[int] = ..., min_term_frequency: _Optional[int] = ..., max_query_terms: _Optional[int] = ..., min_word_length: _Optional[int] = ..., max_word_length: _Optional[int] = ..., boost: _Optional[str] = ..., stop_words: _Optional[_Iterable[str]] = ...) -> None: ...

class MorphologyConfig(_message.Message):
__slots__ = ["derive_tenses_coefficient"]
DERIVE_TENSES_COEFFICIENT_FIELD_NUMBER: _ClassVar[int]
derive_tenses_coefficient: float
def __init__(self, derive_tenses_coefficient: _Optional[float] = ...) -> None: ...

class NerMatchesPromoter(_message.Message):
__slots__ = ["boost", "fields"]
BOOST_FIELD_NUMBER: _ClassVar[int]
FIELDS_FIELD_NUMBER: _ClassVar[int]
boost: float
fields: _containers.RepeatedScalarFieldContainer[str]
def __init__(self, boost: _Optional[float] = ..., fields: _Optional[_Iterable[str]] = ...) -> None: ...

class PhraseQuery(_message.Message):
__slots__ = ["field", "slop", "value"]
FIELD_FIELD_NUMBER: _ClassVar[int]
Expand Down Expand Up @@ -395,7 +397,7 @@ class Query(_message.Message):
def __init__(self, boolean: _Optional[_Union[BooleanQuery, _Mapping]] = ..., match: _Optional[_Union[MatchQuery, _Mapping]] = ..., regex: _Optional[_Union[RegexQuery, _Mapping]] = ..., term: _Optional[_Union[TermQuery, _Mapping]] = ..., phrase: _Optional[_Union[PhraseQuery, _Mapping]] = ..., range: _Optional[_Union[RangeQuery, _Mapping]] = ..., all: _Optional[_Union[AllQuery, _Mapping]] = ..., more_like_this: _Optional[_Union[MoreLikeThisQuery, _Mapping]] = ..., boost: _Optional[_Union[BoostQuery, _Mapping]] = ..., disjunction_max: _Optional[_Union[DisjunctionMaxQuery, _Mapping]] = ..., empty: _Optional[_Union[EmptyQuery, _Mapping]] = ..., exists: _Optional[_Union[ExistsQuery, _Mapping]] = ...) -> None: ...

class QueryParserConfig(_message.Message):
__slots__ = ["boolean_should_mode", "default_fields", "disjuction_max_mode", "exact_matches_promoter", "field_aliases", "field_boosts", "field_mappings", "inflection_configs", "missing_field_policy", "term_limit"]
__slots__ = ["boolean_should_mode", "default_fields", "disjuction_max_mode", "exact_matches_promoter", "field_aliases", "field_boosts", "missing_field_policy", "morphology_configs", "ner_matches_promoter", "query_language", "term_field_mapper_configs", "term_limit"]
class FieldAliasesEntry(_message.Message):
__slots__ = ["key", "value"]
KEY_FIELD_NUMBER: _ClassVar[int]
Expand All @@ -410,41 +412,45 @@ class QueryParserConfig(_message.Message):
key: str
value: float
def __init__(self, key: _Optional[str] = ..., value: _Optional[float] = ...) -> None: ...
class FieldMappingsEntry(_message.Message):
class MorphologyConfigsEntry(_message.Message):
__slots__ = ["key", "value"]
KEY_FIELD_NUMBER: _ClassVar[int]
VALUE_FIELD_NUMBER: _ClassVar[int]
key: str
value: FieldMapper
def __init__(self, key: _Optional[str] = ..., value: _Optional[_Union[FieldMapper, _Mapping]] = ...) -> None: ...
class InflectionConfigsEntry(_message.Message):
value: MorphologyConfig
def __init__(self, key: _Optional[str] = ..., value: _Optional[_Union[MorphologyConfig, _Mapping]] = ...) -> None: ...
class TermFieldMapperConfigsEntry(_message.Message):
__slots__ = ["key", "value"]
KEY_FIELD_NUMBER: _ClassVar[int]
VALUE_FIELD_NUMBER: _ClassVar[int]
key: str
value: InflectionConfig
def __init__(self, key: _Optional[str] = ..., value: _Optional[_Union[InflectionConfig, _Mapping]] = ...) -> None: ...
value: TermFieldMapperConfig
def __init__(self, key: _Optional[str] = ..., value: _Optional[_Union[TermFieldMapperConfig, _Mapping]] = ...) -> None: ...
BOOLEAN_SHOULD_MODE_FIELD_NUMBER: _ClassVar[int]
DEFAULT_FIELDS_FIELD_NUMBER: _ClassVar[int]
DISJUCTION_MAX_MODE_FIELD_NUMBER: _ClassVar[int]
EXACT_MATCHES_PROMOTER_FIELD_NUMBER: _ClassVar[int]
FIELD_ALIASES_FIELD_NUMBER: _ClassVar[int]
FIELD_BOOSTS_FIELD_NUMBER: _ClassVar[int]
FIELD_MAPPINGS_FIELD_NUMBER: _ClassVar[int]
INFLECTION_CONFIGS_FIELD_NUMBER: _ClassVar[int]
MISSING_FIELD_POLICY_FIELD_NUMBER: _ClassVar[int]
MORPHOLOGY_CONFIGS_FIELD_NUMBER: _ClassVar[int]
NER_MATCHES_PROMOTER_FIELD_NUMBER: _ClassVar[int]
QUERY_LANGUAGE_FIELD_NUMBER: _ClassVar[int]
TERM_FIELD_MAPPER_CONFIGS_FIELD_NUMBER: _ClassVar[int]
TERM_LIMIT_FIELD_NUMBER: _ClassVar[int]
boolean_should_mode: MatchQueryBooleanShouldMode
default_fields: _containers.RepeatedScalarFieldContainer[str]
disjuction_max_mode: MatchQueryDisjuctionMaxMode
exact_matches_promoter: ExactMatchesPromoter
field_aliases: _containers.ScalarMap[str, str]
field_boosts: _containers.ScalarMap[str, float]
field_mappings: _containers.MessageMap[str, FieldMapper]
inflection_configs: _containers.MessageMap[str, InflectionConfig]
missing_field_policy: MissingFieldPolicy
morphology_configs: _containers.MessageMap[str, MorphologyConfig]
ner_matches_promoter: NerMatchesPromoter
query_language: str
term_field_mapper_configs: _containers.MessageMap[str, TermFieldMapperConfig]
term_limit: int
def __init__(self, field_aliases: _Optional[_Mapping[str, str]] = ..., field_boosts: _Optional[_Mapping[str, float]] = ..., field_mappings: _Optional[_Mapping[str, FieldMapper]] = ..., term_limit: _Optional[int] = ..., default_fields: _Optional[_Iterable[str]] = ..., boolean_should_mode: _Optional[_Union[MatchQueryBooleanShouldMode, _Mapping]] = ..., disjuction_max_mode: _Optional[_Union[MatchQueryDisjuctionMaxMode, _Mapping]] = ..., exact_matches_promoter: _Optional[_Union[ExactMatchesPromoter, _Mapping]] = ..., missing_field_policy: _Optional[_Union[MissingFieldPolicy, str]] = ..., inflection_configs: _Optional[_Mapping[str, InflectionConfig]] = ...) -> None: ...
def __init__(self, field_aliases: _Optional[_Mapping[str, str]] = ..., field_boosts: _Optional[_Mapping[str, float]] = ..., term_field_mapper_configs: _Optional[_Mapping[str, TermFieldMapperConfig]] = ..., term_limit: _Optional[int] = ..., default_fields: _Optional[_Iterable[str]] = ..., boolean_should_mode: _Optional[_Union[MatchQueryBooleanShouldMode, _Mapping]] = ..., disjuction_max_mode: _Optional[_Union[MatchQueryDisjuctionMaxMode, _Mapping]] = ..., exact_matches_promoter: _Optional[_Union[ExactMatchesPromoter, _Mapping]] = ..., missing_field_policy: _Optional[_Union[MissingFieldPolicy, str]] = ..., morphology_configs: _Optional[_Mapping[str, MorphologyConfig]] = ..., query_language: _Optional[str] = ..., ner_matches_promoter: _Optional[_Union[NerMatchesPromoter, _Mapping]] = ...) -> None: ...

class RandomDocument(_message.Message):
__slots__ = ["document", "index_alias", "score"]
Expand Down Expand Up @@ -622,6 +628,12 @@ class StatsResult(_message.Message):
sum: float
def __init__(self, count: _Optional[int] = ..., sum: _Optional[float] = ..., min: _Optional[float] = ..., max: _Optional[float] = ..., avg: _Optional[float] = ...) -> None: ...

class TermFieldMapperConfig(_message.Message):
__slots__ = ["fields"]
FIELDS_FIELD_NUMBER: _ClassVar[int]
fields: _containers.RepeatedScalarFieldContainer[str]
def __init__(self, fields: _Optional[_Iterable[str]] = ...) -> None: ...

class TermQuery(_message.Message):
__slots__ = ["field", "value"]
FIELD_FIELD_NUMBER: _ClassVar[int]
Expand Down
6 changes: 3 additions & 3 deletions summa-server/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
edition = "2021"
name = "summa-server"
version = "0.16.0"
version = "0.17.0"
license-file = "LICENSE"
description = "Fast full-text search server"
homepage = "https://github.com/izihawa/summa"
Expand Down Expand Up @@ -29,7 +29,7 @@ maintenance = { status = "actively-developed" }
tonic-build = { version = "0.9.1", default-features = false, features = ["prost", "transport"] }

[dev-dependencies]
summa-core = { version = "0.16.0", path = "../summa-core", features = ["fs", "hyper-external-request", "nn", "tokio-rt"] }
summa-core = { version = "0.17.0", path = "../summa-core", features = ["fs", "hyper-external-request", "tokio-rt"] }
tempdir = "0.3.7"

[dependencies]
Expand Down Expand Up @@ -58,7 +58,7 @@ serde = { workspace = true }
serde_derive = "1.0"
serde_json = { workspace = true }
serde_yaml = { workspace = true }
summa-core = { version = "0.16.0", path = "../summa-core", features = ["fs", "hyper-external-request", "tokio-rt"] }
summa-core = { version = "0.17.0", path = "../summa-core", features = ["fs", "hyper-external-request", "tokio-rt"] }
summa-proto = { workspace = true, features = ["grpc"] }
take_mut = { workspace = true }
tantivy = { workspace = true }
Expand Down
2 changes: 1 addition & 1 deletion summa-wasm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ prost = { workspace = true }
serde = { workspace = true }
serde-wasm-bindgen = "0.5"
strfmt = { workspace = true }
summa-core = { version = "0.16.0", path = "../summa-core", default_features = false }
summa-core = { version = "0.17.0", path = "../summa-core", default_features = false }
summa-proto = { workspace = true }
tantivy = { workspace = true, features = ["wasm"] }
thiserror = { workspace = true }
Expand Down

0 comments on commit c78b3c4

Please sign in to comment.