Skip to content

Commit

Permalink
[fix] Use skip arg for real
Browse files Browse the repository at this point in the history
  • Loading branch information
ppodolsky committed Apr 20, 2024
1 parent b15fc77 commit f1d4dc4
Show file tree
Hide file tree
Showing 7 changed files with 28 additions and 33 deletions.
13 changes: 6 additions & 7 deletions summa-core/src/components/index_holder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -551,8 +551,8 @@ impl IndexHolder {
/// Index generic `SummaDocument`
///
/// The document is bound to the index schema and converted into a `TantivyDocument` before it is handed to the index writer.
pub async fn index_document(&self, document: SummaDocument<'_>, skip_updated_at_modification: bool) -> SummaResult<()> {
let document = document.bound_with(&self.index.schema()).try_into()?;
pub async fn index_document(&self, document_bytes: &[u8], skip_updated_at_modification: bool) -> SummaResult<()> {
let document = SummaDocument::parse_json_bytes(&self.index.schema(), document_bytes, skip_updated_at_modification)?;
self.index_writer_holder()?.read().await.index_document(document, self.conflict_strategy())
}

Expand Down Expand Up @@ -679,18 +679,17 @@ pub mod tests {
use std::error::Error;
use std::sync::Arc;

use serde::Serialize;
use serde_json::json;
use summa_proto::proto;
use summa_proto::proto::ConflictStrategy;
use tantivy::collector::{Count, TopDocs};
use tantivy::query::{AllQuery, TermQuery};
use tantivy::schema::{IndexRecordOption, Value};
use tantivy::{doc, Document, IndexBuilder, TantivyDocument, Term};
use tantivy::{doc, IndexBuilder, TantivyDocument, Term};

use crate::components::index_holder::register_default_tokenizers;
use crate::components::test_utils::{create_test_schema, generate_documents};
use crate::components::IndexWriterHolder;
use crate::components::{IndexWriterHolder, SummaDocument};
use crate::configs::core::WriterThreads;

#[test]
Expand All @@ -712,7 +711,7 @@ pub mod tests {
)?;
let mut last_document = None;
for document in generate_documents(&schema, 10000) {
let document: TantivyDocument = document.bound_with(&schema).try_into()?;
let document: TantivyDocument = SummaDocument::parse_json_bytes(&schema, document.as_bytes(), false)?;
last_document = Some(document.clone());
index_writer_holder.index_document(document, ConflictStrategy::Merge)?;
}
Expand All @@ -724,7 +723,7 @@ pub mod tests {
);
index_writer_holder.commit()?;
for document in generate_documents(&schema, 1000) {
let document = document.bound_with(&schema).try_into()?;
let document = SummaDocument::parse_json_bytes(&schema, document.as_bytes(), false)?;
index_writer_holder.index_document(modified_last_document.clone(), ConflictStrategy::Merge)?;
index_writer_holder.index_document(document, ConflictStrategy::Merge)?;
}
Expand Down
17 changes: 8 additions & 9 deletions summa-core/src/components/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,8 @@ pub mod test_utils {
use rand::rngs::SmallRng;
use rand::{Rng, SeedableRng};
use serde_json::json;
use tantivy::doc;
use tantivy::schema::{IndexRecordOption, JsonObjectOptions, Schema, TextFieldIndexing, TextOptions, FAST, INDEXED, STORED};

use crate::components::SummaDocument;
use tantivy::{doc, Document};

pub fn create_test_schema() -> Schema {
let mut schema_builder = Schema::builder();
Expand Down Expand Up @@ -117,34 +115,35 @@ pub mod test_utils {
body_power: usize,
tag_prefix: &'a str,
tag_power: usize,
) -> SummaDocument<'a> {
) -> String {
static DOC_ID: AtomicI64 = AtomicI64::new(1);

let issued_at = 1674041452i64 - rng.gen_range(100..1000);
let doc_id = doc_id.unwrap_or_else(|| DOC_ID.fetch_add(1, Ordering::SeqCst));

SummaDocument::TantivyDocument(doc!(
doc!(
schema.get_field("id").expect("no expected field") => doc_id,
schema.get_field("title").expect("no expected field") => generate_sentence(rng, title_prefix, title_power, 3),
schema.get_field("body").expect("no expected field") => generate_sentence(rng, body_prefix, body_power, 50),
schema.get_field("tags").expect("no expected field") => generate_sentence(rng, tag_prefix, tag_power, 5),
schema.get_field("issued_at").expect("no expected field") => issued_at,
schema.get_field("metadata").expect("no expected field") => json!({"id": doc_id}),
))
)
.to_json(schema)
}

pub fn generate_unique_document<'a>(schema: &'a Schema, title: &'a str) -> SummaDocument<'a> {
pub fn generate_unique_document<'a>(schema: &'a Schema, title: &'a str) -> String {
generate_document(None, &mut SmallRng::seed_from_u64(42), schema, title, 0, "body", 1000, "tag", 100)
}

pub fn generate_documents(schema: &Schema, n: usize) -> Vec<SummaDocument> {
pub fn generate_documents(schema: &Schema, n: usize) -> Vec<String> {
let mut rng = SmallRng::seed_from_u64(42);
(0..n)
.map(|_| generate_document(None, &mut rng, schema, "title", 100, "body", 1000, "tag", 10))
.collect()
}

pub fn generate_documents_with_doc_id_gen_and_rng<'a>(doc_id_gen: AtomicI64, rng: &mut SmallRng, schema: &'a Schema, n: usize) -> Vec<SummaDocument<'a>> {
pub fn generate_documents_with_doc_id_gen_and_rng<'a>(doc_id_gen: AtomicI64, rng: &mut SmallRng, schema: &'a Schema, n: usize) -> Vec<String> {
(0..n)
.map(|_| {
generate_document(
Expand Down
12 changes: 6 additions & 6 deletions summa-core/src/components/summa_document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,15 +189,15 @@ impl<'a> SummaDocument<'a> {
}

/// Parse a JSON string into a JSON object, process its dynamic fields, and build a document from it.
pub fn parse_and_setup_document(&self, schema: &Schema, doc_json: &str, skip_updated_at_modification: bool) -> SummaResult<TantivyDocument> {
pub fn parse_and_setup_document(schema: &Schema, doc_json: &str, skip_updated_at_modification: bool) -> SummaResult<TantivyDocument> {
let mut json_obj: serde_json::Map<String, JsonValue> =
serde_json::from_str(doc_json).map_err(|_| DocumentParsingError::InvalidJson(doc_json.to_owned()))?;
process_dynamic_fields(schema, &mut json_obj, skip_updated_at_modification);
self.json_object_to_doc(schema, json_obj)
Self::json_object_to_doc(schema, json_obj)
}

/// Build a document object from an already parsed JSON object.
pub fn json_object_to_doc(&self, schema: &Schema, json_obj: serde_json::Map<String, JsonValue>) -> SummaResult<TantivyDocument> {
pub fn json_object_to_doc(schema: &Schema, json_obj: serde_json::Map<String, JsonValue>) -> SummaResult<TantivyDocument> {
let mut doc = TantivyDocument::default();
for (field_name, json_value) in json_obj {
if let Ok(field) = schema.get_field(&field_name) {
Expand All @@ -224,9 +224,9 @@ impl<'a> SummaDocument<'a> {
Ok(doc)
}

pub fn parse_json_bytes(&self, schema: &Schema, json_bytes: &[u8], skip_updated_at_modification: bool) -> SummaResult<TantivyDocument> {
pub fn parse_json_bytes(schema: &Schema, json_bytes: &[u8], skip_updated_at_modification: bool) -> SummaResult<TantivyDocument> {
let text_document = from_utf8(json_bytes).map_err(ValidationError::Utf8)?;
let parsed_document = self.parse_and_setup_document(schema, text_document, skip_updated_at_modification)?;
let parsed_document = Self::parse_and_setup_document(schema, text_document, skip_updated_at_modification)?;
Ok(parsed_document)
}
}
Expand All @@ -236,7 +236,7 @@ impl<'a> TryInto<TantivyDocument> for SummaDocument<'a> {

fn try_into(self) -> SummaResult<TantivyDocument> {
match self {
SummaDocument::BoundJsonBytes((schema, json_bytes)) => self.parse_json_bytes(schema, json_bytes, false),
SummaDocument::BoundJsonBytes((schema, json_bytes)) => Self::parse_json_bytes(schema, json_bytes, false),
SummaDocument::UnboundJsonBytes(_) => Err(Error::UnboundDocument),
SummaDocument::TantivyDocument(document) => Ok(document),
}
Expand Down
2 changes: 1 addition & 1 deletion summa-server/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
edition = "2021"
name = "summa-server"
version = "0.23.5"
version = "0.23.6"
license-file = "LICENSE"
description = "Fast full-text search server"
homepage = "https://github.com/izihawa/summa"
Expand Down
4 changes: 2 additions & 2 deletions summa-server/src/apis/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use std::io::ErrorKind;
use std::sync::Arc;
use std::time::Instant;

use summa_core::components::{IndexHolder, NamedFieldDocument, SummaDocument};
use summa_core::components::{IndexHolder, NamedFieldDocument};
use summa_core::configs::ConfigProxy;
use summa_core::utils::sync::Handler;
use summa_core::validators;
Expand Down Expand Up @@ -236,7 +236,7 @@ impl proto::index_api_server::IndexApi for IndexApiImpl {
self.index_service
.get_index_holder(&proto_request.index_name)
.await?
.index_document(SummaDocument::UnboundJsonBytes(&proto_request.document), proto_request.skip_updated_at_modification)
.index_document(&proto_request.document, proto_request.skip_updated_at_modification)
.await
.map_err(crate::errors::Error::from)?;
let response = proto::IndexDocumentResponse {};
Expand Down
6 changes: 3 additions & 3 deletions summa-server/src/services/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -878,10 +878,10 @@ pub(crate) mod tests {
.await?;

for d in generate_documents(index_holder.schema(), 1000) {
index_holder.index_document(d, false).await?;
index_holder.index_document(d.as_bytes(), false).await?;
}
index_holder
.index_document(generate_unique_document(index_holder.schema(), "testtitle"), false)
.index_document(generate_unique_document(index_holder.schema(), "testtitle").as_bytes(), false)
.await?;
index_service.commit(&index_holder, false).await?;

Expand Down Expand Up @@ -922,7 +922,7 @@ pub(crate) mod tests {
let mut rng = SmallRng::seed_from_u64(42);
for _ in 0..4 {
for d in generate_documents_with_doc_id_gen_and_rng(AtomicI64::new(1), &mut rng, &schema, 300) {
index_holder.index_document(d, false).await?;
index_holder.index_document(d.as_bytes(), false).await?;
}
index_service.commit(&index_holder, false).await?;
}
Expand Down
7 changes: 2 additions & 5 deletions summa-wasm/crate/web_index_registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use js_sys::Uint8Array;
use prost::Message;
use serde::Serialize;
use serde_wasm_bindgen::Serializer;
use summa_core::components::{IndexHolder, IndexRegistry, SummaDocument};
use summa_core::components::{IndexHolder, IndexRegistry};
use summa_core::configs::{ConfigProxy, DirectProxy};
use summa_core::directories::DefaultExternalRequestGenerator;
use summa_core::errors::SummaResult;
Expand Down Expand Up @@ -140,10 +140,7 @@ impl WrappedIndexRegistry {
#[wasm_bindgen]
pub async fn index_document(&self, index_name: &str, document: &str) -> Result<(), JsValue> {
let index_holder = self.index_registry.get_index_holder_by_name(index_name).await.map_err(Error::from)?;
index_holder
.index_document(SummaDocument::UnboundJsonBytes(document.as_bytes()), false)
.await
.map_err(Error::from)?;
index_holder.index_document(document.as_bytes(), false).await.map_err(Error::from)?;
Ok(())
}

Expand Down

0 comments on commit f1d4dc4

Please sign in to comment.