Skip to content

Commit

Permalink
Update usage of Document::getContent to getText
Browse files Browse the repository at this point in the history
  • Loading branch information
Mark Pollack committed Dec 12, 2024
1 parent f252b24 commit 5b11501
Show file tree
Hide file tree
Showing 8 changed files with 12 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ void testOnlyHeadersWithParagraphs() {
List<Document> documents = reader.get();

assertThat(documents).hasSize(4)
- .extracting(Document::getMetadata, Document::getContent)
+ .extracting(Document::getMetadata, Document::getText)
.containsOnly(tuple(Map.of("category", "header_1", "title", "Header 1a"),
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Curabitur diam eros, laoreet sit amet cursus vitae, varius sed nisi. Cras sit amet quam quis velit commodo porta consectetur id nisi. Phasellus tincidunt pulvinar augue."),
tuple(Map.of("category", "header_1", "title", "Header 1b"),
Expand All @@ -57,7 +57,7 @@ void testWithFormatting() {
List<Document> documents = reader.get();

assertThat(documents).hasSize(2)
- .extracting(Document::getMetadata, Document::getContent)
+ .extracting(Document::getMetadata, Document::getText)
.containsOnly(tuple(Map.of("category", "header_1", "title", "This is a fancy header name"),
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec tincidunt velit non bibendum gravida. Cras accumsan tincidunt ornare. Donec hendrerit consequat tellus blandit accumsan. Aenean aliquam metus at arcu elementum dignissim."),
tuple(Map.of("category", "header_3", "title", "Header 3"),
Expand All @@ -75,7 +75,7 @@ void testDocumentDividedViaHorizontalRules() {
List<Document> documents = reader.get();

assertThat(documents).hasSize(7)
- .extracting(Document::getMetadata, Document::getContent)
+ .extracting(Document::getMetadata, Document::getText)
.containsOnly(tuple(Map.of(),
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec tincidunt velit non bibendum gravida."),
tuple(Map.of(),
Expand Down Expand Up @@ -187,7 +187,7 @@ void testBlockquote() {
List<Document> documents = reader.get();

assertThat(documents).hasSize(2)
- .extracting(Document::getMetadata, Document::getContent)
+ .extracting(Document::getMetadata, Document::getText)
.containsOnly(tuple(Map.of(),
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Curabitur diam eros, laoreet sit amet cursus vitae, varius sed nisi. Cras sit amet quam quis velit commodo porta consectetur id nisi. Phasellus tincidunt pulvinar augue."),
tuple(Map.of("category", "blockquote"),
Expand Down Expand Up @@ -219,7 +219,7 @@ void testLists() {
List<Document> documents = reader.get();

assertThat(documents).hasSize(2)
- .extracting(Document::getMetadata, Document::getContent)
+ .extracting(Document::getMetadata, Document::getText)
.containsOnly(tuple(Map.of("category", "header_2", "title", "Ordered list"),
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Curabitur diam eros, laoreet sit amet cursus vitae, varius sed nisi. Cras sit amet quam quis velit commodo porta consectetur id nisi. Phasellus tincidunt pulvinar augue. Proin vel laoreet leo, sed luctus augue. Sed et ligula commodo, commodo lacus at, consequat turpis. Maecenas eget sapien odio. Pellentesque auctor pharetra eros, viverra sodales lorem aliquet id. Curabitur semper nisi vel sem interdum suscipit. Maecenas urna lectus, pellentesque in accumsan aliquam, congue eu libero. Ut rhoncus nec justo a porttitor."),
tuple(Map.of("category", "header_2", "title", "Unordered list"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ void classpathRead() {

assertThat(docs).hasSize(4);

- String allText = docs.stream().map(Document::getContent).collect(Collectors.joining(System.lineSeparator()));
+ String allText = docs.stream().map(Document::getText).collect(Collectors.joining(System.lineSeparator()));

assertThat(allText).doesNotContain(
List.of("Page 1 of 4", "Page 2 of 4", "Page 3 of 4", "Page 4 of 4", "PDF Bookmark Sample"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.ai.chat.prompt.PromptTemplate;
import org.springframework.ai.document.Document;
- import org.springframework.ai.model.Content;
import org.springframework.ai.vectorstore.SearchRequest;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.ai.vectorstore.filter.Filter;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ default List<float[]> embed(List<Document> documents, EmbeddingOptions options,
List<float[]> embeddings = new ArrayList<>(documents.size());
List<List<Document>> batch = batchingStrategy.batch(documents);
for (List<Document> subBatch : batch) {
- List<String> texts = subBatch.stream().map(Document::getContent).toList();
+ List<String> texts = subBatch.stream().map(Document::getText).toList();
EmbeddingRequest request = new EmbeddingRequest(texts, options);
EmbeddingResponse response = this.call(request);
for (int i = 0; i < subBatch.size(); i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ public static List<String> documents(VectorStoreObservationContext context) {
return List.of();
}

- return context.getQueryResponse().stream().map(Document::getContent).toList();
+ return context.getQueryResponse().stream().map(Document::getText).toList();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,8 @@ void whenDuplicatedDocumentsThenOnlyFirstOccurrenceIsKept() {

assertThat(result).hasSize(4);
assertThat(result).extracting(Document::getId).containsExactlyInAnyOrder("1", "2", "3", "4");
- assertThat(result).extracting(Document::getContent).contains("Content 2");
- assertThat(result).extracting(Document::getContent).doesNotContain("Content 2 Duplicate");
+ assertThat(result).extracting(Document::getText).contains("Content 2");
+ assertThat(result).extracting(Document::getText).doesNotContain("Content 2 Duplicate");
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ public class CricketWorldCupHanaController {
@GetMapping("/ai/hana-vector-store/cricket-world-cup")
public Map<String, String> hanaVectorStoreSearch(@RequestParam(value = "message") String message) {
var documents = this.hanaCloudVectorStore.similaritySearch(message);
- var inlined = documents.stream().map(Document::getContent).collect(Collectors.joining(System.lineSeparator()));
+ var inlined = documents.stream().map(Document::getText).collect(Collectors.joining(System.lineSeparator()));
var similarDocsMessage = new SystemPromptTemplate("Based on the following: {documents}")
.createMessage(Map.of("documents", inlined));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ public ResponseEntity<String> handleFileUpload(@RequestParam("pdf") MultipartFil
@GetMapping("/ai/hana-vector-store/cricket-world-cup")
public Map<String, String> hanaVectorStoreSearch(@RequestParam("message") String message) {
var documents = this.hanaCloudVectorStore.similaritySearch(message);
- var inlined = documents.stream().map(Document::getContent).collect(Collectors.joining(System.lineSeparator()));
+ var inlined = documents.stream().map(Document::getText).collect(Collectors.joining(System.lineSeparator()));
var similarDocsMessage = new SystemPromptTemplate("Based on the following: {documents}")
.createMessage(Map.of("documents", inlined));

Expand Down

0 comments on commit 5b11501

Please sign in to comment.