From 055b1def4442767098b143af34468ee9c51dd863 Mon Sep 17 00:00:00 2001 From: tomsmoker Date: Sat, 20 Apr 2024 19:20:39 -0700 Subject: [PATCH] v0.0.4: query_graph_with_schema --- .github/workflows/main.yml | 52 +++++ README.md | 84 +++++++- docs/api.md | 194 +++++++++++++++++- docs/tutorial.md | 101 ++++++++- examples/create_graph_from_questions.ipynb | 183 +++++++++++++++++ ...o.ipynb => create_graph_from_schema.ipynb} | 56 ++++- src/whyhow/__init__.py | 2 +- src/whyhow/apis/graph.py | 47 ++++- src/whyhow/schemas/common.py | 48 ++++- src/whyhow/schemas/graph.py | 14 +- 10 files changed, 747 insertions(+), 34 deletions(-) create mode 100644 .github/workflows/main.yml create mode 100644 examples/create_graph_from_questions.ipynb rename examples/{demo.ipynb => create_graph_from_schema.ipynb} (65%) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..b05e001 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,52 @@ +name: all + +on: + pull_request: + push: + branches: [main] + +jobs: + build: + + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest] + python-version: ['3.10'] + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip + pip install -e .[dev] + + - name: Lint with flake8 + run: | + flake8 src tests examples + + - name: Check style with black + run: | + black src tests examples + + - name: Run security check + run: | + bandit -qr -c pyproject.toml src examples + + - name: Run import check + run: | + isort --check src tests examples + + - name: Run mypy + run: | + mypy src + + - name: Test with pytest + run: | + pytest --color=yes diff --git a/README.md b/README.md index af6a26b..3d343e5 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [![Checked with 
mypy](https://img.shields.io/badge/mypy-checked-blue)](https://mypy-lang.org/) [![Whyhow Discord](https://dcbadge.vercel.app/api/server/9bWqrsxgHr?compact=true&style=flat)](https://discord.gg/9bWqrsxgHr) -The WhyHow Knowledge Graph Creation SDK enables you to quickly and easily build automated knowledge graphs tailored to your unique worldview. Instantly build, extend, and query well-scoped KGs using a raw PDF and simple seed concepts in natural language. This version leverages OpenAI for embeddings and NLP, Pinecone serverless for scalable vector search and storage, and Neo4j for graph data storage and management. +The WhyHow Knowledge Graph Creation SDK enables you to quickly and easily build automated knowledge graphs tailored to your unique worldview. Instantly build, extend, and query well-scoped KGs with your data. # Installation @@ -74,15 +74,91 @@ Your namespace is a logical grouping of the raw data you upload, the seed concep namespace = "harry-potter" documents = ["files/harry_potter_and_the_philosophers_stone.pdf","files/harry_potter_and_the_chamber_of_secrets.pdf"] -add_docs_response = client.graph.add_documents(namespace, documents) -print(add_docs_response) +documents_response = client.graph.add_documents(namespace, documents) +print(documents_response) # Adding your documents ``` ## Create a graph -Tell the WhyHow SDK what you care about by providing a list of concepts in the form of natural language questions. Using these questions, we create a small ontology to guide extraction of entities and relationships that are most relevant to your use case. We then construct triples and generate a graph. +You can create a graph in two different ways. First, you can create a graph using a user-defined schema, giving you complete control over the types of entities and relationships that are extracted and used to build the graph. Or, you can create a graph using a set of seed questions. 
In this case, WhyHow will automatically extract entities and relationships that are most applicable to the things you want to know, and construct a graph from these concepts. + +Create graph with **schema** if... + +1. Your graph must adhere to a consistent structure. +2. You are very familiar with the structure of your raw documents. +3. You need comprehensive extraction of concepts across the entire document. + +Create graph with **seed questions** if... + +1. You are unsure as to which relationships and patterns you'd like to build into your graph. +2. You want to build your graph with only the most semantically similar raw data. + +### Create a graph with schema + +Tell the WhyHow SDK exactly which entities, relationships, and patterns you'd like to extract and build into your graph by defining them in a JSON-based schema. + +```shell + +#schema.json + +{ + "entities": [ + { + "name": "character", + "description": "A person appearing in the book, e.g., Harry Potter, Ron Weasley, Hermione Granger, Albus Dumbledore." + }, + { + "name": "object", + "description": "Inanimate items that characters use or interact with, e.g., wand, Philosopher's Stone, Invisibility Cloak, broomstick." + } + ... + ], + "relations": [ + { + "name": "friends with", + "description": "Denotes a friendly relationship between characters." + }, + { + "name": "interacts with", + "description": "Describes a scenario in which a character engages with another character, creature, or object." + }, + ... + ], + "patterns": [ + { + "head": "character", + "relation": "friends with", + "tail": "character", + "description": "One character is friends with another, e.g., Harry Potter is friends with Ron Weasley." + }, + { + "head": "character", + "relation": "interacts with", + "tail": "object", + "description": "A character interacting with an object, e.g., Harry Potter interacts with the Invisibility Cloak." 
+ } + ] +} + +``` + +Using this schema, we extract relevant concepts from your raw data, construct triples, and generate a graph according to the patterns you define. + +```shell +# Create graph from schema + +schema = "files/schema.json" +create_graph_with_schema_response = client.graph.create_graph_from_schema(namespace, schema) +print(create_graph_with_schema_response) +# Creating your graph + +``` + +### Create a graph with seed questions + +Tell the WhyHow SDK what you care about by providing a list of concepts in the form of natural language questions. Using these questions, we create a small ontology to guide extraction of entities and relationships that are most relevant to your use case, then construct a graph. ```shell diff --git a/docs/api.md b/docs/api.md index cf5aa07..c2f2fed 100644 --- a/docs/api.md +++ b/docs/api.md @@ -1 +1,193 @@ -# Reference +Here's the generated `api.md` file for your mkdocs based on the provided code files: + +```markdown +# API Reference + +This document provides a reference for the WhyHow API, which allows you to interact with the graph functionality. + +## GraphAPI + +The `GraphAPI` class provides methods to interact with the graph API synchronously. + +### `add_documents` + +```python +def add_documents(self, namespace: str, documents: list[str]) -> str +``` + +Add documents to the graph. + +#### Parameters + +- `namespace` (str): The namespace of the graph. +- `documents` (list[str]): The documents to add. + +#### Returns + +- (str): The response message. + +#### Raises + +- `ValueError`: If no documents are provided, not all documents exist, only PDFs are supported, PDFs are too large (limit: 8MB), or too many documents are provided (limit: 3 files during the beta). + +### `create_graph` + +```python +def create_graph(self, namespace: str, questions: list[str]) -> str +``` + +Create a new graph. + +#### Parameters + +- `namespace` (str): The namespace of the graph to create. 
+- `questions` (list[str]): The seed concepts to initialize the graph with. + +#### Returns + +- (str): The response message. + +#### Raises + +- `ValueError`: If no questions are provided. + +### `create_graph_from_schema` + +```python +def create_graph_from_schema(self, namespace: str, schema_file: str) -> str +``` + +Create a new graph based on a user-defined schema. + +#### Parameters + +- `namespace` (str): The namespace of the graph to create. +- `schema_file` (str): Path to the JSON schema file used to build the graph. + +#### Returns + +- (str): The response message. + +#### Raises + +- `ValueError`: If no schema is provided. + +### `query_graph` + +```python +def query_graph(self, namespace: str, query: str) -> QueryGraphReturn +``` + +Query the graph. + +#### Parameters + +- `namespace` (str): The namespace of the graph. +- `query` (str): The query to run. + +#### Returns + +- (`QueryGraphReturn`): The answer to the query. + +## Schemas + +The WhyHow API uses Pydantic models to define the request and response schemas. 
+ +### `AddDocumentsResponse` + +```python +class AddDocumentsResponse(BaseResponse): + """Schema for the response body of the add documents endpoint.""" + + namespace: str + message: str +``` + +### `CreateQuestionGraphRequest` + +```python +class CreateQuestionGraphRequest(BaseRequest): + """Schema for the request body of the create graph endpoint.""" + + questions: list[str] +``` + +### `CreateSchemaGraphRequest` + +```python +class CreateSchemaGraphRequest(BaseRequest): + """Schema for the request body of the create graph with schema endpoint.""" + + graph_schema: Schema +``` + +### `CreateGraphResponse` + +```python +class CreateGraphResponse(BaseResponse): + """Schema for the response body of the create graph endpoint.""" + + namespace: str + message: str +``` + +### `QueryGraphRequest` + +```python +class QueryGraphRequest(BaseRequest): + """Schema for the request body of the query graph endpoint.""" + + query: str +``` + +### `QueryGraphResponse` + +```python +class QueryGraphResponse(BaseResponse): + """Schema for the response body of the query graph endpoint.""" + + namespace: str + answer: str +``` + +### `QueryGraphReturn` + +```python +class QueryGraphReturn(BaseReturn): + """Schema for the return value of the query graph endpoint.""" + + answer: str +``` + +## Base Classes + +The WhyHow API uses the following base classes for the API schemas: + +### `APIBase` + +```python +class APIBase(BaseModel, ABC): + """Base class for API schemas.""" + + model_config = ConfigDict(arbitrary_types_allowed=True) + + client: Client + prefix: str = "" +``` + +### `AsyncAPIBase` + +```python +class AsyncAPIBase(BaseModel, ABC): + """Base class for async API schemas.""" + + model_config = ConfigDict(arbitrary_types_allowed=True) + + client: AsyncClient + prefix: str = "" +``` +``` + +This `api.md` file provides an overview of the `GraphAPI` class and its methods, along with the request and response schemas used by the API. 
It also includes information about the base classes used for the API schemas. + +You can include this file in your mkdocs documentation to provide a reference for the WhyHow API. \ No newline at end of file diff --git a/docs/tutorial.md b/docs/tutorial.md index e13e68e..43dd399 100644 --- a/docs/tutorial.md +++ b/docs/tutorial.md @@ -1,6 +1,6 @@ # Tutorial -This is a straightforward tutorial on how ot build and query a knowledge graph using PDF texts of Harry Potter books using WhyHow SDK. This example will guide you through importing documents from the Harry Potter series into the knowledge graph, then querying it for insights related to the series. +This is a straightforward tutorial on how to build and query a knowledge graph using PDF texts of Harry Potter books using WhyHow SDK. This example will guide you through importing documents from the Harry Potter series into the knowledge graph, then querying it for insights related to the series. ## Environment Setup @@ -24,7 +24,7 @@ export NEO4J_URL= ## Install WhyHow SDK -If you haven't already, install the `WhyHow SDK `using pip: +If you haven't already, install the `WhyHow SDK` using pip: ```shell pip install whyhow @@ -38,12 +38,12 @@ With your environment variables set, you can now configure the `WhyHow` client i import os from whyhow import WhyHow -client = WhyHow(api_key=) +client = WhyHow(api_key=) ``` -## Creating the Knowledge Graph +## Option 1 - Create the Knowledge Graph from a schema -Define the namespace for your project and specify the paths to your Harry Potter series documents. Your namespace is a logical grouping of the raw data you upload, the seed concepts you define, and the graphs you create. Namespaces are meant to be tightly scoped to your use case. You can create as many namespaces as you want. +First, you need to define the namespace for your project and specify the paths to your Harry Potter book documents. 
Your namespace is a logical grouping of the raw data you upload, the schema you define, and the graphs you create. Namespaces are meant to be tightly scoped to your use case. You can create as many namespaces as you want. ```shell namespace = "harry-potter" @@ -59,7 +59,82 @@ print("Documents Added:", documents_response) ``` -Create the knowledge graph from the uploaded documents: +Next, you must create a schema which defines the entities, relationships, and patterns you'd like to use to construct the graph. Create this and save it as a JSON file. + +```shell + +#schema.json + +{ + "entities": [ + { + "name": "character", + "description": "A person appearing in the book, e.g., Harry Potter, Ron Weasley, Hermione Granger, Albus Dumbledore." + }, + { + "name": "object", + "description": "Inanimate items that characters use or interact with, e.g., wand, Philosopher's Stone, Invisibility Cloak, broomstick." + } + ], + "relations": [ + { + "name": "friends with", + "description": "Denotes a friendly relationship between characters." + }, + { + "name": "interacts with", + "description": "Describes a scenario in which a character engages with another character, creature, or object." + } + ], + "patterns": [ + { + "head": "character", + "relation": "friends with", + "tail": "character", + "description": "One character is friends with another, e.g., Harry Potter is friends with Ron Weasley." + }, + { + "head": "character", + "relation": "interacts with", + "tail": "object", + "description": "A character interacting with an object, e.g., Harry Potter interacts with the Invisibility Cloak." 
+ } + ] +} + +``` + +Then, create the graph using the schema and the uploaded documents: + +```shell +# Create graph from schema + +schema = "./schema.json" +create_graph_with_schema_response = client.graph.create_graph_from_schema(namespace, schema) +print(create_graph_with_schema_response) +# Creating your graph + +``` + +## Option 2 - Create the Knowledge Graph from seed questions + +Alternatively, you can create a graph using seed concepts in the form of questions written in natural language. We'll create a new namespace and upload the same data. + +```shell +namespace = "harry-potter-2" +documents = [ + "path/to/harry_potter_and_the_philosophers_stone.pdf", + "path/to/harry_potter_and_the_chamber_of_secrets.pdf" + # Add paths to the rest of the Harry Potter series documents +] + +# Add documents to your namespace +documents_response = client.graph.add_documents(namespace, documents) +print("Documents Added:", documents_response) + +``` + +Create the knowledge graph from the seed questions and the uploaded documents: ```shell questions = ["What does Harry look like?","What does Hermione look like?","What does Ron look like?"] @@ -70,13 +145,19 @@ print("Extracted Graph:", extracted_graph) ## Querying the Knowledge Graph -With the graph created, you can now query it to find specific information. For example, to find out who wears a cloak: +With the graphs created, you can now query them to find specific information: ```shell +# Query graph created from schema +query = "Who is Harry friends with?" +namespace = "harry-potter" +schema_query_response = client.graph.query_graph(namespace, query) +print("Query Response:", schema_query_response) + +# Query graph created from seed questions query = "Who wears a Cloak?" 
-query_response = client.graph.query_graph(namespace, query) +namespace = "harry-potter-2" +query_response = client.graph.query_graph(namespace, query) print("Query Response:", query_response) ``` - -This query returns information based on the interactions and mentions of cloaks in the Harry Potter series. Even if you did not explicitly ask the SDK to extract information on cloaks and clothing, we are still able to uncover relevant information like this, illustrating the power of our AI-enabled knowledge graphs creation experience. diff --git a/examples/create_graph_from_questions.ipynb b/examples/create_graph_from_questions.ipynb new file mode 100644 index 0000000..e1f3e84 --- /dev/null +++ b/examples/create_graph_from_questions.ipynb @@ -0,0 +1,183 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create a knowledge graph with questions" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import os\n", + "\n", + "from dotenv import load_dotenv\n", + "\n", + "load_dotenv()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from whyhow import WhyHow" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "client = WhyHow(\n", + " api_key = os.environ.get(\"WHYHOW_API_KEY\"),\n", + " openai_api_key=os.getenv(\"OPENAI_API_KEY\"),\n", + " pinecone_api_key=os.getenv(\"PINECONE_API_KEY\"),\n", + " neo4j_url=os.getenv(\"NEO4J_URI\"),\n", + " neo4j_user=os.getenv(\"NEO4J_USERNAME\"),\n", + " neo4j_password=os.getenv(\"NEO4J_PASSWORD\"),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "namespace = \"ynab_schema\"\n", + "documents = 
[\"../data/YNAB_money_guide.pdf\"]\n", + "questions = [\"What are the best ways to save money?\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Add documents to database" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Your documents are being added in the background.\n" + ] + } + ], + "source": [ + "# Add documents\n", + "documents_response = client.graph.add_documents(\n", + " namespace = namespace, \n", + " documents = documents\n", + ")\n", + "\n", + "print(documents_response)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create the graph" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Your graph creation has started.\n" + ] + } + ], + "source": [ + "# Create a graph\n", + "extracted_graph = client.graph.create_graph(\n", + " namespace = namespace, \n", + " questions = questions\n", + ")\n", + "\n", + "print(extracted_graph)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Query the graph" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "answer='Some ways to save money include creating a budget, cutting unnecessary expenses, and setting financial goals.'\n" + ] + } + ], + "source": [ + "# Query the graph\n", + "query = \"What are the best ways to save money?\"\n", + "query_response = client.graph.query_graph(\n", + " namespace = namespace, \n", + " query = query\n", + ")\n", + "\n", + "print(query_response)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", 
+ "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/demo.ipynb b/examples/create_graph_from_schema.ipynb similarity index 65% rename from examples/demo.ipynb rename to examples/create_graph_from_schema.ipynb index 9af9779..f91920a 100644 --- a/examples/demo.ipynb +++ b/examples/create_graph_from_schema.ipynb @@ -1,5 +1,12 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create a knowledge graph with a preset schema" + ] + }, { "cell_type": "code", "execution_count": 1, @@ -18,6 +25,7 @@ ], "source": [ "import os\n", + "import json\n", "\n", "from dotenv import load_dotenv\n", "\n", @@ -55,9 +63,16 @@ "metadata": {}, "outputs": [], "source": [ - "namespace = \"harry_potter_also_again\"\n", - "documents = [\"../data/05_Harry_Potter_and_the_Order_of_the_Phoenix_by_J.K._Rowling.pdf\"]\n", - "questions = [\"What does Harry own?\"]" + "documents = [\"../data/seinfeld/the_puffy_shirt_script.pdf\",\"../data/seinfeld/the_big_salad.pdf\"]\n", + "namespace = \"seinfeld_fixing_uploads_6\"\n", + "schema_file = \"../data/schema_seinfeld.json\" " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Add documents to database" ] }, { @@ -75,13 +90,22 @@ ], "source": [ "# Add documents\n", - "documents_response = client.graph.add_documents(namespace, documents)\n", + "documents_response = client.graph.add_documents(\n", + " namespace=namespace, documents=documents)\n", + "\n", "print(documents_response)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create the graph" + ] + }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -94,27 +118,39 @@ ], "source": [ "# Create a graph\n", - "extracted_graph = client.graph.create_graph(namespace, questions)\n", + "extracted_graph = 
client.graph.create_graph_from_schema(\n", + " namespace=namespace, schema_file=schema_file\n", + ")\n", + "\n", "print(extracted_graph)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Query the graph" + ] + }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "answer='Sirius, Ron, Hermione' cypher_query='MATCH (e1:Entity {name: \"Harry\"})-[:WROTE_TO]->(e2:Entity)\\nRETURN e2.name' context=\"[{'e2.name': 'Sirius'}, {'e2.name': 'Ron'}, {'e2.name': 'Hermione'}]\"\n" + "answer='Jerry knows Kramer, Elaine, George, Leslie, Julie, Margaret, and Newman.'\n" ] } ], "source": [ "# Query the graph\n", - "query = \"Who does Harry write to?\"\n", - "query_response = client.graph.query_graph(namespace, query)\n", + "query = \"Who does Jerry know?\"\n", + "query_response = client.graph.query_graph(\n", + " namespace=namespace, query=query)\n", + "\n", "print(query_response)" ] } diff --git a/src/whyhow/__init__.py b/src/whyhow/__init__.py index 0067ba8..4626c25 100644 --- a/src/whyhow/__init__.py +++ b/src/whyhow/__init__.py @@ -2,5 +2,5 @@ from whyhow.client import AsyncWhyHow, WhyHow -__version__ = "v0.0.3" +__version__ = "v0.0.4" __all__ = ["AsyncWhyHow", "WhyHow"] diff --git a/src/whyhow/apis/graph.py b/src/whyhow/apis/graph.py index aea183a..6c69735 100644 --- a/src/whyhow/apis/graph.py +++ b/src/whyhow/apis/graph.py @@ -1,12 +1,16 @@ """Interacting with the graph API.""" +import json import os from pathlib import Path from whyhow.apis.base import APIBase +from whyhow.schemas.common import Schema as SchemaModel from whyhow.schemas.graph import ( AddDocumentsResponse, CreateGraphResponse, + CreateQuestionGraphRequest, + CreateSchemaGraphRequest, QueryGraphRequest, QueryGraphResponse, QueryGraphReturn, @@ -85,19 +89,50 @@ def create_graph(self, namespace: str, questions: list[str]) -> str: ---------- namespace : str The namespace of the 
graph to create. - documents : list[str] - The documents to associate with the graph. Only supports PDFs for now. - concepts : list[str] - The concepts to initialize the graph with. + questions : list[str] + The seed concepts to initialize the graph with. """ if not questions: raise ValueError("No questions provided") - params = {"questions": questions} + request_body = CreateQuestionGraphRequest(questions=questions) raw_response = self.client.post( f"{self.prefix}/{namespace}/create_graph", - params=params, + json=request_body.model_dump(), + ) + + raw_response.raise_for_status() + + response = CreateGraphResponse.model_validate(raw_response.json()) + + return response.message + + def create_graph_from_schema( + self, namespace: str, schema_file: str + ) -> str: + """Create a new graph based on a user-defined schema. + + Parameters + ---------- + namespace : str + The namespace of the graph to create. + schema_file : str + The schema file to use to build the graph. + """ + if not schema_file: + raise ValueError("No schema provided") + + with open(schema_file, "r") as file: + schema_data = json.load(file) + + schema_model = SchemaModel(**schema_data) + + request_body = CreateSchemaGraphRequest(graph_schema=schema_model) + + raw_response = self.client.post( + f"{self.prefix}/{namespace}/create_graph_from_schema", + json=request_body.model_dump(), ) raw_response.raise_for_status() diff --git a/src/whyhow/schemas/common.py b/src/whyhow/schemas/common.py index 40b07b2..c39eee5 100644 --- a/src/whyhow/schemas/common.py +++ b/src/whyhow/schemas/common.py @@ -1,6 +1,6 @@ """Shared schemas.""" -from typing import Any +from typing import Any, List, Optional from pydantic import BaseModel, Field, model_validator @@ -135,3 +135,49 @@ def from_relationship(cls, relationship: Relationship) -> "Triple": tail_type=end.labels[0], # take the first label properties=relationship.properties, ) + + +# GRAPH SCHEMA +class SchemaEntity(BaseModel): + """Schema Entity model.""" + + name: 
str + description: str + + +class SchemaRelation(BaseModel): + """Schema Relation model.""" + + name: str + description: str + + +class TriplePattern(BaseModel): + """Schema Triple Pattern model.""" + + head: str + relation: str + tail: str + description: str + + +class Schema(BaseModel): + """Schema model.""" + + entities: List[SchemaEntity] = Field(default_factory=list) + relations: List[SchemaRelation] = Field(default_factory=list) + patterns: List[TriplePattern] = Field(default_factory=list) + + def get_entity(self, name: str) -> Optional[SchemaEntity]: + """Return an entity by name if it exists in the schema.""" + for entity in self.entities: + if entity.name == name: + return entity + return None # Return None if no entity with that name is found + + def get_relation(self, name: str) -> Optional[SchemaRelation]: + """Return a relation by name if it exists in the schema.""" + for relation in self.relations: + if relation.name == name: + return relation + return None # Return None if no relation with that name is found diff --git a/src/whyhow/schemas/graph.py b/src/whyhow/schemas/graph.py index f7d922e..40bd648 100644 --- a/src/whyhow/schemas/graph.py +++ b/src/whyhow/schemas/graph.py @@ -3,7 +3,7 @@ from typing import Literal from whyhow.schemas.base import BaseRequest, BaseResponse, BaseReturn -from whyhow.schemas.common import Graph +from whyhow.schemas.common import Graph, Schema # Custom types Status = Literal["success", "pending", "failure"] @@ -16,6 +16,18 @@ class AddDocumentsResponse(BaseResponse): message: str +class CreateQuestionGraphRequest(BaseRequest): + """Schema for the request body of the create graph endpoint.""" + + questions: list[str] + + +class CreateSchemaGraphRequest(BaseRequest): + """Schema for the request body of the create graph with schema endpoint.""" + + graph_schema: Schema + + # Request and response schemas class CreateGraphResponse(BaseResponse): """Schema for the response body of the create graph endpoint."""