Skip to content

Commit

Permalink
Enable GitHub Actions CI (#3)
Browse files Browse the repository at this point in the history
* disable codeql for now

* update gh-action names

* reformat

* update secrets in ci

* mark secrets as required

* split up check, build, test

* update turbo

* split up testing strata

* resolve pydantic deprecations

* wire up openai env vars for test

* use section.value in env-var reading

* llvm install

* start azurite before unit tests

* skip azure smoke test for now to get CI in place

* formatting

* smoke test logging

* print out len(key) in the fixture

* use fragment type in factories

* formatting

* secret use update

* remove is_clean check
  • Loading branch information
darthtrevino authored Apr 2, 2024
1 parent 4759532 commit e1eefbc
Show file tree
Hide file tree
Showing 15 changed files with 209 additions and 223 deletions.
39 changes: 26 additions & 13 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: JavaScript CI
name: CI
on:
push:
branches: [main]
Expand Down Expand Up @@ -33,6 +33,11 @@ jobs:
with:
poetry-version: '1.6.1'


- run: |
sudo apt-get update
sudo apt-get install -y llvm-11 python3-dev
name: "LLVM install"
- uses: actions/checkout@v3

Expand Down Expand Up @@ -61,20 +66,28 @@ jobs:
repo-token: ${{ secrets.GITHUB_TOKEN }}
server-token: ${{ secrets.GITHUB_TOKEN }}

- run: yarn check
name: Static Checks

- run: yarn build
name: Build

- run: yarn start:azurite&
name: Start Azurite

- run: yarn ci
name: Verify
- run: yarn test:unit
name: Unit Tests
env:
GRAPHRAG_API_KEY: $(openaiApiKey)
GRAPHRAG_LLM_MODEL: $(completionModel)
GRAPHRAG_EMBEDDING_MODEL: $(embeddingModel)
GRAPHRAG_API_KEY: ${{ secrets.OPENAI_API_KEY }}

- run: |
git add -A
git status
name: Git status
- run: yarn test:integration
name: Integration Tests
env:
GRAPHRAG_API_KEY: ${{ secrets.OPENAI_API_KEY }}

- run: yarn is_clean
name: Check if repo is clean
- run: yarn test:smoke
name: Smoke Tests
env:
GRAPHRAG_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GRAPHRAG_LLM_MODEL: ${{ secrets.OPENAI_LLM_MODEL }}
GRAPHRAG_EMBEDDING_MODEL: ${{ secrets.OPENAI_EMBEDDING_MODEL }}
48 changes: 0 additions & 48 deletions .github/workflows/codeql.yml

This file was deleted.

2 changes: 1 addition & 1 deletion .github/workflows/python-publish.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Python-Publish-CI
name: Python Publish
on:
push:
branches: [main]
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,6 @@
"devDependencies": {
"cspell": "^8.3.2",
"npm-run-all": "^4.1.5",
"turbo": "^1.12.4"
"turbo": "^1.13.0"
}
}
4 changes: 2 additions & 2 deletions python/graphrag/graphrag/index/default_config/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ def load_pipeline_config(config_or_path: str | PipelineConfig) -> PipelineConfig
for extended_config in config.extends:
extended_config = load_pipeline_config(extended_config)
merged_config = {
**json.loads(extended_config.json()),
**json.loads(config.json(exclude_unset=True)),
**json.loads(extended_config.model_dump_json()),
**json.loads(config.model_dump_json(exclude_unset=True)),
}
config = PipelineConfig.model_validate(merged_config)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,20 +115,23 @@ def default_config_parameters_from_env_vars(
root_dir = root_dir or str(Path.cwd())
env = _make_env(root_dir)

def _str(key: str, default_value: str | None = None) -> str | None:
return env(key, default_value)
def _key(key: str | Fragment) -> str | None:
return key.value if isinstance(key, Fragment) else key

def _int(key: str, default_value: int | None = None) -> int | None:
return env.int(key, default_value)
def _str(key: str | Fragment, default_value: str | None = None) -> str | None:
return env(_key(key), default_value)

def _bool(key: str, default_value: bool | None = None) -> bool | None:
return env.bool(key, default_value)
def _int(key: str | Fragment, default_value: int | None = None) -> int | None:
return env.int(_key(key), default_value)

def _float(key: str, default_value: float | None = None) -> float | None:
return env.float(key, default_value)
def _bool(key: str | Fragment, default_value: bool | None = None) -> bool | None:
return env.bool(_key(key), default_value)

def section(key: str):
return env.prefixed(f"{key}_")
def _float(key: str | Fragment, default_value: float | None = None) -> float | None:
return env.float(_key(key), default_value)

def section(key: Section):
return env.prefixed(f"{key.value}_")

fallback_oai_key = _str("OPENAI_API_KEY", _str("AZURE_OPENAI_API_KEY"))
fallback_oai_org = _str("OPENAI_ORG_ID")
Expand Down
216 changes: 108 additions & 108 deletions python/graphrag/graphrag/vector_stores/qdrant.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,111 +3,111 @@
# Licensed under the MIT license. See LICENSE file in the project.
#

"""A package containing the Qdrant vector store implementation."""

from typing import Any

from qdrant_client import QdrantClient # type: ignore
from qdrant_client.http import models # type: ignore
from qdrant_client.models import Distance, VectorParams # type: ignore

from graphrag.model.types import TextEmbedder

from .base import BaseVectorStore, VectorStoreDocument, VectorStoreSearchResult


class Qdrant(BaseVectorStore):
    """The Qdrant vector storage implementation."""

    def connect(self, **kwargs: Any) -> Any:
        """Connect to the Qdrant vector store.

        Connects to a remote Qdrant server when a ``url`` kwarg is supplied;
        otherwise creates an in-memory instance.

        Recognized kwargs: url, port (default 6333), api_key,
        timeout (default 1000), vector_size (default 1536), https.
        """
        url = kwargs.get("url", None)
        port = kwargs.get("port", 6333)

        api_key = kwargs.get("api_key", None)
        timeout = kwargs.get("timeout", 1000)
        # Fallback embedding dimensionality, used by load_documents when the
        # collection is recreated with no documents to measure.
        self.vector_size = kwargs.get("vector_size", 1536)

        if url:
            # Infer TLS from the URL scheme unless the caller overrides it.
            https = kwargs.get("https", "https://" in url)
            self.db_connection = QdrantClient(
                url=url, port=port, api_key=api_key, https=https, timeout=timeout
            )
        else:
            # create in-memory db
            self.db_connection = QdrantClient(":memory:")

    def load_documents(
        self, documents: list[VectorStoreDocument], overwrite: bool = True
    ) -> None:
        """Load documents into the vector store.

        When ``overwrite`` is True the collection is dropped and recreated,
        sized from the first document's vector (or ``self.vector_size`` when
        no vectors are available); all documents are then upserted.
        """
        if overwrite:
            self.db_connection.recreate_collection(
                collection_name=self.collection_name,
                vectors_config=VectorParams(
                    size=(
                        len(documents[0].vector)
                        if len(documents) > 0 and documents[0].vector
                        else self.vector_size
                    ),
                    distance=Distance.COSINE,
                ),
            )

        self.db_connection.upsert(
            collection_name=self.collection_name,
            points=models.Batch(
                ids=[doc.id for doc in documents],
                # Documents without a vector are stored with an empty vector.
                vectors=[doc.vector if doc.vector else [] for doc in documents],
                # The document text rides along in the payload next to any
                # caller-supplied attributes.
                payloads=[{"text": doc.text, **doc.attributes} for doc in documents],
            ),
        )

    def filter_by_id(self, include_ids: list[str] | list[int]) -> Any:
        """Build a query filter to filter documents by id.

        Side effect: stores the filter on ``self.query_filter`` so subsequent
        similarity searches are restricted to these ids.
        """
        self.query_filter = models.Filter(
            must=[
                models.HasIdCondition(has_id=include_ids),  # type: ignore
            ],
        )
        return self.query_filter

    def similarity_search_by_vector(
        self, query_embedding: list[float], k: int = 10, **kwargs: Any
    ) -> list[VectorStoreSearchResult]:
        """Perform a vector-based similarity search.

        NOTE(review): assumes ``self.query_filter`` is initialized (e.g. by a
        prior ``filter_by_id`` call or the base class) — confirm; otherwise
        this raises AttributeError.
        """
        docs = self.db_connection.search(
            collection_name=self.collection_name,
            query_filter=self.query_filter,
            query_vector=query_embedding,
            limit=k,
            with_vectors=True,
        )

        return [
            VectorStoreSearchResult(
                document=VectorStoreDocument(
                    id=doc.id,
                    text=doc.payload["text"] if doc.payload else "",
                    vector=doc.vector if doc.vector else [],  # type: ignore
                    # Everything in the payload except the reserved "text" key
                    # is surfaced as document attributes.
                    attributes=(
                        {k: v for k, v in doc.payload.items() if k != "text"}
                        if doc.payload
                        else {}
                    ),
                ),
                # NOTE(review): presumably maps Qdrant's similarity score into
                # this project's score convention — confirm the intended range.
                score=1 - abs(doc.score),
            )
            for doc in docs
        ]

    def similarity_search_by_text(
        self, text: str, text_embedder: TextEmbedder, k: int = 10, **kwargs: Any
    ) -> list[VectorStoreSearchResult]:
        """Perform a text-based similarity search.

        Embeds ``text`` with ``text_embedder`` and delegates to
        ``similarity_search_by_vector``; returns an empty list when the
        embedder yields a falsy embedding.
        """
        query_embedding = text_embedder(text)
        if query_embedding:
            return self.similarity_search_by_vector(
                query_embedding=query_embedding, k=k
            )
        return []
"""A package containing the Qdrant vector store implementation."""

from typing import Any

from qdrant_client import QdrantClient # type: ignore
from qdrant_client.http import models # type: ignore
from qdrant_client.models import Distance, VectorParams # type: ignore

from graphrag.model.types import TextEmbedder

from .base import BaseVectorStore, VectorStoreDocument, VectorStoreSearchResult


class Qdrant(BaseVectorStore):
    """Qdrant-backed implementation of the vector store interface."""

    def connect(self, **kwargs: Any) -> Any:
        """Open a Qdrant connection — remote when a ``url`` kwarg is given, in-memory otherwise."""
        endpoint = kwargs.get("url", None)
        # Fallback dimensionality for collections created with no documents.
        self.vector_size = kwargs.get("vector_size", 1536)

        if endpoint:
            # Infer TLS from the URL scheme unless explicitly overridden.
            use_tls = kwargs.get("https", "https://" in endpoint)
            self.db_connection = QdrantClient(
                url=endpoint,
                port=kwargs.get("port", 6333),
                api_key=kwargs.get("api_key", None),
                https=use_tls,
                timeout=kwargs.get("timeout", 1000),
            )
        else:
            # No URL supplied: use an ephemeral in-memory instance.
            self.db_connection = QdrantClient(":memory:")

    def load_documents(
        self, documents: list[VectorStoreDocument], overwrite: bool = True
    ) -> None:
        """Upsert ``documents`` into the collection, optionally recreating it first."""
        if overwrite:
            # Size the collection from the first document's vector when
            # available, otherwise fall back to the configured default.
            first_vector = documents[0].vector if len(documents) > 0 else None
            dimension = len(first_vector) if first_vector else self.vector_size
            self.db_connection.recreate_collection(
                collection_name=self.collection_name,
                vectors_config=VectorParams(
                    size=dimension,
                    distance=Distance.COSINE,
                ),
            )

        ids = [doc.id for doc in documents]
        vectors = [doc.vector if doc.vector else [] for doc in documents]
        # The document text is stored in the payload alongside its attributes.
        payloads = [{"text": doc.text, **doc.attributes} for doc in documents]
        self.db_connection.upsert(
            collection_name=self.collection_name,
            points=models.Batch(ids=ids, vectors=vectors, payloads=payloads),
        )

    def filter_by_id(self, include_ids: list[str] | list[int]) -> Any:
        """Restrict subsequent searches to the given document ids and return the filter."""
        id_condition = models.HasIdCondition(has_id=include_ids)  # type: ignore
        self.query_filter = models.Filter(must=[id_condition])
        return self.query_filter

    def similarity_search_by_vector(
        self, query_embedding: list[float], k: int = 10, **kwargs: Any
    ) -> list[VectorStoreSearchResult]:
        """Return the top-``k`` results nearest to ``query_embedding``."""
        hits = self.db_connection.search(
            collection_name=self.collection_name,
            query_filter=self.query_filter,
            query_vector=query_embedding,
            limit=k,
            with_vectors=True,
        )

        results: list[VectorStoreSearchResult] = []
        for hit in hits:
            payload = hit.payload
            # All payload entries except the reserved "text" key become attributes.
            if payload:
                text = payload["text"]
                extras = {key: value for key, value in payload.items() if key != "text"}
            else:
                text = ""
                extras = {}
            document = VectorStoreDocument(
                id=hit.id,
                text=text,
                vector=hit.vector if hit.vector else [],  # type: ignore
                attributes=extras,
            )
            results.append(
                VectorStoreSearchResult(document=document, score=1 - abs(hit.score))
            )
        return results

    def similarity_search_by_text(
        self, text: str, text_embedder: TextEmbedder, k: int = 10, **kwargs: Any
    ) -> list[VectorStoreSearchResult]:
        """Embed ``text`` and delegate to the vector-based similarity search."""
        embedding = text_embedder(text)
        if not embedding:
            return []
        return self.similarity_search_by_vector(query_embedding=embedding, k=k)
Loading

0 comments on commit e1eefbc

Please sign in to comment.