From 8a0bc0535f96b810a3d94f1c2d62daaf14cac154 Mon Sep 17 00:00:00 2001 From: Alonso Guevara Date: Wed, 11 Sep 2024 16:45:43 -0600 Subject: [PATCH] Release v0.3.4 (#1125) --- .semversioner/0.3.4.json | 14 + .../patch-20240910212339849025.json | 4 - .../patch-20240911214150592475.json | 4 - CHANGELOG.md | 217 +++---- pyproject.toml | 532 +++++++++--------- 5 files changed, 391 insertions(+), 380 deletions(-) create mode 100644 .semversioner/0.3.4.json delete mode 100644 .semversioner/next-release/patch-20240910212339849025.json delete mode 100644 .semversioner/next-release/patch-20240911214150592475.json diff --git a/.semversioner/0.3.4.json b/.semversioner/0.3.4.json new file mode 100644 index 0000000000..1c328d4d23 --- /dev/null +++ b/.semversioner/0.3.4.json @@ -0,0 +1,14 @@ +{ + "changes": [ + { + "description": "Deep copy txt units on local search to avoid race conditions", + "type": "patch" + }, + { + "description": "Fix summarization including empty descriptions", + "type": "patch" + } + ], + "created_at": "2024-09-11T22:31:58+00:00", + "version": "0.3.4" +} \ No newline at end of file diff --git a/.semversioner/next-release/patch-20240910212339849025.json b/.semversioner/next-release/patch-20240910212339849025.json deleted file mode 100644 index d11690df46..0000000000 --- a/.semversioner/next-release/patch-20240910212339849025.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "type": "patch", - "description": "Deep copy txt units on local search to avoid race conditions" -} diff --git a/.semversioner/next-release/patch-20240911214150592475.json b/.semversioner/next-release/patch-20240911214150592475.json deleted file mode 100644 index c4e40b6ca5..0000000000 --- a/.semversioner/next-release/patch-20240911214150592475.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "type": "patch", - "description": "Fix summarization including empty descriptions" -} diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a237d30cb..87bbb82cde 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,106 +1,111 @@ -# Changelog -Note: version releases in the 0.x.y range may introduce breaking changes. - -## 0.3.3 - -- patch: Add entrypoints for incremental indexing -- patch: Clean up and organize run index code -- patch: Consistent config loading. Resolves #99 and Resolves #1049 -- patch: Fix circular dependency when running prompt tune api directly -- patch: Fix default settings for embedding -- patch: Fix img for auto tune -- patch: Fix img width -- patch: Fixed a bug in prompt tuning process -- patch: Refactor text unit build at local search -- patch: Update Prompt Tuning docs -- patch: Update create_pipeline_config.py -- patch: Update prompt tune command in docs -- patch: add querying from azure blob storage -- patch: fix setting base_dir to full paths when not using file system. -- patch: fix strategy config in entity_extraction - -## 0.3.2 - -- patch: Add context data to query API responses. -- patch: Add missing config parameter documentation for prompt tuning -- patch: Add neo4j community notebook -- patch: Ensure entity types to be str when running prompt tuning -- patch: Fix weight casting during graph extraction -- patch: Patch "past" dependency issues -- patch: Update developer guide. -- patch: Update query type hints. -- patch: change-lancedb-placement - -## 0.3.1 - -- patch: Add preflight check to check LLM connectivity. -- patch: Add streaming support for local/global search to query cli -- patch: Add support for both float and int on schema validation for community report generation -- patch: Avoid running index on gh-pages publishing -- patch: Implement Index API -- patch: Improves filtering for data dir inferring -- patch: Update to nltk 3.9.1 - -## 0.3.0 - -- minor: Implement auto templating API. -- minor: Implement query engine API. -- patch: Fix file dumps using json for non ASCII chars -- patch: Stabilize smoke tests for query context building -- patch: fix query embedding -- patch: fix sort_context & max_tokens params in verb - -## 0.2.2 - -- patch: Add a check if there is no community record added in local search context -- patch: Add sepparate workflow for Python Tests -- patch: Docs updates -- patch: Run smoke tests on 4o - -## 0.2.1 - -- patch: Added default columns for vector store at create_pipeline_config. No change for other cases. -- patch: Change json parsing error in the map step of global search to warning -- patch: Fix Local Search breaking when loading Embeddings input. Defaulting overwrite to True as in the rest of the vector store config -- patch: Fix json parsing when LLM returns faulty responses -- patch: Fix missing community reports and refactor community context builder -- patch: Fixed a bug that erased the vector database, added a new parameter to specify the config file path, and updated the documentation accordingly. -- patch: Try parsing json before even repairing -- patch: Update Prompt Tuning meta prompts with finer examples -- patch: Update default entity extraction and gleaning prompts to reduce hallucinations -- patch: add encoding-model to entity/claim extraction config -- patch: add encoding-model to text chunking config -- patch: add user prompt to history-tracking llm -- patch: update config reader to allow for zero gleans -- patch: update config-reader to allow for empty chunk-by arrays -- patch: update history-tracking LLm to use 'assistant' instead of 'system' in output history. -- patch: use history argument in hash key computation; add history input to cache data - -## 0.2.0 - -- minor: Add content-based KNN for selecting prompt tune few shot examples -- minor: Add dynamic community report rating to the prompt tuning engine -- patch: Add Minute-based Rate Limiting and fix rpm, tpm settings -- patch: Add N parameter support -- patch: Add cli flag to overlay default values onto a provided config. -- patch: Add exception handling on file load -- patch: Add language support to prompt tuning -- patch: Add llm params to local and global search -- patch: Fix broken prompt tuning link on docs -- patch: Fix delta none on query calls -- patch: Fix docsite base url -- patch: Fix encoding model parameter on prompt tune -- patch: Fix for --limit exceeding the dataframe length -- patch: Fix for Ruff 0.5.2 -- patch: Fixed an issue where base OpenAI embeddings can't work with Azure OpenAI LLM -- patch: Modify defaults for CHUNK_SIZE, CHUNK_OVERLAP and GLEANINGS to reduce time and LLM calls -- patch: fix community_report doesn't work in settings.yaml -- patch: fix llm response content is None in query -- patch: fix the organization parameter is ineffective during queries -- patch: remove duplicate file read -- patch: support non-open ai model config to prompt tune -- patch: use binary io processing for all file io operations - -## 0.1.0 - -- minor: Initial Release +# Changelog +Note: version releases in the 0.x.y range may introduce breaking changes. + +## 0.3.4 + +- patch: Deep copy txt units on local search to avoid race conditions +- patch: Fix summarization including empty descriptions + +## 0.3.3 + +- patch: Add entrypoints for incremental indexing +- patch: Clean up and organize run index code +- patch: Consistent config loading. Resolves #99 and Resolves #1049 +- patch: Fix circular dependency when running prompt tune api directly +- patch: Fix default settings for embedding +- patch: Fix img for auto tune +- patch: Fix img width +- patch: Fixed a bug in prompt tuning process +- patch: Refactor text unit build at local search +- patch: Update Prompt Tuning docs +- patch: Update create_pipeline_config.py +- patch: Update prompt tune command in docs +- patch: add querying from azure blob storage +- patch: fix setting base_dir to full paths when not using file system. +- patch: fix strategy config in entity_extraction + +## 0.3.2 + +- patch: Add context data to query API responses. +- patch: Add missing config parameter documentation for prompt tuning +- patch: Add neo4j community notebook +- patch: Ensure entity types to be str when running prompt tuning +- patch: Fix weight casting during graph extraction +- patch: Patch "past" dependency issues +- patch: Update developer guide. +- patch: Update query type hints. +- patch: change-lancedb-placement + +## 0.3.1 + +- patch: Add preflight check to check LLM connectivity. +- patch: Add streaming support for local/global search to query cli +- patch: Add support for both float and int on schema validation for community report generation +- patch: Avoid running index on gh-pages publishing +- patch: Implement Index API +- patch: Improves filtering for data dir inferring +- patch: Update to nltk 3.9.1 + +## 0.3.0 + +- minor: Implement auto templating API. +- minor: Implement query engine API. +- patch: Fix file dumps using json for non ASCII chars +- patch: Stabilize smoke tests for query context building +- patch: fix query embedding +- patch: fix sort_context & max_tokens params in verb + +## 0.2.2 + +- patch: Add a check if there is no community record added in local search context +- patch: Add sepparate workflow for Python Tests +- patch: Docs updates +- patch: Run smoke tests on 4o + +## 0.2.1 + +- patch: Added default columns for vector store at create_pipeline_config. No change for other cases. +- patch: Change json parsing error in the map step of global search to warning +- patch: Fix Local Search breaking when loading Embeddings input. Defaulting overwrite to True as in the rest of the vector store config +- patch: Fix json parsing when LLM returns faulty responses +- patch: Fix missing community reports and refactor community context builder +- patch: Fixed a bug that erased the vector database, added a new parameter to specify the config file path, and updated the documentation accordingly. +- patch: Try parsing json before even repairing +- patch: Update Prompt Tuning meta prompts with finer examples +- patch: Update default entity extraction and gleaning prompts to reduce hallucinations +- patch: add encoding-model to entity/claim extraction config +- patch: add encoding-model to text chunking config +- patch: add user prompt to history-tracking llm +- patch: update config reader to allow for zero gleans +- patch: update config-reader to allow for empty chunk-by arrays +- patch: update history-tracking LLm to use 'assistant' instead of 'system' in output history. +- patch: use history argument in hash key computation; add history input to cache data + +## 0.2.0 + +- minor: Add content-based KNN for selecting prompt tune few shot examples +- minor: Add dynamic community report rating to the prompt tuning engine +- patch: Add Minute-based Rate Limiting and fix rpm, tpm settings +- patch: Add N parameter support +- patch: Add cli flag to overlay default values onto a provided config. +- patch: Add exception handling on file load +- patch: Add language support to prompt tuning +- patch: Add llm params to local and global search +- patch: Fix broken prompt tuning link on docs +- patch: Fix delta none on query calls +- patch: Fix docsite base url +- patch: Fix encoding model parameter on prompt tune +- patch: Fix for --limit exceeding the dataframe length +- patch: Fix for Ruff 0.5.2 +- patch: Fixed an issue where base OpenAI embeddings can't work with Azure OpenAI LLM +- patch: Modify defaults for CHUNK_SIZE, CHUNK_OVERLAP and GLEANINGS to reduce time and LLM calls +- patch: fix community_report doesn't work in settings.yaml +- patch: fix llm response content is None in query +- patch: fix the organization parameter is ineffective during queries +- patch: remove duplicate file read +- patch: support non-open ai model config to prompt tune +- patch: use binary io processing for all file io operations + +## 0.1.0 + +- minor: Initial Release diff --git a/pyproject.toml b/pyproject.toml index d9ca81f9de..394cd17681 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,266 +1,266 @@ -[tool.poetry] -name = "graphrag" -# Maintainers: do not change the version here manually, use ./scripts/release.sh -version = "0.3.3" -description = "" -authors = [ - "Alonso Guevara Fernández ", - "Andrés Morales Esquivel ", - "Chris Trevino ", - "David Tittsworth ", - "Dayenne de Souza ", - "Derek Worthen ", - "Gaudy Blanco Meneses ", - "Ha Trinh ", - "Jonathan Larson ", - "Josh Bradley ", - "Kate Lytvynets ", - "Kenny Zhang ", - "Mónica Carvajal", - "Nathan Evans ", - "Rodrigo Racanicci ", - "Sarah Smith ", -] -license = "MIT" -readme = "README.md" -packages = [{ include = "graphrag" }] - -[tool.poetry.urls] -"Source" = "https://github.com/microsoft/graphrag" - -[tool.poetry-dynamic-versioning] -enable = true -style = "pep440" -vcs = "git" -bump = true -format-jinja = """ - {%- if distance == 0 -%} - {{ serialize_pep440(base, stage, revision) }} - {%- else -%} - {{ serialize_pep440(base, stage, revision, dev=distance) }} - {%- endif -%} -""" - -[tool.poetry.dependencies] -python = ">=3.10,<3.13" -environs = "^11.0.0" -datashaper = "^0.0.49" - -# Vector Stores -azure-search-documents = "^11.4.0" -lancedb = "^0.12.0" - -# Event Loops -uvloop = { version = "^0.20.0", markers = "platform_system != 'Windows'" } -nest-asyncio = { version = "^1.6.0", markers = "platform_system == 'Windows'" } - -# Async IO -aiolimiter = "^1.1.0" -aiofiles = "^24.1.0" - -# LLM -openai = "^1.37.1" -nltk = "3.9.1" -tiktoken = "^0.7.0" - -# Data-Sci -numba = "0.60.0" -numpy = "^1.25.2" -graspologic = "^3.4.1" -networkx = "^3" -fastparquet = "^2024.2.0" -# 1.13.0 was a footgun -scipy = "1.12.0" - -# Configuration -pyyaml = "^6.0.2" -pyaml-env = "^1.2.1" -python-dotenv = "^1.0.0" - -# Network -tenacity = "^9.0.0" - -swifter = "^1.4.0" -pydantic = "^2" -rich = "^13.6.0" -textual = "^0.78.0" -devtools = "^0.12.2" - -typing-extensions = "^4.12.2" - -#Azure -azure-storage-blob = "^12.22.0" -azure-identity = "^1.17.1" -json-repair = "^0.28.4" - -future = "^1.0.0" -[tool.poetry.group.dev.dependencies] -coverage = "^7.6.0" -ipykernel = "^6.29.4" -jupyter = "^1.0.0" -nbconvert = "^7.16.3" -poethepoet = "^0.27.0" -pyright = "^1.1.371" -pytest = "^8.3.2" -pytest-asyncio = "^0.24.0" -pytest-timeout = "^2.3.1" -ruff = "^0.6.2" -semversioner = "^2.0.3" - -update-toml = "^0.2.1" - -[build-system] -requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning>=1.0.0,<2.0.0"] -build-backend = "poetry_dynamic_versioning.backend" - -[tool.poe.tasks] -_sort_imports = "ruff check --select I --fix . --preview" -_format_code = "ruff format . --preview" -_ruff_check = 'ruff check . --preview' -_pyright = "pyright" -_convert_local_search_nb = 'jupyter nbconvert --output-dir=docsite/posts/query/notebooks/ --output="{notebook_name}_nb" --template=docsite/nbdocsite_template --to markdown examples_notebooks/local_search.ipynb' -_convert_global_search_nb = 'jupyter nbconvert --output-dir=docsite/posts/query/notebooks/ --output="{notebook_name}_nb" --template=docsite/nbdocsite_template --to markdown examples_notebooks/global_search.ipynb' -_semversioner_release = "semversioner release" -_semversioner_changelog = "semversioner changelog > CHANGELOG.md" -_semversioner_update_toml_version = "update-toml update --path tool.poetry.version --value $(poetry run semversioner current-version)" -semversioner_add = "semversioner add-change" -coverage_report = 'coverage report --omit "**/tests/**" --show-missing' -check_format = 'ruff format . --check --preview' -fix = "ruff --preview check --fix ." -fix_unsafe = "ruff check --preview --fix --unsafe-fixes ." - -_test_all = "coverage run -m pytest ./tests" -test_unit = "pytest ./tests/unit" -test_integration = "pytest ./tests/integration" -test_smoke = "pytest ./tests/smoke" -test_notebook = "pytest ./tests/notebook" -index = "python -m graphrag.index" -query = "python -m graphrag.query" -prompt_tune = "python -m graphrag.prompt_tune" -# Pass in a test pattern -test_only = "pytest -s -k" - -[[tool.poe.tasks.release]] -sequence = [ - '_semversioner_release', - '_semversioner_changelog', - '_semversioner_update_toml_version', -] -ignore_fail = 'return_non_zero' - -[[tool.poe.tasks.convert_docsite_notebooks]] -sequence = ['_convert_local_search_nb', '_convert_global_search_nb'] -ignore_fail = 'return_non_zero' - -[[tool.poe.tasks.format]] -sequence = ['_sort_imports', '_format_code'] -ignore_fail = 'return_non_zero' - -[[tool.poe.tasks.check]] -sequence = ['check_format', '_ruff_check', '_pyright'] -ignore_fail = 'return_non_zero' - -[[tool.poe.tasks.test]] -sequence = ['_test_all', 'coverage_report'] -ignore_fail = 'return_non_zero' - -[tool.ruff] -target-version = "py310" -extend-include = ["*.ipynb"] - -[tool.ruff.format] -docstring-code-format = true -docstring-code-line-length = 20 - -[tool.ruff.lint] -select = [ - "E4", - "E7", - "E9", - "W291", - "YTT", - "T10", - "ICN", - "INP", - "Q", - "RSE", - "SLOT", - "INT", - "FLY", - "LOG", - "C90", - "T20", - "D", - "RET", - "PD", - "N", - "PIE", - "SIM", - "S", - "G", - "ERA", - "ASYNC", - "TID", - "UP", - "SLF", - "BLE", - "C4", - "I", - "F", - "A", - "ARG", - "PTH", - "RUF", - "B", - "TCH", - "DTZ", - "PYI", - "PT", - "EM", - "TRY", - "PERF", - "CPY", - # "FBT", # use named arguments for boolean flags - # "TD", # todos - # "FIX", # fixme - # "FURB" # preview rules - # ANN # Type annotations, re-enable when we get bandwidth -] -ignore = [ - # Ignore module names shadowing Python builtins - "A005", - # Deprecated Rules - "ANN101", - "ANN102", - # Conflicts with interface argument checking - "ARG002", - "ANN204", - # TODO: Inspect these pandas rules for validity - "PD002", # prevents inplace=True - # TODO RE-Enable when we get bandwidth - "PERF203", # Needs restructuring of errors, we should bail-out on first error - "C901", # needs refactoring to remove cyclomatic complexity -] - -[tool.ruff.lint.per-file-ignores] -"tests/*" = ["S", "D", "ANN", "T201", "ASYNC", "ARG", "PTH", "TRY"] -"examples/*" = ["S", "D", "ANN", "T201", "PTH", "TRY", "PERF"] -"graphrag/index/config/*" = ["TCH"] -"*.ipynb" = ["T201"] - -[tool.ruff.lint.flake8-builtins] -builtins-ignorelist = ["input", "id", "bytes"] - -[tool.ruff.lint.pydocstyle] -convention = "numpy" - -# https://github.com/microsoft/pyright/blob/9f81564a4685ff5c55edd3959f9b39030f590b2f/docs/configuration.md#sample-pyprojecttoml-file -[tool.pyright] -include = ["graphrag", "tests", "examples", "examples_notebooks"] -exclude = ["**/node_modules", "**/__pycache__"] - -[tool.pytest.ini_options] -asyncio_mode = "auto" -timeout = 800 -# log_cli = true -# log_cli_level = "INFO" +[tool.poetry] +name = "graphrag" +# Maintainers: do not change the version here manually, use ./scripts/release.sh +version = "0.3.4" +description = "" +authors = [ + "Alonso Guevara Fernández ", + "Andrés Morales Esquivel ", + "Chris Trevino ", + "David Tittsworth ", + "Dayenne de Souza ", + "Derek Worthen ", + "Gaudy Blanco Meneses ", + "Ha Trinh ", + "Jonathan Larson ", + "Josh Bradley ", + "Kate Lytvynets ", + "Kenny Zhang ", + "Mónica Carvajal", + "Nathan Evans ", + "Rodrigo Racanicci ", + "Sarah Smith ", +] +license = "MIT" +readme = "README.md" +packages = [{ include = "graphrag" }] + +[tool.poetry.urls] +"Source" = "https://github.com/microsoft/graphrag" + +[tool.poetry-dynamic-versioning] +enable = true +style = "pep440" +vcs = "git" +bump = true +format-jinja = """ + {%- if distance == 0 -%} + {{ serialize_pep440(base, stage, revision) }} + {%- else -%} + {{ serialize_pep440(base, stage, revision, dev=distance) }} + {%- endif -%} +""" + +[tool.poetry.dependencies] +python = ">=3.10,<3.13" +environs = "^11.0.0" +datashaper = "^0.0.49" + +# Vector Stores +azure-search-documents = "^11.4.0" +lancedb = "^0.12.0" + +# Event Loops +uvloop = { version = "^0.20.0", markers = "platform_system != 'Windows'" } +nest-asyncio = { version = "^1.6.0", markers = "platform_system == 'Windows'" } + +# Async IO +aiolimiter = "^1.1.0" +aiofiles = "^24.1.0" + +# LLM +openai = "^1.37.1" +nltk = "3.9.1" +tiktoken = "^0.7.0" + +# Data-Sci +numba = "0.60.0" +numpy = "^1.25.2" +graspologic = "^3.4.1" +networkx = "^3" +fastparquet = "^2024.2.0" +# 1.13.0 was a footgun +scipy = "1.12.0" + +# Configuration +pyyaml = "^6.0.2" +pyaml-env = "^1.2.1" +python-dotenv = "^1.0.0" + +# Network +tenacity = "^9.0.0" + +swifter = "^1.4.0" +pydantic = "^2" +rich = "^13.6.0" +textual = "^0.78.0" +devtools = "^0.12.2" + +typing-extensions = "^4.12.2" + +#Azure +azure-storage-blob = "^12.22.0" +azure-identity = "^1.17.1" +json-repair = "^0.28.4" + +future = "^1.0.0" +[tool.poetry.group.dev.dependencies] +coverage = "^7.6.0" +ipykernel = "^6.29.4" +jupyter = "^1.0.0" +nbconvert = "^7.16.3" +poethepoet = "^0.27.0" +pyright = "^1.1.371" +pytest = "^8.3.2" +pytest-asyncio = "^0.24.0" +pytest-timeout = "^2.3.1" +ruff = "^0.6.2" +semversioner = "^2.0.3" + +update-toml = "^0.2.1" + +[build-system] +requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning>=1.0.0,<2.0.0"] +build-backend = "poetry_dynamic_versioning.backend" + +[tool.poe.tasks] +_sort_imports = "ruff check --select I --fix . --preview" +_format_code = "ruff format . --preview" +_ruff_check = 'ruff check . --preview' +_pyright = "pyright" +_convert_local_search_nb = 'jupyter nbconvert --output-dir=docsite/posts/query/notebooks/ --output="{notebook_name}_nb" --template=docsite/nbdocsite_template --to markdown examples_notebooks/local_search.ipynb' +_convert_global_search_nb = 'jupyter nbconvert --output-dir=docsite/posts/query/notebooks/ --output="{notebook_name}_nb" --template=docsite/nbdocsite_template --to markdown examples_notebooks/global_search.ipynb' +_semversioner_release = "semversioner release" +_semversioner_changelog = "semversioner changelog > CHANGELOG.md" +_semversioner_update_toml_version = "update-toml update --path tool.poetry.version --value $(poetry run semversioner current-version)" +semversioner_add = "semversioner add-change" +coverage_report = 'coverage report --omit "**/tests/**" --show-missing' +check_format = 'ruff format . --check --preview' +fix = "ruff --preview check --fix ." +fix_unsafe = "ruff check --preview --fix --unsafe-fixes ." + +_test_all = "coverage run -m pytest ./tests" +test_unit = "pytest ./tests/unit" +test_integration = "pytest ./tests/integration" +test_smoke = "pytest ./tests/smoke" +test_notebook = "pytest ./tests/notebook" +index = "python -m graphrag.index" +query = "python -m graphrag.query" +prompt_tune = "python -m graphrag.prompt_tune" +# Pass in a test pattern +test_only = "pytest -s -k" + +[[tool.poe.tasks.release]] +sequence = [ + '_semversioner_release', + '_semversioner_changelog', + '_semversioner_update_toml_version', +] +ignore_fail = 'return_non_zero' + +[[tool.poe.tasks.convert_docsite_notebooks]] +sequence = ['_convert_local_search_nb', '_convert_global_search_nb'] +ignore_fail = 'return_non_zero' + +[[tool.poe.tasks.format]] +sequence = ['_sort_imports', '_format_code'] +ignore_fail = 'return_non_zero' + +[[tool.poe.tasks.check]] +sequence = ['check_format', '_ruff_check', '_pyright'] +ignore_fail = 'return_non_zero' + +[[tool.poe.tasks.test]] +sequence = ['_test_all', 'coverage_report'] +ignore_fail = 'return_non_zero' + +[tool.ruff] +target-version = "py310" +extend-include = ["*.ipynb"] + +[tool.ruff.format] +docstring-code-format = true +docstring-code-line-length = 20 + +[tool.ruff.lint] +select = [ + "E4", + "E7", + "E9", + "W291", + "YTT", + "T10", + "ICN", + "INP", + "Q", + "RSE", + "SLOT", + "INT", + "FLY", + "LOG", + "C90", + "T20", + "D", + "RET", + "PD", + "N", + "PIE", + "SIM", + "S", + "G", + "ERA", + "ASYNC", + "TID", + "UP", + "SLF", + "BLE", + "C4", + "I", + "F", + "A", + "ARG", + "PTH", + "RUF", + "B", + "TCH", + "DTZ", + "PYI", + "PT", + "EM", + "TRY", + "PERF", + "CPY", + # "FBT", # use named arguments for boolean flags + # "TD", # todos + # "FIX", # fixme + # "FURB" # preview rules + # ANN # Type annotations, re-enable when we get bandwidth +] +ignore = [ + # Ignore module names shadowing Python builtins + "A005", + # Deprecated Rules + "ANN101", + "ANN102", + # Conflicts with interface argument checking + "ARG002", + "ANN204", + # TODO: Inspect these pandas rules for validity + "PD002", # prevents inplace=True + # TODO RE-Enable when we get bandwidth + "PERF203", # Needs restructuring of errors, we should bail-out on first error + "C901", # needs refactoring to remove cyclomatic complexity +] + +[tool.ruff.lint.per-file-ignores] +"tests/*" = ["S", "D", "ANN", "T201", "ASYNC", "ARG", "PTH", "TRY"] +"examples/*" = ["S", "D", "ANN", "T201", "PTH", "TRY", "PERF"] +"graphrag/index/config/*" = ["TCH"] +"*.ipynb" = ["T201"] + +[tool.ruff.lint.flake8-builtins] +builtins-ignorelist = ["input", "id", "bytes"] + +[tool.ruff.lint.pydocstyle] +convention = "numpy" + +# https://github.com/microsoft/pyright/blob/9f81564a4685ff5c55edd3959f9b39030f590b2f/docs/configuration.md#sample-pyprojecttoml-file +[tool.pyright] +include = ["graphrag", "tests", "examples", "examples_notebooks"] +exclude = ["**/node_modules", "**/__pycache__"] + +[tool.pytest.ini_options] +asyncio_mode = "auto" +timeout = 800 +# log_cli = true +# log_cli_level = "INFO"