Commit

Improve linting (#1129)
NiklasKoehneckeAA authored Nov 12, 2024
1 parent ebab18d commit 017a3af
Showing 33 changed files with 118 additions and 77 deletions.
3 changes: 0 additions & 3 deletions .darglint2

This file was deleted.

22 changes: 11 additions & 11 deletions .pre-commit-config.yaml
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
rev: v5.0.0
hooks:
- id: check-json
- id: pretty-format-json
@@ -9,32 +9,32 @@ repos:
- --autofix
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.3.5
rev: v0.7.3
hooks:
# Run the linter.
- id: ruff
name: ruff-lint
args: [ --fix ]
types_or: [ python, pyi, jupyter ]
args: [--fix]
types_or: [python, pyi, jupyter]
# Run the formatter.
- id: ruff-format
types_or: [ python, pyi, jupyter ]
types_or: [python, pyi, jupyter]
- repo: https://github.com/kynan/nbstripout
rev: 0.7.1
rev: 0.8.0
hooks:
- id: nbstripout
files: ".ipynb"
- repo: https://github.com/codespell-project/codespell
rev: v2.2.6
rev: v2.3.0
hooks:
- id: codespell
args:
[
"-L",
"newyorker,te,responde,ist,als,oder,technik,sie,rouge,unter,juli,fiel,couldn,mke, vor,fille,ans",
]
exclude: '^(poetry\.lock|tests/connectors/retrievers/test_document_index_retriever\.py|src/intelligence_layer/examples/qa/multiple_chunk_qa.py|src/intelligence_layer/examples/summarize/.*|tests/connectors/retrievers/test_document_index_retriever\.py|src/intelligence_layer/examples/classify/keyword_extract.py|tests/examples/summarize/test_single_chunk_few_shot_summarize.py|tests/examples/summarize/very_long_text.txt|src/intelligence_layer/learning/enrich.py)$'
- repo: https://github.com/akaihola/darglint2
rev: v1.8.2
exclude: '^(poetry\.lock|tests/.*|src/intelligence_layer/examples/qa/multiple_chunk_qa.py|src/intelligence_layer/examples/summarize/.*|src/intelligence_layer/examples/classify/keyword_extract.py|src/intelligence_layer/learning/enrich.py)$'
- repo: https://github.com/jsh9/pydoclint
rev: 0.5.9
hooks:
- id: darglint2
- id: pydoclint
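For context on the swap above: darglint2 (removed together with the .darglint2 config file) and pydoclint both verify that a docstring matches the function signature it documents. The following is an illustrative sketch only, with invented names, of the kind of mismatch such a hook reports:

    def scale(value: float, factor: float) -> float:
        """Scale a value by a factor.

        Args:
            value: The value to scale.
            factor: The multiplier to apply.
            offset: Documented here but absent from the signature, so the
                docstring checker flags the mismatch.

        Returns:
            The scaled value.
        """
        return value * factor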
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -14,7 +14,7 @@
- Add `how_to_upload_existing_datasets_to_studio.ipynb` to how-tos

### Fixes
...
- Fixed some docstring inconsistencies across the codebase and switched the docstring checker to pydoclint.
### Deprecations
...

26 changes: 22 additions & 4 deletions pyproject.toml
@@ -98,7 +98,9 @@ extend-include = ["*.ipynb"]
# defaults: ["E4", "E7", "E9", "F"]
# check https://docs.astral.sh/ruff/rules/ for more info
select = [
"E4", "E7", "E9",
"E4",
"E7",
"E9",
"F",
"W",
# isort
@@ -114,7 +116,7 @@ select = [
# bugbear
"B",
# pydocstring
"D"
"D",


]
@@ -124,8 +126,10 @@ ignore = [
"E501",
# X | Y for hinting over optional
"UP007",
# unusual unicodes, next() instead of [0]
"RUF001", "RUF015",
# next() instead of [0],
"RUF015",
# unusual unicodes
"RUF001",
# warnings should be self explanatory
"B028",
# we do not docstring everything
@@ -138,3 +142,17 @@ known-first-party = ["intelligence_layer", "documentation"]

[tool.ruff.lint.pydocstyle]
convention = "google"

[tool.pydoclint]
style = 'google'
exclude = '\.git|tests'
quiet = true
require-return-section-when-returning-nothing = false
check-return-types = false
require-yield-section-when-yielding-nothing = false
check-yield-types = false
arg-type-hints-in-docstring = false
allow-init-docstring = true
skip-checking-raises = true
# this does not work well with type definitions in class attributes
check-class-attributes = false
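Taken together, these settings relax pydoclint's defaults: Google-style sections, no type hints repeated in the docstring, no Returns section when nothing is returned, and constructor docstrings on __init__ are allowed. A hedged sketch, with invented names, of a docstring that should pass under this configuration:

    class UserStore:
        """A tiny in-memory store used only to illustrate the configuration."""

        def __init__(self, capacity: int) -> None:
            """Initialize the store.

            Args:
                capacity: Maximum number of users to keep.
            """
            self._capacity = capacity
            self._users: list[str] = []

        def add(self, name: str) -> None:
            """Add a user to the store.

            Args:
                name: Display name of the user. No Returns section is required
                    because the method returns nothing.
            """
            self._users.append(name)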
26 changes: 14 additions & 12 deletions src/intelligence_layer/connectors/document_index/document_index.py
@@ -211,11 +211,10 @@ def validate_and_convert_datetime(
"""Validate field_value and convert datetime to RFC3339 format with Z suffix.
Args:
cls (BaseModel): The class that this method is bound to.
v (Union[str, int, float, bool]): The value to be validated and converted. # noqa: DAR102: + cls
v: The value to be validated and converted. # noqa: DAR102: + cls
Returns:
Union[str, int, float, bool]: The validated and converted value.
The validated and converted value.
"""
if isinstance(v, datetime):
if v.tzinfo is None or v.tzinfo.utcoffset(v) is None:
@@ -338,6 +337,12 @@ class DocumentIndexError(RuntimeError):
"""

def __init__(self, message: str, status_code: HTTPStatus) -> None:
"""Initialize the error.
Args:
message: Message to return.
status_code: Status code to return.
"""
super().__init__(message)
self.message = message
self.status_code = status_code
@@ -391,7 +396,7 @@ class DocumentIndexClient:
Document Index is a tool for managing collections of documents, enabling operations such as creation, deletion, listing, and searching.
Documents can be stored either in the cloud or in a local deployment.
Attributes:
Args:
token: A valid token for the document index API.
base_document_index_url: The url of the document index' API.
@@ -558,13 +563,10 @@ def create_filter_index_in_namespace(
"""Create a filter index in a specified namespace.
Args:
namespace (str): The namespace in which to create the filter index.
filter_index_name (str): The name of the filter index to create.
field_name (str): The name of the field to index.
field_type (Literal["string", "integer", "float", "boolean", "datetime"]): The type of the field to index.
Returns:
None
namespace: The namespace in which to create the filter index.
filter_index_name: The name of the filter index to create.
field_name: The name of the field to index.
field_type: The type of the field to index.
"""
if not re.match(r"^[a-zA-Z0-9\-.]+$", filter_index_name):
raise ValueError(
@@ -649,8 +651,8 @@ def delete_index_from_collection(
"""Delete an index from a collection.
Args:
index_name: Name of the index.
collection_path: Path to the collection of interest.
index_name: Name of the index.
"""
url_suffix = f"collections/{collection_path.namespace}/{collection_path.collection}/indexes/{index_name}"
url = urljoin(self._base_document_index_url, url_suffix)

@@ -133,6 +133,9 @@ def from_env(
this method tries to fetch it from the environment under the name of "CLIENT_URL".
If this is not present, it defaults to the Aleph Alpha Api.
If you have an on premise setup, change this to your host URL.
Returns:
A `LimitedConcurrencyClient`
"""
if token is None:
token = getenv("AA_TOKEN")
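The Returns section added here is easiest to read alongside a usage sketch. The import path and environment variables below are assumptions based on the docstring above (AA_TOKEN for the token, CLIENT_URL for an on-premise host), not a verified snippet:

    from intelligence_layer.connectors import LimitedConcurrencyClient  # import path assumed

    # Reads AA_TOKEN from the environment and, if set, CLIENT_URL;
    # otherwise the client falls back to the Aleph Alpha API.
    client = LimitedConcurrencyClient.from_env()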

@@ -39,9 +39,9 @@ class QdrantInMemoryRetriever(BaseRetriever[int]):
When run, the given query is embedded and scored against the document embeddings to retrieve the k-most similar matches by cosine similarity.
Args:
client: Aleph Alpha client instance for running model related API calls.
texts: The sequence of texts to be made searchable.
documents: The sequence of documents to be made searchable.
k: The (top) number of documents to be returned by search.
client: Aleph Alpha client instance for running model related API calls.
threshold: The minimum value of cosine similarity between the query vector and the document vector.
retriever_type: The type of retriever to be instantiated.
Should be `ASYMMETRIC` for most query-document retrieval use cases, `SYMMETRIC` is optimized
@@ -157,6 +157,9 @@ def get_filtered_documents_with_scores(
Args:
query: The text to be searched with.
filter: Conditions to filter by.
Returns:
All documents that correspond to the query and pass the filter.
"""
query_embedding = self._embed(query, self._query_representation)
search_result = self._search_client.search(
10 changes: 8 additions & 2 deletions src/intelligence_layer/core/model.py
@@ -438,8 +438,11 @@ def to_instruct_prompt(
input: Any context necessary to solve the task, such as the text to be summarized
response_prefix: Optional argument to append a string to the beginning of the
final agent message to steer the generation
input_controls: TextControls for the input part of the prompt. Only for text prompts
instruction_controls: TextControls for the instruction part of the prompt. Only for text prompts
instruction_controls: TextControls for the instruction part of the prompt. Only for text prompts.
input_controls: TextControls for the input part of the prompt. Only for text prompts.
Returns:
The rendered prompt with all variables filled in.
"""
rich_prompt = self.INSTRUCTION_PROMPT_TEMPLATE.to_rich_prompt(
instruction=instruction, input=input, response_prefix=response_prefix
@@ -706,6 +709,9 @@ def to_instruct_prompt(
final agent message to steer the generation
instruction_controls: Instruction controls are not used but needed for the interface.
input_controls: Input controls are not used but needed for the interface
Returns:
The rendered prompt with all variables filled in.
"""
if instruction_controls or input_controls:
warnings.warn(
16 changes: 13 additions & 3 deletions src/intelligence_layer/core/prompt_template.py
@@ -218,9 +218,13 @@ def __init__(self, template_str: str) -> None:
def placeholder(self, value: Union[Image, Tokens]) -> Placeholder:
"""Saves a non-text prompt item to the template and returns a placeholder.
The placeholder is used to embed the prompt item in the template
Args:
value: Tokens to store
The placeholder is used to embed the prompt item in the template
Returns:
A placeholder for the given non-text item.
"""
id = Placeholder(uuid4())
self._prompt_item_placeholders[id] = value
@@ -258,6 +262,9 @@ def embed_prompt(self, prompt: Prompt) -> str:
... ])
>>> template = PromptTemplate("Question: {{user_prompt}}\n Answer: ")
>>> prompt = template.to_rich_prompt(user_prompt=template.embed_prompt(user_prompt))
Returns:
The prompt template with the embedded prompt.
"""
prompt_text = ""
last_item = None
@@ -276,10 +283,13 @@
def to_rich_prompt(self, **kwargs: Any) -> RichPrompt:
"""Creates a `Prompt` along with metadata from the template string and the given parameters.
Currently, the only metadata returned is information about ranges that are marked in the template.
Provided parameters are passed to `liquid.Template.render`.
Args:
**kwargs: Parameters to enrich prompt with
Currently, the only metadata returned is information about ranges that are marked in the template.
Provided parameters are passed to `liquid.Template.render`.
Returns:
The rendered prompt as a `RichPrompt`
"""
context = PromptRangeContext(
self._template.env,
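A hedged usage sketch that mirrors the doctest embedded in embed_prompt above; the import path is assumed and the template text is invented:

    from intelligence_layer.core import PromptTemplate  # import path assumed

    template = PromptTemplate("Question: {{question}}\nAnswer:")
    rich_prompt = template.to_rich_prompt(question="What does RFC 3339 specify?")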
1 change: 1 addition & 0 deletions src/intelligence_layer/core/tracer/composite_tracer.py
@@ -74,6 +74,7 @@ class CompositeSpan(Generic[SpanVar], CompositeTracer[SpanVar], Span):
Args:
tracers: spans that will be forwarded all subsequent log and span calls.
context: Context of the parent. Defaults to None.
"""

def __init__(

@@ -48,8 +48,8 @@ def aggregation_overviews(
Args:
aggregation_type: Type of the aggregation.
Returns:
An :class:`Iterable` of :class:`AggregationOverview`s.
Yields:
:class:`AggregationOverview`s.
"""
for aggregation_id in self.aggregation_overview_ids():
aggregation_overview = self.aggregation_overview(
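This and several later hunks switch generator docstrings from Returns to Yields. One invented sketch illustrates the convention the docstring checker enforces: a generator method documents its items under a Yields section rather than describing the whole iterable under Returns:

    from collections.abc import Iterable


    def dataset_ids() -> Iterable[str]:
        """Iterate over all stored dataset IDs.

        Yields:
            The ID of each stored dataset, in sorted order.
        """
        yield from sorted(["ds-2", "ds-1"])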

@@ -210,7 +210,7 @@ def aggregate_evaluation(
Aggregates :class:`Evaluation`s according to the implementation of :func:`AggregationLogic.aggregate`.
Args:
eval_ids: An overview of the evaluation to be aggregated. Does not include
*eval_ids: An overview of the evaluation to be aggregated. Does not include
actual evaluations as these will be retrieved from the repository.
description: Optional description of the aggregation. Defaults to None.
labels: A list of labels for filtering. Defaults to an empty list.

@@ -66,8 +66,8 @@ def dataset(self, dataset_id: str) -> Optional[Dataset]:
def datasets(self) -> Iterable[Dataset]:
"""Returns all :class:`Dataset`s sorted by their ID.
Returns:
:class:`Sequence` of :class:`Dataset`s.
Yields:
:class:`Dataset`s.
"""
for dataset_id in self.dataset_ids():
dataset = self.dataset(dataset_id)

@@ -76,7 +76,7 @@ def examples(
examples_to_skip = examples_to_skip or frozenset()
answers = "ABCD"
assert input_type == MultipleChoiceInput
assert expected_output_type == str
assert expected_output_type is str
for index, sample in enumerate(self._huggingface_dataset["test"]):
if str(index) not in examples_to_skip:
yield Example(
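The switch from == to is in the assert above is a lint-driven cleanup rather than a behaviour change: when comparing against a class object itself, identity expresses the intent more precisely than equality, and isinstance remains the idiomatic check for values. A minimal sketch:

    expected_output_type = str

    # Identity: the variable holds exactly the built-in str class.
    assert expected_output_type is str

    # For checking values rather than classes, isinstance is preferred.
    assert isinstance("some text", expected_output_type)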

@@ -104,8 +104,8 @@ def evaluation_overview(self, evaluation_id: str) -> Optional[EvaluationOverview]:
def evaluation_overviews(self) -> Iterable[EvaluationOverview]:
"""Returns all :class:`EvaluationOverview`s sorted by their ID.
Returns:
:class:`Iterable` of :class:`EvaluationOverview`s.
Yields:
:class:`EvaluationOverview`s.
"""
for eval_id in self.evaluation_overview_ids():
evaluation_overview = self.evaluation_overview(eval_id)

@@ -66,7 +66,7 @@ def to_record(
Args:
example: The example to be translated.
output: The output of the example that was run.
*output: The output of the example that was run.
Returns:
A :class:`RecordDataSequence` that contains entries that should be evaluated in Argilla.

@@ -30,7 +30,7 @@ def submit(
Failed submissions are saved as FailedExampleEvaluations.
Args:
run_ids: The runs to be evaluated. Each run is expected to have the same
*run_ids: The runs to be evaluated. Each run is expected to have the same
dataset as input (which implies their tasks have the same input-type)
and their tasks have the same output-type. For each example in the
dataset referenced by the runs the outputs of all runs are collected
@@ -91,8 +91,8 @@ def partial_evaluation_overview(
def partial_evaluation_overviews(self) -> Iterable[PartialEvaluationOverview]:
"""Returns all :class:`PartialEvaluationOverview`s sorted by their ID.
Returns:
:class:`Iterable` of :class:`PartialEvaluationOverview`s.
Yields:
:class:`PartialEvaluationOverview`s.
"""
for eval_id in self.partial_evaluation_overview_ids():
evaluation_overview = self.partial_evaluation_overview(eval_id)

@@ -345,7 +345,7 @@ def _retrieve_eval_logic_input(
def failed_evaluations(
self, evaluation_id: str
) -> Iterable[EvaluationLineage[Input, ExpectedOutput, Output, Evaluation]]:
"""Returns the `EvaluationLineage` objects for all failed example evalations that belong to the given evaluation ID.
"""Returns the `EvaluationLineage` objects for all failed example evaluations that belong to the given evaluation ID.
Args:
evaluation_id: The ID of the evaluation overview

@@ -43,7 +43,7 @@ def do_evaluate(
Args:
example: Input data of :class:`Task` to produce the output.
output: Output of the :class:`Task`.
*output: Output of the :class:`Task`.
Returns:
The metrics that come from the evaluated :class:`Task`.
@@ -112,7 +112,7 @@ def evaluate_runs(
stored in the provided :class:`EvaluationRepository`.
Args:
run_ids: The runs to be evaluated. Each run is expected to have the same
*run_ids: The runs to be evaluated. Each run is expected to have the same
dataset as input (which implies their tasks have the same input-type)
and their tasks have the same output-type. For each example in the
dataset referenced by the runs the outputs of all runs are collected
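The starred docstring entries in the hunks above (*run_ids, *output) follow the convention pydoclint expects for variadic parameters: the Args entry carries the same * prefix as the signature. A small invented example of the pattern:

    def evaluate_runs(*run_ids: str, description: str = "") -> int:
        """Count how many runs would be evaluated.

        Args:
            *run_ids: IDs of the runs to evaluate.
            description: Optional description of the evaluation.

        Returns:
            The number of run IDs received.
        """
        return len(run_ids)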
