Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Benrules1/sync with internal #6

Merged
merged 2 commits into from
Jun 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,8 @@ on:
workflow_dispatch: {}

jobs:

test_client:
runs-on: large
runs-on: ubuntu-latest

permissions:
contents: read
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
**/__pycache__
3 changes: 1 addition & 2 deletions compass_sdk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
COHERE_API_ENV_VAR,
DEFAULT_COMMANDR_EXTRACTABLE_ATTRIBUTES,
DEFAULT_COMMANDR_PROMPT,
DEFAULT_MAX_TOKENS_METADATA,
DEFAULT_MIN_CHARS_PER_ELEMENT,
DEFAULT_MIN_NUM_CHUNKS_IN_TITLE,
DEFAULT_MIN_NUM_TOKENS_CHUNK,
Expand Down Expand Up @@ -296,7 +295,7 @@ class ParserConfig(ValidatedModel):
num_tokens_overlap: int = DEFAULT_NUM_TOKENS_CHUNK_OVERLAP
min_chunk_tokens: int = DEFAULT_MIN_NUM_TOKENS_CHUNK
num_chunks_in_title: int = DEFAULT_MIN_NUM_CHUNKS_IN_TITLE
max_tokens_metadata: int = DEFAULT_MAX_TOKENS_METADATA
max_tokens_metadata: int = 1000
include_tables: bool = True

# Formatting configuration
Expand Down
2 changes: 0 additions & 2 deletions compass_sdk/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,9 @@
DEFAULT_NUM_TOKENS_PER_CHUNK = 500
DEFAULT_NUM_TOKENS_CHUNK_OVERLAP = 15
DEFAULT_MIN_NUM_TOKENS_CHUNK = 5
DEFAULT_MAX_TOKENS_METADATA = 50
DEFAULT_MIN_NUM_CHUNKS_IN_TITLE = 1

DEFAULT_WIDTH_HEIGHT_VERTICAL_RATIO = 0.6
NUM_ADDITIONAL_CHARS_FOR_METADATA = 100
SKIP_INFER_TABLE_TYPES = ["jpg", "png", "xls", "xlsx", "heic"]

# Metadata detection constants
Expand Down
2 changes: 1 addition & 1 deletion compass_sdk/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def process_file(
return []
if len(doc.filebytes) > DEFAULT_MAX_ACCEPTED_FILE_SIZE_BYTES:
logger.error(
f"File too large, supported file size is {DEFAULT_MAX_ACCEPTED_FILE_SIZE_BYTES / 1000_1000} "
f"File too large, supported file size is {DEFAULT_MAX_ACCEPTED_FILE_SIZE_BYTES / 1000_000} "
f"mb, filename {doc.metadata.filename}"
)
return []
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading