Skip to content

Commit

Permalink
Sync sdk with internal
Browse files (browse the repository at this point in the history)
  • Loading branch information
benrules3 committed Jun 27, 2024
1 parent 696e948 commit bcae54c
Show file tree
Hide file tree
Showing 4 changed files with 3 additions and 7 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,8 @@ on:
workflow_dispatch: {}

jobs:

test_client:
- runs-on: large
+ runs-on: ubuntu-latest

permissions:
contents: read
Expand Down
3 changes: 1 addition & 2 deletions compass_sdk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
COHERE_API_ENV_VAR,
DEFAULT_COMMANDR_EXTRACTABLE_ATTRIBUTES,
DEFAULT_COMMANDR_PROMPT,
DEFAULT_MAX_TOKENS_METADATA,
DEFAULT_MIN_CHARS_PER_ELEMENT,
DEFAULT_MIN_NUM_CHUNKS_IN_TITLE,
DEFAULT_MIN_NUM_TOKENS_CHUNK,
Expand Down Expand Up @@ -296,7 +295,7 @@ class ParserConfig(ValidatedModel):
num_tokens_overlap: int = DEFAULT_NUM_TOKENS_CHUNK_OVERLAP
min_chunk_tokens: int = DEFAULT_MIN_NUM_TOKENS_CHUNK
num_chunks_in_title: int = DEFAULT_MIN_NUM_CHUNKS_IN_TITLE
- max_tokens_metadata: int = DEFAULT_MAX_TOKENS_METADATA
+ max_tokens_metadata: int = 1000
include_tables: bool = True

# Formatting configuration
Expand Down
2 changes: 0 additions & 2 deletions compass_sdk/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,9 @@
DEFAULT_NUM_TOKENS_PER_CHUNK = 500
DEFAULT_NUM_TOKENS_CHUNK_OVERLAP = 15
DEFAULT_MIN_NUM_TOKENS_CHUNK = 5
DEFAULT_MAX_TOKENS_METADATA = 50
DEFAULT_MIN_NUM_CHUNKS_IN_TITLE = 1

DEFAULT_WIDTH_HEIGHT_VERTICAL_RATIO = 0.6
NUM_ADDITIONAL_CHARS_FOR_METADATA = 100
SKIP_INFER_TABLE_TYPES = ["jpg", "png", "xls", "xlsx", "heic"]

# Metadata detection constants
Expand Down
2 changes: 1 addition & 1 deletion compass_sdk/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def process_file(
return []
if len(doc.filebytes) > DEFAULT_MAX_ACCEPTED_FILE_SIZE_BYTES:
logger.error(
- f"File too large, supported file size is {DEFAULT_MAX_ACCEPTED_FILE_SIZE_BYTES / 1000_1000} "
+ f"File too large, supported file size is {DEFAULT_MAX_ACCEPTED_FILE_SIZE_BYTES / 1000_000} "
f"mb, filename {doc.metadata.filename}"
)
return []
Expand Down

0 comments on commit bcae54c

Please sign in to comment.