Skip to content

Commit

Permalink
Add size limit to jira tickets
Browse files Browse the repository at this point in the history
  • Loading branch information
Weves committed Sep 28, 2024
1 parent b73d66c commit 9d69b48
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 1 deletion.
4 changes: 4 additions & 0 deletions backend/danswer/configs/app_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,10 @@
for ignored_tag in os.environ.get("JIRA_CONNECTOR_LABELS_TO_SKIP", "").split(",")
if ignored_tag
]
# Maximum size for Jira tickets in bytes (default: 100KB).
# `or` is used instead of a .get() default so that a variable which is set but
# empty (e.g. passed through as `${VAR:-}`) falls back to the default rather
# than crashing on int("") at import time.
JIRA_CONNECTOR_MAX_TICKET_SIZE = int(
    os.environ.get("JIRA_CONNECTOR_MAX_TICKET_SIZE") or 100 * 1024
)

GONG_CONNECTOR_START_TIME = os.environ.get("GONG_CONNECTOR_START_TIME")

Expand Down
13 changes: 12 additions & 1 deletion backend/danswer/connectors/danswer_jira/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.app_configs import JIRA_CONNECTOR_LABELS_TO_SKIP
from danswer.configs.app_configs import JIRA_CONNECTOR_MAX_TICKET_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from danswer.connectors.interfaces import GenerateDocumentsOutput
Expand Down Expand Up @@ -134,10 +135,20 @@ def fetch_jira_issues_batch(
else extract_text_from_adf(jira.raw["fields"]["description"])
)
comments = _get_comment_strs(jira, comment_email_blacklist)
semantic_rep = f"{description}\n" + "\n".join(
ticket_content = f"{description}\n" + "\n".join(
[f"Comment: {comment}" for comment in comments if comment]
)

# Check ticket size
if len(ticket_content.encode("utf-8")) > JIRA_CONNECTOR_MAX_TICKET_SIZE:
logger.info(
f"Skipping {jira.key} because it exceeds the maximum size of "
f"{JIRA_CONNECTOR_MAX_TICKET_SIZE} bytes."
)
continue

semantic_rep = ticket_content

page_url = f"{jira_client.client_info()}/browse/{jira.key}"

people = set()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest
from jira.resources import Issue

from danswer.connectors.danswer_jira.connector import fetch_jira_issues_batch


@pytest.fixture
def mock_jira_client():
    """Provide a stand-in Jira client whose methods each test can stub."""
    client = MagicMock()
    return client


@pytest.fixture
def mock_issue_small():
    """Build a mocked Jira issue with a short description and two short comments."""
    ticket = MagicMock()
    ticket.key = "SMALL-1"

    # `ticket.fields` is a child mock; MagicMock hands back the same child on
    # every access, so configuring it through a local alias is equivalent.
    fields = ticket.fields
    fields.summary = "Small Issue"
    fields.description = "Small description"
    fields.labels = []
    fields.updated = "2023-01-01T00:00:00+0000"

    fields.creator.displayName = "John Doe"
    fields.creator.emailAddress = "[email protected]"

    fields.comment.comments = [
        MagicMock(body=text) for text in ("Small comment 1", "Small comment 2")
    ]
    return ticket


@pytest.fixture
def mock_issue_large():
    """Build a mocked Jira issue whose description plus comments exceed 100KB."""
    # The description alone is 99,000 characters; the two repeated comment
    # bodies push the combined text past 100KB.
    comment_bodies = (
        "Large comment " * 1000,
        "Another large comment " * 1000,
    )

    ticket = MagicMock()
    ticket.key = "LARGE-1"

    fields = ticket.fields
    fields.summary = "Large Issue"
    fields.description = "a" * 99_000
    fields.labels = []
    fields.updated = "2023-01-02T00:00:00+0000"

    fields.creator.displayName = "Jane Doe"
    fields.creator.emailAddress = "[email protected]"

    fields.comment.comments = [MagicMock(body=text) for text in comment_bodies]
    return ticket


@pytest.fixture
def patched_type():
    """Return a replacement for `type` that reports every MagicMock as `Issue`.

    Objects that are not MagicMock instances fall through to the builtin `type`.
    """

    def _patched_type(obj):
        return Issue if isinstance(obj, MagicMock) else type(obj)

    return _patched_type


@pytest.fixture
def mock_jira_api_version():
    """Pin the connector module's JIRA_API_VERSION to "2" while a test runs."""
    version_patch = patch(
        "danswer.connectors.danswer_jira.connector.JIRA_API_VERSION", "2"
    )
    with version_patch:
        yield


@pytest.fixture
def patched_environment(patched_type, mock_jira_api_version):
    """Swap the connector module's `type` lookup for the `patched_type` stand-in.

    Requesting `mock_jira_api_version` keeps the API version patch active for
    the same duration.
    """
    type_patch = patch("danswer.connectors.danswer_jira.connector.type", patched_type)
    with type_patch:
        yield


def test_fetch_jira_issues_batch_small_ticket(
    mock_jira_client, mock_issue_small, patched_environment
):
    """A small ticket yields one document containing its description and comments."""
    mock_jira_client.search_issues.return_value = [mock_issue_small]

    documents, total = fetch_jira_issues_batch("project = TEST", 0, mock_jira_client)

    assert total == 1
    assert len(documents) == 1

    document = documents[0]
    assert document.id.endswith("/SMALL-1")

    body = document.sections[0].text
    for expected in ("Small description", "Small comment 1", "Small comment 2"):
        assert expected in body


# Pin the limit to 100KB so the outcome does not depend on whatever
# JIRA_CONNECTOR_MAX_TICKET_SIZE the config picked up from the environment.
@patch(
    "danswer.connectors.danswer_jira.connector.JIRA_CONNECTOR_MAX_TICKET_SIZE",
    100 * 1024,
)
def test_fetch_jira_issues_batch_large_ticket(
    mock_jira_client, mock_issue_large, patched_environment
):
    """A ticket over the size limit is counted as fetched but yields no document."""
    mock_jira_client.search_issues.return_value = [mock_issue_large]

    docs, count = fetch_jira_issues_batch("project = TEST", 0, mock_jira_client)

    assert count == 1
    assert len(docs) == 0  # The large ticket should be skipped


# Pin the limit to 100KB so the outcome does not depend on whatever
# JIRA_CONNECTOR_MAX_TICKET_SIZE the config picked up from the environment.
@patch(
    "danswer.connectors.danswer_jira.connector.JIRA_CONNECTOR_MAX_TICKET_SIZE",
    100 * 1024,
)
def test_fetch_jira_issues_batch_mixed_tickets(
    mock_jira_client, mock_issue_small, mock_issue_large, patched_environment
):
    """With one small and one oversized ticket, only the small one is returned."""
    mock_jira_client.search_issues.return_value = [mock_issue_small, mock_issue_large]

    docs, count = fetch_jira_issues_batch("project = TEST", 0, mock_jira_client)

    assert count == 2
    assert len(docs) == 1  # Only the small ticket should be included
    assert docs[0].id.endswith("/SMALL-1")


@patch("danswer.connectors.danswer_jira.connector.JIRA_CONNECTOR_MAX_TICKET_SIZE", 50)
def test_fetch_jira_issues_batch_custom_size_limit(
    mock_jira_client, mock_issue_small, mock_issue_large, patched_environment
):
    """With the limit patched down to 50 bytes, both tickets are skipped."""
    tickets = [mock_issue_small, mock_issue_large]
    mock_jira_client.search_issues.return_value = tickets

    documents, total = fetch_jira_issues_batch("project = TEST", 0, mock_jira_client)

    assert total == 2
    # Both tickets should be skipped due to the low size limit
    assert len(documents) == 0

0 comments on commit 9d69b48

Please sign in to comment.