Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

replace deprecated confluence group api endpoint #3197

Merged
merged 5 commits into from
Nov 22, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 1 addition & 18 deletions backend/danswer/connectors/confluence/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,13 @@
from typing import Any
from urllib.parse import quote

from atlassian import Confluence # type: ignore

from danswer.configs.app_configs import CONFLUENCE_CONNECTOR_LABELS_TO_SKIP
from danswer.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.confluence.onyx_confluence import build_confluence_client
from danswer.connectors.confluence.onyx_confluence import OnyxConfluence
from danswer.connectors.confluence.utils import attachment_to_content
from danswer.connectors.confluence.utils import build_confluence_client
from danswer.connectors.confluence.utils import build_confluence_document_id
from danswer.connectors.confluence.utils import datetime_from_string
from danswer.connectors.confluence.utils import extract_text_from_confluence_html
Expand Down Expand Up @@ -118,21 +116,6 @@ def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None
is_cloud=self.is_cloud,
wiki_base=self.wiki_base,
)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

moved this to the onyx_confluence.py file

client_without_retries = Confluence(
api_version="cloud" if self.is_cloud else "latest",
url=self.wiki_base.rstrip("/"),
username=credentials["confluence_username"] if self.is_cloud else None,
password=credentials["confluence_access_token"] if self.is_cloud else None,
token=credentials["confluence_access_token"] if not self.is_cloud else None,
)
spaces = client_without_retries.get_all_spaces(limit=1)
if not spaces:
raise RuntimeError(
f"No spaces found at {self.wiki_base}! "
"Check your credentials and wiki_base and make sure "
"is_cloud is set correctly."
)
return None

def _get_comment_string_for_page_id(self, page_id: str) -> str:
Expand Down
70 changes: 66 additions & 4 deletions backend/danswer/connectors/confluence/onyx_confluence.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ def _traverse_and_update(data: dict | list) -> None:

def paginated_cql_user_retrieval(
self,
cql: str,
is_cloud: bool,
expand: str | None = None,
limit: int | None = None,
) -> Iterator[dict[str, Any]]:
Expand All @@ -241,10 +241,23 @@ def paginated_cql_user_retrieval(
It's a separate endpoint from the content/search endpoint used only for users.
Otherwise it's very similar to the content/search endpoint.
"""
cql = quote("type=user")
url = "rest/api/search/user" if is_cloud else "rest/api/search"
expand_string = f"&expand={expand}" if expand else ""
yield from self._paginate_url(
f"rest/api/search/user?cql={cql}{expand_string}", limit
)
url += f"?cql={cql}{expand_string}"
yield from self._paginate_url(url, limit)

def paginated_groups_by_user_retrieval(
self,
user_query: str,
limit: int | None = None,
) -> Iterator[dict[str, Any]]:
"""
This is not an SQL like query.
It's a confluence specific endpoint that can be used to fetch groups.
"""
url = f"rest/api/user/memberof?{user_query}"
yield from self._paginate_url(url, limit)

def paginated_groups_retrieval(
self,
Expand All @@ -264,6 +277,55 @@ def paginated_group_members_retrieval(
"""
This is not an SQL like query.
It's a confluence specific endpoint that can be used to fetch the members of a group.
THIS DOESN'T WORK FOR SERVER because it breaks when there is a slash in the group name.
E.g. neither "test/group" nor "test%2Fgroup" works for confluence.
"""
group_name = quote(group_name)
yield from self._paginate_url(f"rest/api/group/{group_name}/member", limit)


def _validate_connector_configuration(
    credentials: dict[str, Any],
    is_cloud: bool,
    wiki_base: str,
) -> None:
    """
    Probe the Confluence instance once to confirm the configuration is usable.

    A plain `Confluence` client (no retry/backoff wrapper) is built from the
    given settings and asked for a single space. If nothing comes back, a
    `RuntimeError` is raised pointing at the likely misconfiguration.

    Cloud authenticates with username + access token passed as the password;
    otherwise only the access token is sent, as `token`.
    """
    base_url = wiki_base.rstrip("/")

    if is_cloud:
        api_version = "cloud"
        username = credentials["confluence_username"]
        password = credentials["confluence_access_token"]
        token = None
    else:
        api_version = "latest"
        username = None
        password = None
        token = credentials["confluence_access_token"]

    # Direct client on purpose: a bad configuration should fail right away
    # instead of being retried.
    probe_client = Confluence(
        api_version=api_version,
        url=base_url,
        username=username,
        password=password,
        token=token,
    )

    if probe_client.get_all_spaces(limit=1):
        return

    raise RuntimeError(
        f"No spaces found at {wiki_base}! "
        "Check your credentials and wiki_base and make sure "
        "is_cloud is set correctly."
    )


def build_confluence_client(
    credentials: dict[str, Any],
    is_cloud: bool,
    wiki_base: str,
) -> OnyxConfluence:
    """
    Validate the connector settings, then return a retrying `OnyxConfluence`.

    Validation runs first via `_validate_connector_configuration`, so any
    error it raises propagates before the client is constructed. The returned
    client has backoff-and-retry enabled (up to 10 retries, capped at 60
    seconds of backoff).
    """
    _validate_connector_configuration(
        credentials=credentials,
        is_cloud=is_cloud,
        wiki_base=wiki_base,
    )

    # Remove trailing slash from wiki_base if present
    base_url = wiki_base.rstrip("/")

    if is_cloud:
        api_version = "cloud"
        username = credentials["confluence_username"]
        password = credentials["confluence_access_token"]
        token = None
    else:
        api_version = "latest"
        # passing in username causes issues for Confluence data center
        username = None
        password = None
        token = credentials["confluence_access_token"]

    return OnyxConfluence(
        api_version=api_version,
        url=base_url,
        username=username,
        password=password,
        token=token,
        backoff_and_retry=True,
        max_backoff_retries=10,
        max_backoff_seconds=60,
    )
17 changes: 0 additions & 17 deletions backend/danswer/connectors/confluence/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,20 +269,3 @@ def datetime_from_string(datetime_string: str) -> datetime:
datetime_object = datetime_object.astimezone(timezone.utc)

return datetime_object


Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

moved this to the onyx_confluence.py file

def build_confluence_client(
    credentials_json: dict[str, Any], is_cloud: bool, wiki_base: str
) -> OnyxConfluence:
    """
    Construct an `OnyxConfluence` client with backoff-and-retry enabled.

    Cloud authenticates with username + access token passed as the password;
    otherwise only the access token is sent, as `token`. No connectivity
    check is performed here.
    """
    # Remove trailing slash from wiki_base if present
    base_url = wiki_base.rstrip("/")

    if is_cloud:
        api_version = "cloud"
        username = credentials_json["confluence_username"]
        password = credentials_json["confluence_access_token"]
        token = None
    else:
        api_version = "latest"
        # passing in username causes issues for Confluence data center
        username = None
        password = None
        token = credentials_json["confluence_access_token"]

    return OnyxConfluence(
        api_version=api_version,
        url=base_url,
        username=username,
        password=password,
        token=token,
        backoff_and_retry=True,
        max_backoff_retries=10,
        max_backoff_seconds=60,
    )
76 changes: 40 additions & 36 deletions backend/ee/danswer/external_permissions/confluence/group_sync.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from atlassian import Confluence # type: ignore
from typing import Any

from danswer.connectors.confluence.onyx_confluence import build_confluence_client
from danswer.connectors.confluence.onyx_confluence import OnyxConfluence
from danswer.connectors.confluence.utils import build_confluence_client
from danswer.connectors.confluence.utils import get_user_email_from_username__server
from danswer.db.models import ConnectorCredentialPair
from danswer.utils.logger import setup_logger
Expand All @@ -11,22 +11,46 @@
logger = setup_logger()


def _get_group_members_email_paginated(
def _get_group_ids_for_user(
confluence_client: OnyxConfluence,
group_name: str,
is_cloud: bool,
user: dict[str, Any],
) -> set[str]:
group_member_emails: set[str] = set()
for member in confluence_client.paginated_group_members_retrieval(group_name):
email = member.get("email")
user_field = "accountId" if is_cloud else "key"
user_value = user["accountId"] if is_cloud else user["userKey"]
# Server uses userKey (but calls it key during the API call), Cloud uses accountId
user_query = f"{user_field}={user_value}"

group_ids_for_user: set[str] = set()
for group in confluence_client.paginated_groups_by_user_retrieval(user_query):
group_ids_for_user.add(group["name"])

return group_ids_for_user


def _build_group_member_email_map(
confluence_client: OnyxConfluence,
is_cloud: bool,
) -> dict[str, set[str]]:
group_member_emails: dict[str, set[str]] = {}
for user_result in confluence_client.paginated_cql_user_retrieval(is_cloud):
user = user_result["user"]
email = user.get("email")
if not email:
user_name = member.get("username")
# This field is only present in Confluence Server
user_name = user.get("username")
# If it is present, try to get the email using a Server-specific method
if user_name:
email = get_user_email_from_username__server(
confluence_client=confluence_client,
user_name=user_name,
)
if email:
group_member_emails.add(email)
if not email:
# If we still don't have an email, skip this user
continue

for group_id in _get_group_ids_for_user(confluence_client, is_cloud, user):
group_member_emails.setdefault(group_id, set()).add(email)

return group_member_emails

Expand All @@ -38,41 +62,21 @@ def confluence_group_sync(
is_cloud = cc_pair.connector.connector_specific_config.get("is_cloud", False)
wiki_base = cc_pair.connector.connector_specific_config["wiki_base"]

# test connection with direct client, no retries
confluence_client = Confluence(
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

moved this to the onyx_confluence.py file

api_version="cloud" if is_cloud else "latest",
url=wiki_base.rstrip("/"),
username=credentials["confluence_username"] if is_cloud else None,
password=credentials["confluence_access_token"] if is_cloud else None,
token=credentials["confluence_access_token"] if not is_cloud else None,
)
spaces = confluence_client.get_all_spaces(limit=1)
if not spaces:
raise RuntimeError(f"No spaces found at {wiki_base}!")

confluence_client = build_confluence_client(
credentials_json=credentials,
is_cloud=is_cloud,
wiki_base=wiki_base,
)

# Get all group names
group_names: list[str] = []
for group in confluence_client.paginated_groups_retrieval():
if group_name := group.get("name"):
group_names.append(group_name)

# For each group name, get all members and create a danswer group
group_member_email_map = _build_group_member_email_map(
confluence_client=confluence_client,
is_cloud=is_cloud,
)
danswer_groups: list[ExternalUserGroup] = []
for group_name in group_names:
group_member_emails = _get_group_members_email_paginated(
confluence_client, group_name
)
if not group_member_emails:
continue
for group_id, group_member_emails in group_member_email_map.items():
danswer_groups.append(
ExternalUserGroup(
id=group_name,
id=group_id,
user_emails=list(group_member_emails),
)
)
Expand Down
Loading