diff --git a/msteams/README.md b/msteams/README.md index d45ae84c4..d83b0bb15 100644 --- a/msteams/README.md +++ b/msteams/README.md @@ -2,7 +2,7 @@ This package is a utility for connecting Cohere to Microsoft Teams. -It uses Microsoft Graph API run the search query and return matching Teams chat messages. +It uses Microsoft Graph API to run the search query and return matching Teams chat messages. ## Limitations @@ -40,7 +40,7 @@ These can be read from a .env file. See `.env-template`. The values for `MSTEAMS_TENANT_ID`, `MSTEAMS_CLIENT_ID` and `MSTEAMS_CLIENT_SECRET` come from Microsoft 365 admin. You must create an app registration in Microsoft 365 admin, and grant -the appropriate permissions. The MSTEAMS_USER_ID represents the user ID of the individual who registered the app. +the appropriate permissions. The `MSTEAMS_USER_ID` represents the user ID of the individual who registered the app. To obtain the user ID, you can use the Microsoft Entra admin center Identity -> Users -> All Users menu and select your user. The user ID is the "Object ID" field. This information is essential for app-only authentication and is relevant to the limitations of the Microsoft Graph API search functionality. @@ -49,14 +49,20 @@ relevant to the limitations of the Microsoft Graph API search functionality. #### OAuth -When using OAuth for authentication, the connector does not require any additional environment variables. Instead, the OAuth flow should occur outside of the Connector and Cohere's API will forward the user's access token to this connector through the `Authorization` header. +When using OAuth for authentication, set the following environment variable: + +- `MSTEAMS_GRAPH_AUTH_TYPE` to `user` + +The OAuth flow should occur outside of the Connector and Cohere's API will forward the user's access token +to this connector through the `Authorization` header. With OAuth the connector will be able to search any Teams chat messages that the user has access to. 
-To configure OAuth, follow the same steps in the Configuration section to create a Microsoft 365 App. You will also need to register a redirect URI on that app to `https://api.cohere.com/v1/connectors/oauth/token`. +To configure OAuth, follow the same steps in the Configuration section to create a Microsoft 365 App. +You will also need to register a redirect URI on that app to `https://api.cohere.com/v1/connectors/oauth/token`. You can then register the connector with Cohere's API using the following configuration: -Note: Your App key and App secret values correspond to `client_id` and `client_secret` respectively. +Note: Your `MSTEAMS_GRAPH_CLIENT_ID` and `MSTEAMS_GRAPH_CLIENT_SECRET` values correspond to `client_id` and `client_secret` respectively. ```bash curl -X POST \ @@ -70,18 +76,14 @@ curl -X POST \ "oauth": { "client_id": "{Your App CLIENT-ID}", "client_secret": "{Your App CLIENT-SECRET}", - "authorize_url": "https://login.microsoftonline.com/{Your APP Tenant ID}/oauth2/v2.0/authorize" - "token_url": "https://login.microsoftonline.com/{Your APP Tenant ID}/oauth2/v2.0/token" + "authorize_url": "https://login.microsoftonline.com/{Your App TENANT-ID}/oauth2/v2.0/authorize", + "token_url": "https://login.microsoftonline.com/{Your App TENANT-ID}/oauth2/v2.0/token", "scope": ".default offline_access" } }' ``` - - -To make a search requests to Microsoft Graph API, the connector needs to be authenticated with a token obtained using -Oauth 2.0, and passed to the connector in the Authorization header with the Bearer schema. - -The connector will search the chat messages accessible by the user who is authenticated. +No more configuration is needed here and after successful registration, +Cohere will take care of OAuth steps including passing in the correct headers to your connector. ## Unstructured @@ -119,8 +121,7 @@ https://entra.microsoft.com/ Navigate to "Applications -> App registrations", and use the "New registration" option. 
Select "Web" as the platform, and ensure you add a redirect URL, even if it is optional. -The redirect URL is required for the admin consent step to work. This connector does not -have a redirect page implemented, but you can use http://localhost/ as the redirect URL. +The redirect URL is required for the admin consent step to work. On the app registration page for the app you have created, go to API permissions, and grant permissions. For development purposes, you can grant: diff --git a/msteams/provider/client.py b/msteams/provider/client.py index a8ec452c3..c61c71311 100644 --- a/msteams/provider/client.py +++ b/msteams/provider/client.py @@ -10,17 +10,18 @@ AUTHORIZATION_HEADER = "Authorization" BEARER_PREFIX = "Bearer " -CACHE_SIZE = 256 class MsTeamsClient: DEFAULT_SCOPES = ["https://graph.microsoft.com/.default"] SEARCH_URL = "https://graph.microsoft.com/v1.0/search/query" + SEARCH_ENTITY_TYPES = ["chatMessage"] APPLICATION_AUTH = "application" DELEGATED_AUTH = "user" def __init__(self, auth_type, search_limit=5): self.access_token = None + self.headers = None self.user = None self.auth_type = auth_type self.search_limit = search_limit @@ -61,6 +62,7 @@ def set_app_access_token(self, tenant_id, client_id, client_secret): "Error while retrieving access token from Microsoft Graph API" ) self.access_token = token_response["access_token"] + self.headers = {"Authorization": f"Bearer {self.access_token}"} except Exception as e: raise UpstreamProviderError( f"Error while initializing Teams client: {str(e)}" @@ -82,7 +84,7 @@ async def _get_message(self, hit): async with self.session.get( url, - headers={"Authorization": f"Bearer {self.access_token}"}, + headers=self.headers, params=params, ) as response: content = await response.json() @@ -107,9 +109,7 @@ async def _prepare_attachment_download_url(self, attachment): graph_api_url = ( f"https://graph.microsoft.com/v1.0/shares/{prepared}/driveItem/content" ) - async with self.session.get( - graph_api_url, 
headers={"Authorization": f"Bearer {self.access_token}"} - ) as response: + async with self.session.get(graph_api_url, headers=self.headers) as response: content = await response.content.read() if not response.ok: return attachment @@ -146,11 +146,11 @@ def _delegated_search(self, query): results = [] response = requests.post( self.SEARCH_URL, - headers={"Authorization": f"Bearer {self.access_token}"}, + headers=self.headers, json={ "requests": [ { - "entityTypes": ["chatMessage"], + "entityTypes": self.SEARCH_ENTITY_TYPES, "query": { "queryString": query, "size": self.search_limit, @@ -181,19 +181,12 @@ def _app_search(self, query, user=None): f"https://graph.microsoft.com/v1.0/users/{self.user}/chats/getAllMessages" ) - # Set up the request headers - headers = { - "Authorization": f"Bearer {self.access_token}", - "Accept": "application/json", - "Content-Type": "application/json", - } - params = { "$select": "id,subject,summary,body,from,createdDateTime,webUrl,attachments,eventDetail", "$top": self.search_limit, } # Make a request to the Microsoft Graph API to get messages - response = requests.get(graph_api_url, headers=headers, params=params) + response = requests.get(graph_api_url, headers=self.headers, params=params) if not response.ok: raise UpstreamProviderError( f"Error while searching Outlook: {response.text}" diff --git a/msteams/provider/provider.py b/msteams/provider/provider.py index ae3838567..6afaf642b 100644 --- a/msteams/provider/provider.py +++ b/msteams/provider/provider.py @@ -39,32 +39,39 @@ def serialize_results(results): ] -def parse_results_attachments(results): - attachments_to_unstructured = [] +def prepare_attachments_to_parse(results): + attachments_to_parse = [] + for result in results: + attachments_to_parse.extend( + [ + attachment + for attachment in result["attachments"] + if attachment["contentType"] == "reference" + ] + ) + return attachments_to_parse + + +def update_attachments_content(results, parsed_results): for result in 
results: - attachments = result["attachments"] - if attachments: - for attachment in attachments: - if attachment["contentType"] == "reference": - attachments_to_unstructured.append(attachment) + for attachment in result["attachments"]: + attachment["content"] = ( + parsed_results[attachment["id"]]["content"] + if attachment["id"] in parsed_results + else "" + ) + result["body"]["content"] += attachment["content"] + + +def parse_results_attachments(results): + attachments_to_unstructured = prepare_attachments_to_parse(results) if len(attachments_to_unstructured) > 0: unstructured_client = get_unstructured_client() unstructured_client.start_session() unstructured_results = unstructured_client.batch_get( attachments_to_unstructured ) - for result in results: - attachments = result["attachments"] - if attachments: - for attachment in attachments: - if attachment["contentType"] == "reference": - attachment["content"] = ( - unstructured_results[attachment["id"]]["content"] - if attachment["id"] in unstructured_results - else "" - ) - result["body"]["content"] += attachment["content"] - + update_attachments_content(results, unstructured_results) return results