Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Elasticsearch] Improvements #21

Merged
merged 2 commits into from
Dec 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions elastic/.env-template
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,8 @@ ELASTIC_API_KEY=
ELASTIC_USER=elastic
ELASTIC_PASS=pass

# Optional
ELASTIC_SEARCH_LIMIT=10

# Connector Authorization
ELASTIC_CONNECTOR_API_KEY=
11 changes: 8 additions & 3 deletions elastic/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,15 @@ Currently this connector will perform full-text search, but only for a single in

## Configuration

You will need to configure this connector with the credentials and path necessary to connect to your Elasticsearch instance.
This connector is currently only configured to search a single index of your ES cluster, you will need to point to it by specifying your `ELASTIC_CLOUD_ID`, `ELASTIC_URL`, and `ELASTIC_INDEX`.
You will need to configure this connector with the connection details and authentication credentials for your Elasticsearch instance. These must be set as environment variables; we recommend creating a `.env` file based on the `.env-template`.

Then, to authorize your connection you will either require an `ELASTIC_API_KEY` _or_ both `ELASTIC_USER` and `ELASTIC_PASS`.
1. To configure your connection details, _either_ `ELASTIC_CLOUD_ID` or `ELASTIC_URL` must be provided. You will also need to specify the `ELASTIC_INDEX` to query.

2. To authorize your connection, supply _either_ `ELASTIC_API_KEY` or both `ELASTIC_USER` and `ELASTIC_PASS`.

Optionally, you can set the `ELASTIC_SEARCH_LIMIT` parameter to determine the maximum number of results returned by a search.

Finally, to protect this connector from abuse, the `ELASTIC_CONNECTOR_API_KEY` environment variable must be set to a secure value that will be used for this connector's own bearer token authentication.

## Development

Expand Down
82 changes: 82 additions & 0 deletions elastic/provider/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
from elasticsearch import Elasticsearch
from flask import current_app as app

from . import UpstreamProviderError

# Module-level cache for the shared ElasticsearchClient instance; it starts
# empty and is populated by get_client() the first time it is called.
client = None


class ElasticsearchClient:
    """Thin wrapper around ``Elasticsearch`` bound to a single index.

    Attributes are set in ``__init__``:
        client: the underlying ``Elasticsearch`` connection.
        index: name of the index every search is run against.
        search_limit: value passed as ``size`` to each search request.
    """

    def __init__(self, connection_params=None, index=None, search_limit=10):
        """Validate the settings and open the Elasticsearch connection.

        Args:
            connection_params: keyword arguments forwarded verbatim to the
                ``Elasticsearch`` constructor. Must be non-empty.
            index: name of the index to query. Must be non-empty.
            search_limit: maximum number of hits requested per search. Any
                value accepted by ``int()`` is allowed; ``None`` is passed
                through unchanged.

        Raises:
            ValueError: if ``connection_params`` or ``index`` is missing, or
                if ``search_limit`` cannot be converted to an integer.
        """
        if not connection_params:
            raise ValueError(
                "No connection parameters provided to the Elasticsearch "
                "client during initialization."
            )
        if not index:
            raise ValueError(
                "No index provided to the Elasticsearch "
                "client during initialization."
            )

        # The limit may arrive as a string (e.g. when it originates from an
        # environment variable), so normalise it before it is used as the
        # request size. This is done before connecting so that a bad value
        # fails fast.
        if search_limit is not None:
            try:
                search_limit = int(search_limit)
            except (TypeError, ValueError) as err:
                raise ValueError(
                    f"Invalid search limit {search_limit!r}: "
                    "an integer value is required."
                ) from err

        self.client = Elasticsearch(**connection_params)
        self.index = index
        self.search_limit = search_limit

    def search(self, query):
        """Run a ``multi_match`` search for ``query`` on the configured index.

        Args:
            query: the text to search for.

        Returns:
            The list found under ``response["hits"]["hits"]``.

        Raises:
            UpstreamProviderError: if the response carries no hits list.
        """
        es_query_body = {
            "query": {"multi_match": {"query": query}},
            # Empty pre/post tags: request highlights on the "content" field
            # without wrapping the matched text in any marker strings.
            "highlight": {
                "pre_tags": [""],
                "post_tags": [""],
                "fields": {"content": {}},
            },
        }

        response = self.client.search(
            index=self.index, body=es_query_body, size=self.search_limit
        )

        # `or {}` also covers a response where "hits" is present but null,
        # so that case raises the provider error instead of AttributeError.
        hits = (response.get("hits") or {}).get("hits")
        if hits is None:
            raise UpstreamProviderError(
                f"Error while searching Elasticsearch with query: '{query}'."
            )

        return hits
tianjing-li marked this conversation as resolved.
Show resolved Hide resolved


def get_client():
    """Return the shared ``ElasticsearchClient``, creating it on first use.

    The client is built from the Flask app config: ``CLOUD_ID`` or ``URL``
    for the connection, ``API_KEY`` or the ``USER``/``PASS`` pair for
    authentication, ``INDEX`` for the index to query and the optional
    ``SEARCH_LIMIT`` (default 10). Once built it is stored in the
    module-level ``client`` variable and reused by later calls.

    Returns:
        The cached ``ElasticsearchClient`` instance.

    Raises:
        ValueError: if the connection details, the credentials or the index
            are missing from the app config.
    """
    global client
    if client is None:
        connection_params = {}

        # Retrieve environment details
        if cloud_id := app.config.get("CLOUD_ID"):
            connection_params["cloud_id"] = cloud_id
        elif url := app.config.get("URL"):
            connection_params["hosts"] = [url]
        else:
            raise ValueError(
                "To connect to your Elasticsearch instance, either ELASTIC_CLOUD_ID "
                "or ELASTIC_URL must be provided as a valid environment variable. "
                "See the README for more details."
            )

        # Retrieve credentials
        if api_key := app.config.get("API_KEY"):
            connection_params["api_key"] = api_key
        elif (user := app.config.get("USER")) and (password := app.config.get("PASS")):
            connection_params["basic_auth"] = (user, password)
        else:
            raise ValueError(
                "To authenticate your Elasticsearch connection, either ELASTIC_API_KEY or the ELASTIC_USER "
                "and ELASTIC_PASS pair must be provided as valid environment variables. "
                "See the README for more details."
            )

        # Validate explicitly rather than with `assert`: assertions are
        # removed when Python runs with -O, which would leave `index`
        # unbound and hide the misconfiguration behind a NameError.
        index = app.config.get("INDEX")
        if not index:
            raise ValueError("ELASTIC_INDEX must be set")
        search_limit = app.config.get("SEARCH_LIMIT", 10)

        client = ElasticsearchClient(connection_params, index, search_limit)

    return client
48 changes: 8 additions & 40 deletions elastic/provider/provider.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,22 @@
import logging

from elasticsearch import Elasticsearch
from flask import current_app as app
from .client import get_client

logger = logging.getLogger(__name__)
es_client = None

MIN_TEXT_LENGTH = 25


def create_es_client():
connection_params = {}
def search(query):
elasticsearch_client = get_client()

if cloud_id := app.config.get("CLOUD_ID"):
connection_params["cloud_id"] = cloud_id
elif url := app.config.get("URL"):
connection_params["hosts"] = [url]
else:
raise ValueError("Either ELASTIC_CLOUD_ID or ELASTIC_URL env vars must be set.")
search_results = elasticsearch_client.search(query)

if api_key := app.config.get("API_KEY"):
connection_params["api_key"] = api_key
elif (user := app.config.get("USER")) and (password := app.config.get("PASS")):
connection_params["basic_auth"] = (user, password)
else:
raise ValueError(
"Either ELASTIC_APIKEY or both ELASTIC_USER and ELASTIC_PASS env vars must be set."
)
results = []
for match in search_results:
results.append(serialize_result(match))

return Elasticsearch(**connection_params)
return results


def build_text(match):
Expand Down Expand Up @@ -56,23 +44,3 @@ def serialize_result(match):
**stripped_source,
"text": build_text(match),
}


def search(query):
    """Search the configured index for ``query`` and return serialized hits.

    The Elasticsearch client is created on the first call and kept in the
    module-level ``es_client`` variable for subsequent calls.
    """
    global es_client

    # Build the shared client only once.
    if not es_client:
        es_client = create_es_client()

    highlight_options = {
        "pre_tags": [""],
        "post_tags": [""],
        "fields": {"content": {}},
    }
    request_body = {
        "query": {"multi_match": {"query": query}},
        "highlight": highlight_options,
    }

    response = es_client.search(
        index=app.config["INDEX"], body=request_body, size=20
    )

    return [serialize_result(hit) for hit in response["hits"]["hits"]]