Skip to content

Commit

Permalink
Elastic cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
tianjing-li committed Dec 18, 2023
1 parent 085f181 commit 5b80667
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 43 deletions.
3 changes: 3 additions & 0 deletions elastic/.env-template
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,8 @@ ELASTIC_API_KEY=
ELASTIC_USER=elastic
ELASTIC_PASS=pass

# Optional
ELASTIC_SEARCH_LIMIT=10

# Connector Authorization
ELASTIC_CONNECTOR_API_KEY=
11 changes: 8 additions & 3 deletions elastic/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,15 @@ Currently this connector will perform full-text search, but only for a single in

## Configuration

You will need to configure this connector with the credentials and path necessary to connect to your Elasticsearch instance.
This connector is currently only configured to search a single index of your ES cluster, you will need to point to it by specifying your `ELASTIC_CLOUD_ID`, `ELASTIC_URL`, and `ELASTIC_INDEX`.
You will need to configure this connector with the connection details and authentication credentials for your Elasticsearch instance. These must be set as environment variables; we recommend creating a `.env` file based on the provided `.env-template`.

Then, to authorize your connection you will either require an `ELASTIC_API_KEY` _or_ both `ELASTIC_USER` and `ELASTIC_PASS`.
1. To configure your connection details, _either_ `ELASTIC_CLOUD_ID` or `ELASTIC_URL` must be provided. Then, you will need to specify the `ELASTIC_INDEX` to query.

2. To authorize your connection, supply _either_ `ELASTIC_API_KEY` or both `ELASTIC_USER` and `ELASTIC_PASS`.

Optionally, you can set the `ELASTIC_SEARCH_LIMIT` parameter to determine the maximum number of results returned by a search.

Finally, to protect this connector from abuse, the `ELASTIC_CONNECTOR_API_KEY` environment variable must be set to a secure value that will be used for this connector's own bearer token authentication.

## Development

Expand Down
75 changes: 75 additions & 0 deletions elastic/provider/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from elasticsearch import Elasticsearch
from flask import current_app as app

client = None


class ElasticsearchClient:
    """Wrapper around an ``Elasticsearch`` connection bound to one index.

    The instance stores the underlying client, the index name to query and
    the maximum number of hits requested per search.
    """

    def __init__(self, connection_params=None, index=None, search_limit=10):
        """Validate the arguments and open the Elasticsearch connection.

        Args:
            connection_params: Keyword arguments forwarded unchanged to the
                ``Elasticsearch`` constructor.
            index: Name of the index that searches are run against.
            search_limit: Value passed as ``size`` on every search.

        Raises:
            ValueError: If ``connection_params`` or ``index`` is falsy.
        """
        # Checked in this order so the connection-parameter error wins when
        # both arguments are missing.
        required = (
            (
                connection_params,
                "No connection parameters provided to the Elasticsearch "
                "client during initialization.",
            ),
            (
                index,
                "No index provided to the Elasticsearch "
                "client during initialization.",
            ),
        )
        for supplied, complaint in required:
            if not supplied:
                raise ValueError(complaint)

        self.client = Elasticsearch(**connection_params)
        self.index = index
        self.search_limit = search_limit

    def search(self, query):
        """Run a ``multi_match`` query and return the raw list of hits.

        Args:
            query: The search text placed in the ``multi_match`` clause.

        Returns:
            The value found at ``response["hits"]["hits"]``.
        """
        # Highlighting is requested on the "content" field with empty
        # pre/post tags.
        highlight_spec = {
            "pre_tags": [""],
            "post_tags": [""],
            "fields": {"content": {}},
        }
        request_body = {
            "query": {"multi_match": {"query": query}},
            "highlight": highlight_spec,
        }

        reply = self.client.search(
            index=self.index,
            body=request_body,
            size=self.search_limit,
        )
        hits_section = reply["hits"]
        return hits_section["hits"]


def get_client():
    """Return the shared ``ElasticsearchClient``, building it on first use.

    The client is stored in the module-level ``client`` variable and is
    created from the Flask application config the first time this function
    is called. Later calls return the same instance.

    Config keys read: ``CLOUD_ID`` or ``URL`` (connection), ``API_KEY`` or
    ``USER`` + ``PASS`` (credentials), ``INDEX`` (required) and
    ``SEARCH_LIMIT`` (optional, defaults to 10).

    Returns:
        The module-level ``ElasticsearchClient`` instance.

    Raises:
        ValueError: If the connection details, the credentials or the index
            are missing from the application config.
    """
    global client
    if client is None:
        connection_params = {}

        # Retrieve environment details; the cloud ID takes precedence over
        # the URL when both are configured.
        if cloud_id := app.config.get("CLOUD_ID"):
            connection_params["cloud_id"] = cloud_id
        elif url := app.config.get("URL"):
            connection_params["hosts"] = [url]
        else:
            raise ValueError(
                "To connect to your Elasticsearch instance, either ELASTIC_CLOUD_ID "
                "or ELASTIC_URL must be provided as a valid environment variable. "
                "See the README for more details."
            )

        # Retrieve credentials; an API key takes precedence over basic auth.
        if api_key := app.config.get("API_KEY"):
            connection_params["api_key"] = api_key
        elif (user := app.config.get("USER")) and (password := app.config.get("PASS")):
            connection_params["basic_auth"] = (user, password)
        else:
            raise ValueError(
                "To authenticate your Elasticsearch connection, either ELASTIC_API_KEY or the ELASTIC_USER "
                "and ELASTIC_PASS pair must be provided as valid environment variables. "
                "See the README for more details."
            )

        # Raise explicitly instead of using ``assert``: assertions are removed
        # under ``python -O``, which would also drop the assignment and leave
        # ``index`` unbound.
        index = app.config.get("INDEX")
        if not index:
            raise ValueError(
                "To query your Elasticsearch instance, ELASTIC_INDEX must be "
                "provided as a valid environment variable. "
                "See the README for more details."
            )
        search_limit = app.config.get("SEARCH_LIMIT", 10)

        client = ElasticsearchClient(connection_params, index, search_limit)

    return client
48 changes: 8 additions & 40 deletions elastic/provider/provider.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,22 @@
import logging

from elasticsearch import Elasticsearch
from flask import current_app as app
from .client import get_client

logger = logging.getLogger(__name__)
es_client = None

MIN_TEXT_LENGTH = 25


def create_es_client():
connection_params = {}
def search(query):
elasticsearch_client = get_client()

if cloud_id := app.config.get("CLOUD_ID"):
connection_params["cloud_id"] = cloud_id
elif url := app.config.get("URL"):
connection_params["hosts"] = [url]
else:
raise ValueError("Either ELASTIC_CLOUD_ID or ELASTIC_URL env vars must be set.")
search_results = elasticsearch_client.search(query)

if api_key := app.config.get("API_KEY"):
connection_params["api_key"] = api_key
elif (user := app.config.get("USER")) and (password := app.config.get("PASS")):
connection_params["basic_auth"] = (user, password)
else:
raise ValueError(
"Either ELASTIC_APIKEY or both ELASTIC_USER and ELASTIC_PASS env vars must be set."
)
results = []
for match in search_results:
results.append(serialize_result(match))

return Elasticsearch(**connection_params)
return results


def build_text(match):
Expand Down Expand Up @@ -56,23 +44,3 @@ def serialize_result(match):
**stripped_source,
"text": build_text(match),
}


def search(query):
global es_client

if not es_client:
es_client = create_es_client()

es_query_body = {
"query": {"multi_match": {"query": query}},
"highlight": {"pre_tags": [""], "post_tags": [""], "fields": {"content": {}}},
}

response = es_client.search(index=app.config["INDEX"], body=es_query_body, size=20)

results = []
for match in response["hits"]["hits"]:
results.append(serialize_result(match))

return results

0 comments on commit 5b80667

Please sign in to comment.