Skip to content

Commit

Permalink
[contentful] enabled way to richtext fields (#392)
Browse files Browse the repository at this point in the history
* [contentful] enabled way to richtext fields

Enabled a way to explicitly specify which fields are processed as richtext via the
CONTENTFUL_RICHTEXT_FIELDS environment variable. Default: ["content"].

* fixed default value for RICHTEXT_FIELDS

* fix: formatted code style
  • Loading branch information
bssanchez authored May 3, 2024
1 parent 8bd8976 commit 11cd23f
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 6 deletions.
1 change: 1 addition & 0 deletions contentful/.env-template
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ CONTENTFUL_ENVIRONMENT=master
CONTENTFUL_SEARCH_LIMIT=20
CONTENTFUL_CONTENT_TYPE_SEARCH=
CONTENTFUL_FIELDS_MAPPING=
CONTENTFUL_RICHTEXT_FIELDS=
CONTENTFUL_CONNECTOR_API_KEY=
21 changes: 21 additions & 0 deletions contentful/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,27 @@ for example:
}
```

```
CONTENTFUL_RICHTEXT_FIELDS
```

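This variable should contain a JSON array with the identifiers of the richtext fields, so that their content is processed before it is delivered to Cohere. Each identifier is either a bare field ID (matched in every content type) or a field ID prefixed with its content type ID (matched only in that content type). By default (`["content"]`), the `content` field is processed in all content types.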

```
[
"contentful_content_type.contentful_field_id",
"contentful_field_id",
...
]
for example:
[
"pageBlogPost.description",
"content"
]
```

## Development

Create a virtual environment and install dependencies with poetry. We recommend using in-project virtual environments:
Expand Down
22 changes: 20 additions & 2 deletions contentful/provider/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,14 @@

class ContentfulApiClient(Client):
def __init__(
self, space_id, access_token, environment, content_type, mapping, search_limit
self,
space_id,
access_token,
environment,
content_type,
richtext_fields,
mapping,
search_limit,
):
super().__init__(
space_id,
Expand All @@ -22,6 +29,7 @@ def __init__(
self.content_type = content_type
self.mapping = mapping
self.search_limit = search_limit
self.richtext_fields = richtext_fields

def get_search_limit(self):
return self.search_limit
Expand All @@ -32,6 +40,9 @@ def get_content_type(self):
def get_mapping(self):
return self.mapping

def get_richtext_fields(self):
return self.richtext_fields


def get_client():
global client
Expand All @@ -45,10 +56,17 @@ def get_client():
limit = app.config.get("SEARCH_LIMIT", 20)
mapping = app.config.get("FIELDS_MAPPING", {})
content_type = app.config.get("CONTENT_TYPE_SEARCH", None)
richtext_fields = app.config.get("RICHTEXT_FIELDS", '["content"]')

if not client:
client = ContentfulApiClient(
space_id, access_token, environment, content_type, mapping, limit
space_id,
access_token,
environment,
content_type,
richtext_fields,
mapping,
limit,
)

return client
12 changes: 8 additions & 4 deletions contentful/provider/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,17 @@ def process_content(node):
return str(node)


def serialize_results(results, mapping):
def serialize_results(results, mapping, richtext_fields):
data = []
for result in results:
item = {"content_type": result.content_type.id, "id": result.id}
for key, value in result.fields().items():
item[key] = str(value)
if key == "content":
item[key] = process_content(value)
type_key = f"{result.content_type.id}.{key}"

if any(fieldId in richtext_fields for fieldId in [key, type_key]):
item[key] = process_content(value)

if type_key in mapping:
item[mapping[type_key]] = item.pop(key)

Expand All @@ -42,4 +44,6 @@ def search(query):
params["content_type"] = content_type
results = client.entries(params)

return serialize_results(results.items, client.get_mapping())
return serialize_results(
results.items, client.get_mapping(), client.get_richtext_fields()
)

0 comments on commit 11cd23f

Please sign in to comment.