Skip to content

Commit

Permalink
Remove empty values from final entities (#4)
Browse files (browse the repository at this point in the history)
Co-authored-by: Chris Trevino <[email protected]>
  • Loading branch information
AlonsoGuevara and darthtrevino authored Apr 2, 2024
1 parent 7d8bc09 commit 8efe8bf
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 24 deletions.
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -44,4 +44,8 @@ __pycache__
.pipeline

temp_azurite/
__azurite*.json
__azurite*.json

# Getting started example
ragtest/
.ragtest/
Original file line number Diff line number Diff line change
@@ -51,6 +51,19 @@ def build_steps(
"args": {"columns": ["id"]},
},
{"verb": "rename", "args": {"columns": {"title": "name"}}},
{
# ELIMINATE EMPTY NAMES
"verb": "filter",
"args": {
"column": "name",
"criteria": [
{
"type": "value",
"operator": "is not empty",
}
],
},
},
{
"verb": "text_split",
"args": {"separator": ",", "column": "source_id", "to": "text_unit_ids"},
@@ -69,31 +82,46 @@ def build_steps(
})

if not skip_description_embedding:
result.extend([
{
"verb": "merge",
"args": {
"strategy": "concat",
"columns": ["name", "description"],
"to": "name_description",
"delimiter": ":",
"preserveSource": True,
result.extend(
[
{
"verb": "merge",
"args": {
"strategy": "concat",
"columns": ["name", "description"],
"to": "name_description",
"delimiter": ":",
"preserveSource": True,
},
},
},
{
"verb": "text_embed",
"args": {
"column": "name_description",
"to": "description_embedding",
**text_embed_config,
{
"verb": "text_embed",
"args": {
"column": "name_description",
"to": "description_embedding",
**text_embed_config,
},
},
},
{
"verb": "drop",
"args": {
"columns": ["name_description"],
{
"verb": "drop",
"args": {
"columns": ["name_description"],
},
},
},
])
{
# ELIMINATE EMPTY DESCRIPTION EMBEDDINGS
"verb": "filter",
"args": {
"column": "description_embedding",
"criteria": [
{
"type": "value",
"operator": "is not empty",
}
],
},
},
]
)

return result

0 comments on commit 8efe8bf

Please sign in to comment.