Skip to content

Commit

Permalink
fix(bigquery): Account for duplicates in table before merging (#26764)
Browse files: browse the repository at this point in the history
  • Loading branch information
tomasfarias authored Dec 13, 2024
1 parent 2c92fe2 commit 55b525a
Showing 1 changed file with 11 additions and 1 deletion.
12 changes: 11 additions & 1 deletion posthog/temporal/batch_exports/bigquery_batch_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,17 @@ async def amerge_person_tables(

merge_query = f"""
MERGE `{final_table.full_table_id.replace(":", ".", 1)}` final
- USING `{stage_table.full_table_id.replace(":", ".", 1)}` stage
+ USING (
+ SELECT * FROM
+ (
+ SELECT
+ *,
+ ROW_NUMBER() OVER (PARTITION BY {",".join(field.name for field in merge_key)}) row_num
+ FROM
+ `{stage_table.full_table_id.replace(":", ".", 1)}`
+ )
+ WHERE row_num = 1
+ ) stage
{merge_condition}
WHEN MATCHED AND (stage.`{person_version_key}` > final.`{person_version_key}` OR stage.`{person_distinct_id_version_key}` > final.`{person_distinct_id_version_key}`) THEN
Expand Down

0 comments on commit 55b525a

Please sign in to comment.