From ebc49ff64607d5fdfa5e3e7f000cd0ee32cf0dfe Mon Sep 17 00:00:00 2001 From: Pablo Panero Date: Wed, 20 Sep 2023 15:07:29 +0200 Subject: [PATCH 01/35] migrator: add gh actions test data --- .../github/testdata/hook_disable.jsonl | 1 + .../github/testdata/hook_enable_step1.json | 1 + .../github/testdata/hook_enable_step2.jsonl | 2 + .../actions/github/testdata/hook_update.jsonl | 1 + .../github/testdata/release_create.jsonl | 50 +++++++++++++++++++ .../github/testdata/release_update.jsonl | 1 + .../github/testdata/repo_release.jsonl | 2 + .../actions/github/testdata/repo_update.jsonl | 1 + 8 files changed, 59 insertions(+) create mode 100644 migrator/tests/actions/github/testdata/hook_disable.jsonl create mode 100644 migrator/tests/actions/github/testdata/hook_enable_step1.json create mode 100644 migrator/tests/actions/github/testdata/hook_enable_step2.jsonl create mode 100644 migrator/tests/actions/github/testdata/hook_update.jsonl create mode 100644 migrator/tests/actions/github/testdata/release_create.jsonl create mode 100644 migrator/tests/actions/github/testdata/release_update.jsonl create mode 100644 migrator/tests/actions/github/testdata/repo_release.jsonl create mode 100644 migrator/tests/actions/github/testdata/repo_update.jsonl diff --git a/migrator/tests/actions/github/testdata/hook_disable.jsonl b/migrator/tests/actions/github/testdata/hook_disable.jsonl new file mode 100644 index 00000000..062f2102 --- /dev/null +++ b/migrator/tests/actions/github/testdata/hook_disable.jsonl @@ -0,0 +1 @@ +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20671, "timestamp": 1695210835244, "timestamp_type": 0, "key": {"id": "0d1b629d-7992-4650-b0b0-8908a0322bca", "__dbz__physicalTableIdentifier": "zenodo-qa.public.github_repositories"}, "value": {"before": {"created": 1636638953162187, "updated": 1695210768809652, "id": "0d1b629d-7992-4650-b0b0-8908a0322bca", "github_id": 427018972, "name": "ppanero/zenodo-release-test", "user_id": 86490, "ping": 1695210684525290, "hook": 434419984}, "after": {"created": 1636638953162187, "updated": 1695210834960263, "id": "0d1b629d-7992-4650-b0b0-8908a0322bca", "github_id": 427018972, "name": "ppanero/zenodo-release-test", "user_id": null, "ping": 1695210684525290, "hook": null}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210834962, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473226150104\",\"1473226150224\"]", "schema": "public", "table": "github_repositories", "txId": 563886571, "lsn": 1473226150224, "xmin": null}, "op": "u", "ts_ms": 1695210835060, "transaction": {"id": "563886571:1473226150224", "total_order": 1, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 117, "serialized_value_size": 846, "serialized_header_size": -1} \ No newline at end of file diff --git a/migrator/tests/actions/github/testdata/hook_enable_step1.json b/migrator/tests/actions/github/testdata/hook_enable_step1.json new file mode 100644 index 00000000..9784445b --- /dev/null +++ b/migrator/tests/actions/github/testdata/hook_enable_step1.json @@ -0,0 +1 @@ +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20675, "timestamp": 1695210862298, "timestamp_type": 0, "key": {"id": "0d1b629d-7992-4650-b0b0-8908a0322bca", "__dbz__physicalTableIdentifier": "zenodo-qa.public.github_repositories"}, "value": {"before": {"created": 1636638953162187, "updated": 1695210834960263, "id": "0d1b629d-7992-4650-b0b0-8908a0322bca", "github_id": 427018972, "name": "ppanero/zenodo-release-test", "user_id": null, "ping": 1695210684525290, "hook": null}, "after": {"created": 1636638953162187, "updated": 1695210861469381, "id": "0d1b629d-7992-4650-b0b0-8908a0322bca", "github_id": 427018972, "name": "ppanero/zenodo-release-test", "user_id": 86490, "ping": 1695210684525290, "hook": 434420608}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210861471, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473226161800\",\"1473226161920\"]", "schema": "public", "table": "github_repositories", "txId": 563886578, "lsn": 1473226161920, "xmin": null}, "op": "u", "ts_ms": 1695210861870, "transaction": {"id": "563886578:1473226161920", "total_order": 1, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 117, "serialized_value_size": 846, "serialized_header_size": -1} \ No newline at end of file diff --git a/migrator/tests/actions/github/testdata/hook_enable_step2.jsonl b/migrator/tests/actions/github/testdata/hook_enable_step2.jsonl new file mode 100644 index 00000000..7546d803 --- /dev/null +++ b/migrator/tests/actions/github/testdata/hook_enable_step2.jsonl @@ -0,0 +1,2 @@ +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20676, "timestamp": 1695210863300, "timestamp_type": 0, "key": {"id": 157872, "__dbz__physicalTableIdentifier": "zenodo-qa.public.oauth2server_token"}, "value": {"before": {"id": 157872, "client_id": "LwpyqQN5G32O1XFuPK2DdI0B5MiEjgMsQwKmtgKm", "user_id": 86490, "token_type": "bearer", "access_token": "aHNPTlJQR2VIQldqZE5tZzE5cnJIdzdyWkhRRk5DVVdJSzJtallYVXJIVklnMkFjQlh3WVZ0Vnk5TkFVWU8xcFJMd2NlZWRyRDhqMEdERmU0NWpVcVE9PQ==", "refresh_token": null, "expires": 1695214368940479, "_scopes": "webhooks:event", "is_personal": true, "is_internal": true}, "after": {"id": 157872, "client_id": "LwpyqQN5G32O1XFuPK2DdI0B5MiEjgMsQwKmtgKm", "user_id": 86490, "token_type": "bearer", "access_token": "aHNPTlJQR2VIQldqZE5tZzE5cnJIdzdyWkhRRk5DVVdJSzJtallYVXJIVklnMkFjQlh3WVZ0Vnk5TkFVWU8xcFJMd2NlZWRyRDhqMEdERmU0NWpVcVE9PQ==", "refresh_token": null, "expires": 1695214462557405, "_scopes": "webhooks:event", "is_personal": true, "is_internal": true}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210862573, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473226162280\",\"1473226162280\"]", "schema": "public", "table": "oauth2server_token", "txId": 563886579, "lsn": 1473226162280, "xmin": null}, "op": "u", "ts_ms": 1695210862882, "transaction": {"id": "563886579:1473226162280", "total_order": 1, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 84, "serialized_value_size": 1135, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20677, "timestamp": 1695210863300, "timestamp_type": 0, "key": {"id": "f4dc4bc2-e157-4612-8600-0e9b4ba0f450", "__dbz__physicalTableIdentifier": "zenodo-qa.public.webhooks_events"}, "value": {"before": null, "after": {"created": 1695210862569231, "updated": 1695210862569242, "id": "f4dc4bc2-e157-4612-8600-0e9b4ba0f450", "receiver_id": "github", "user_id": 86490, "payload": "{\"hook_id\": 434420608, \"hook\": {\"deliveries_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/hooks/434420608/deliveries\", \"name\": \"web\", \"url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/hooks/434420608\", \"type\": \"Repository\", \"created_at\": \"2023-09-20T11:54:21Z\", \"updated_at\": \"2023-09-20T11:54:21Z\", \"test_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/hooks/434420608/test\", \"id\": 434420608, \"ping_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/hooks/434420608/pings\", \"active\": true, \"config\": {\"url\": \"https://sandbox.zenodo.org/api/hooks/receivers/github/events/?access_token=blX2X1zR6N8M0eN3yCdKS6lJ39ZbprdMgvz65rVfkTgy6WCpdDgL7bhiIj1S\", \"insecure_ssl\": \"0\", \"secret\": \"********\", \"content_type\": \"json\"}, \"events\": [\"release\"], \"last_response\": {\"status\": \"unused\", \"message\": null, \"code\": null}}, \"zen\": \"Design for failure.\", \"sender\": {\"following_url\": \"https://api.github.com/users/ppanero/following{/other_user}\", \"events_url\": \"https://api.github.com/users/ppanero/events{/privacy}\", \"avatar_url\": \"https://avatars.githubusercontent.com/u/6756943?v=4\", \"url\": \"https://api.github.com/users/ppanero\", \"gists_url\": \"https://api.github.com/users/ppanero/gists{/gist_id}\", \"html_url\": \"https://github.com/ppanero\", \"subscriptions_url\": \"https://api.github.com/users/ppanero/subscriptions\", \"node_id\": \"MDQ6VXNlcjY3NTY5NDM=\", \"repos_url\": \"https://api.github.com/users/ppanero/repos\", \"received_events_url\": \"https://api.github.com/users/ppanero/received_events\", \"gravatar_id\": \"\", \"starred_url\": \"https://api.github.com/users/ppanero/starred{/owner}{/repo}\", \"site_admin\": false, \"login\": \"ppanero\", \"type\": \"User\", \"id\": 6756943, \"followers_url\": \"https://api.github.com/users/ppanero/followers\", \"organizations_url\": \"https://api.github.com/users/ppanero/orgs\"}, \"repository\": {\"issues_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/issues{/number}\", \"deployments_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/deployments\", \"stargazers_count\": 0, \"forks_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/forks\", \"mirror_url\": null, \"allow_forking\": true, \"subscription_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/subscription\", \"topics\": [], \"notifications_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/notifications{?since,all,participating}\", \"collaborators_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/collaborators{/collaborator}\", \"updated_at\": \"2021-12-02T13:03:26Z\", \"private\": false, \"pulls_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/pulls{/number}\", \"disabled\": false, \"issue_comment_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/issues/comments{/number}\", \"labels_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/labels{/name}\", \"has_wiki\": true, \"full_name\": \"ppanero/zenodo-release-test\", \"owner\": {\"following_url\": \"https://api.github.com/users/ppanero/following{/other_user}\", \"events_url\": \"https://api.github.com/users/ppanero/events{/privacy}\", \"avatar_url\": \"https://avatars.githubusercontent.com/u/6756943?v=4\", \"url\": \"https://api.github.com/users/ppanero\", \"gists_url\": \"https://api.github.com/users/ppanero/gists{/gist_id}\", \"html_url\": \"https://github.com/ppanero\", \"subscriptions_url\": \"https://api.github.com/users/ppanero/subscriptions\", \"node_id\": \"MDQ6VXNlcjY3NTY5NDM=\", \"repos_url\": \"https://api.github.com/users/ppanero/repos\", \"received_events_url\": \"https://api.github.com/users/ppanero/received_events\", \"gravatar_id\": \"\", \"starred_url\": \"https://api.github.com/users/ppanero/starred{/owner}{/repo}\", \"site_admin\": false, \"login\": \"ppanero\", \"type\": \"User\", \"id\": 6756943, \"followers_url\": \"https://api.github.com/users/ppanero/followers\", \"organizations_url\": \"https://api.github.com/users/ppanero/orgs\"}, \"statuses_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/statuses/{sha}\", \"id\": 427018972, \"keys_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/keys{/key_id}\", \"description\": null, \"tags_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/tags\", \"archived\": false, \"downloads_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/downloads\", \"assignees_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/assignees{/user}\", \"watchers\": 0, \"contents_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/contents/{+path}\", \"has_pages\": false, \"git_refs_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/git/refs{/sha}\", \"has_discussions\": false, \"has_projects\": true, \"clone_url\": \"https://github.com/ppanero/zenodo-release-test.git\", \"watchers_count\": 0, \"git_tags_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/git/tags{/sha}\", \"milestones_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/milestones{/number}\", \"languages_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/languages\", \"size\": 8, \"homepage\": null, \"fork\": false, \"commits_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/commits{/sha}\", \"releases_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/releases{/id}\", \"issue_events_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/issues/events{/number}\", \"archive_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/{archive_format}{/ref}\", \"comments_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/comments{/number}\", \"events_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/events\", \"contributors_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/contributors\", \"html_url\": \"https://github.com/ppanero/zenodo-release-test\", \"visibility\": \"public\", \"forks\": 0, \"compare_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/compare/{base}...{head}\", \"open_issues\": 0, \"node_id\": \"R_kgDOGXPK3A\", \"git_url\": \"git://github.com/ppanero/zenodo-release-test.git\", \"svn_url\": \"https://github.com/ppanero/zenodo-release-test\", \"merges_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/merges\", \"has_issues\": true, \"ssh_url\": \"git@github.com:ppanero/zenodo-release-test.git\", \"blobs_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/git/blobs{/sha}\", \"git_commits_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/git/commits{/sha}\", \"hooks_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/hooks\", \"has_downloads\": true, \"license\": {\"spdx_id\": \"MPL-2.0\", \"url\": \"https://api.github.com/licenses/mpl-2.0\", \"node_id\": \"MDc6TGljZW5zZTE0\", \"name\": \"Mozilla Public License 2.0\", \"key\": \"mpl-2.0\"}, \"name\": \"zenodo-release-test\", \"language\": null, \"url\": \"https://api.github.com/repos/ppanero/zenodo-release-test\", \"created_at\": \"2021-11-11T13:53:02Z\", \"open_issues_count\": 0, \"is_template\": false, \"pushed_at\": \"2023-09-20T11:50:38Z\", \"web_commit_signoff_required\": false, \"forks_count\": 0, \"default_branch\": \"main\", \"teams_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/teams\", \"trees_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/git/trees{/sha}\", \"branches_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/branches{/branch}\", \"subscribers_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/subscribers\", \"stargazers_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/stargazers\"}}", "payload_headers": null, "response": "{\"status\": 202, \"message\": \"Accepted.\"}", "response_headers": null, "response_code": 202}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210862573, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473226162280\",\"1473226164816\"]", "schema": "public", "table": "webhooks_events", "txId": 563886579, "lsn": 1473226164816, "xmin": null}, "op": "c", "ts_ms": 1695210862882, "transaction": {"id": "563886579:1473226164816", "total_order": 2, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 113, "serialized_value_size": 8636, "serialized_header_size": -1} \ No newline at end of file diff --git a/migrator/tests/actions/github/testdata/hook_update.jsonl b/migrator/tests/actions/github/testdata/hook_update.jsonl new file mode 100644 index 00000000..8c86108d --- /dev/null +++ b/migrator/tests/actions/github/testdata/hook_update.jsonl @@ -0,0 +1 @@ +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20611, "timestamp": 1695210769612, "timestamp_type": 0, "key": {"id": "189d88dd-22d9-40d1-b3af-9da4b2bc4870", "__dbz__physicalTableIdentifier": "zenodo-qa.public.webhooks_events"}, "value": {"before": {"created": 1695210768902757, "updated": 1695210768902771, "id": "189d88dd-22d9-40d1-b3af-9da4b2bc4870", "receiver_id": "github", "user_id": 86490, "payload": "{\"action\": \"published\", \"release\": {\"body\": \"Zenodo testing migration\", \"assets_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/releases/121854239/assets\", \"name\": \"more and more\", \"published_at\": \"2023-09-20T11:52:47Z\", \"author\": {\"following_url\": \"https://api.github.com/users/ppanero/following{/other_user}\", \"events_url\": \"https://api.github.com/users/ppanero/events{/privacy}\", \"avatar_url\": \"https://avatars.githubusercontent.com/u/6756943?v=4\", \"url\": \"https://api.github.com/users/ppanero\", \"gists_url\": \"https://api.github.com/users/ppanero/gists{/gist_id}\", \"html_url\": \"https://github.com/ppanero\", \"subscriptions_url\": \"https://api.github.com/users/ppanero/subscriptions\", \"node_id\": \"MDQ6VXNlcjY3NTY5NDM=\", \"repos_url\": \"https://api.github.com/users/ppanero/repos\", \"received_events_url\": \"https://api.github.com/users/ppanero/received_events\", \"gravatar_id\": \"\", \"starred_url\": \"https://api.github.com/users/ppanero/starred{/owner}{/repo}\", \"site_admin\": false, \"login\": \"ppanero\", \"type\": \"User\", \"id\": 6756943, \"followers_url\": \"https://api.github.com/users/ppanero/followers\", \"organizations_url\": \"https://api.github.com/users/ppanero/orgs\"}, \"url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/releases/121854239\", \"created_at\": \"2023-09-20T11:50:17Z\", \"target_commitish\": \"main\", \"tarball_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/tarball/v4\", \"html_url\": \"https://github.com/ppanero/zenodo-release-test/releases/tag/v4\", \"zipball_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/zipball/v4\", \"tag_name\": \"v4\", \"node_id\": \"RE_kwDOGXPK3M4HQ1kf\", \"draft\": false, \"prerelease\": false, \"upload_url\": \"https://uploads.github.com/repos/ppanero/zenodo-release-test/releases/121854239/assets{?name,label}\", \"id\": 121854239, \"assets\": []}, \"sender\": {\"following_url\": \"https://api.github.com/users/ppanero/following{/other_user}\", \"events_url\": \"https://api.github.com/users/ppanero/events{/privacy}\", \"avatar_url\": \"https://avatars.githubusercontent.com/u/6756943?v=4\", \"url\": \"https://api.github.com/users/ppanero\", \"gists_url\": \"https://api.github.com/users/ppanero/gists{/gist_id}\", \"html_url\": \"https://github.com/ppanero\", \"subscriptions_url\": \"https://api.github.com/users/ppanero/subscriptions\", \"node_id\": \"MDQ6VXNlcjY3NTY5NDM=\", \"repos_url\": \"https://api.github.com/users/ppanero/repos\", \"received_events_url\": \"https://api.github.com/users/ppanero/received_events\", \"gravatar_id\": \"\", \"starred_url\": \"https://api.github.com/users/ppanero/starred{/owner}{/repo}\", \"site_admin\": false, \"login\": \"ppanero\", \"type\": \"User\", \"id\": 6756943, \"followers_url\": \"https://api.github.com/users/ppanero/followers\", \"organizations_url\": \"https://api.github.com/users/ppanero/orgs\"}, \"repository\": {\"issues_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/issues{/number}\", \"deployments_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/deployments\", \"stargazers_count\": 0, \"forks_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/forks\", \"mirror_url\": null, \"allow_forking\": true, \"subscription_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/subscription\", \"topics\": [], \"notifications_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/notifications{?since,all,participating}\", \"collaborators_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/collaborators{/collaborator}\", \"updated_at\": \"2021-12-02T13:03:26Z\", \"private\": false, \"pulls_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/pulls{/number}\", \"disabled\": false, \"issue_comment_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/issues/comments{/number}\", \"labels_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/labels{/name}\", \"has_wiki\": true, \"full_name\": \"ppanero/zenodo-release-test\", \"owner\": {\"following_url\": \"https://api.github.com/users/ppanero/following{/other_user}\", \"events_url\": \"https://api.github.com/users/ppanero/events{/privacy}\", \"avatar_url\": \"https://avatars.githubusercontent.com/u/6756943?v=4\", \"url\": \"https://api.github.com/users/ppanero\", \"gists_url\": \"https://api.github.com/users/ppanero/gists{/gist_id}\", \"html_url\": \"https://github.com/ppanero\", \"subscriptions_url\": \"https://api.github.com/users/ppanero/subscriptions\", \"node_id\": \"MDQ6VXNlcjY3NTY5NDM=\", \"repos_url\": \"https://api.github.com/users/ppanero/repos\", \"received_events_url\": \"https://api.github.com/users/ppanero/received_events\", \"gravatar_id\": \"\", \"starred_url\": \"https://api.github.com/users/ppanero/starred{/owner}{/repo}\", \"site_admin\": false, \"login\": \"ppanero\", \"type\": \"User\", \"id\": 6756943, \"followers_url\": \"https://api.github.com/users/ppanero/followers\", \"organizations_url\": \"https://api.github.com/users/ppanero/orgs\"}, \"statuses_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/statuses/{sha}\", \"id\": 427018972, \"keys_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/keys{/key_id}\", \"description\": null, \"tags_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/tags\", \"archived\": false, \"downloads_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/downloads\", \"assignees_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/assignees{/user}\", \"watchers\": 0, \"contents_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/contents/{+path}\", \"has_pages\": false, \"git_refs_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/git/refs{/sha}\", \"has_discussions\": false, \"has_projects\": true, \"clone_url\": \"https://github.com/ppanero/zenodo-release-test.git\", \"watchers_count\": 0, \"git_tags_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/git/tags{/sha}\", \"milestones_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/milestones{/number}\", \"languages_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/languages\", \"size\": 8, \"homepage\": null, \"fork\": false, \"commits_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/commits{/sha}\", \"releases_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/releases{/id}\", \"issue_events_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/issues/events{/number}\", \"archive_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/{archive_format}{/ref}\", \"comments_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/comments{/number}\", \"events_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/events\", \"contributors_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/contributors\", \"html_url\": \"https://github.com/ppanero/zenodo-release-test\", \"visibility\": \"public\", \"forks\": 0, \"compare_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/compare/{base}...{head}\", \"open_issues\": 0, \"node_id\": \"R_kgDOGXPK3A\", \"git_url\": \"git://github.com/ppanero/zenodo-release-test.git\", \"svn_url\": \"https://github.com/ppanero/zenodo-release-test\", \"merges_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/merges\", \"has_issues\": true, \"ssh_url\": \"git@github.com:ppanero/zenodo-release-test.git\", \"blobs_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/git/blobs{/sha}\", \"git_commits_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/git/commits{/sha}\", \"hooks_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/hooks\", \"has_downloads\": true, \"license\": {\"spdx_id\": \"MPL-2.0\", \"url\": \"https://api.github.com/licenses/mpl-2.0\", \"node_id\": \"MDc6TGljZW5zZTE0\", \"name\": \"Mozilla Public License 2.0\", \"key\": \"mpl-2.0\"}, \"name\": \"zenodo-release-test\", \"language\": null, \"url\": \"https://api.github.com/repos/ppanero/zenodo-release-test\", \"created_at\": \"2021-11-11T13:53:02Z\", \"open_issues_count\": 0, \"is_template\": false, \"pushed_at\": \"2023-09-20T11:50:38Z\", \"web_commit_signoff_required\": false, \"forks_count\": 0, \"default_branch\": \"main\", \"teams_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/teams\", \"trees_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/git/trees{/sha}\", \"branches_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/branches{/branch}\", \"subscribers_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/subscribers\", \"stargazers_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/stargazers\"}}", "payload_headers": null, "response": "{\"status\": 202, \"message\": \"Accepted.\"}", "response_headers": null, "response_code": 202}, "after": {"created": 1695210768902757, "updated": 1695210768930427, "id": "189d88dd-22d9-40d1-b3af-9da4b2bc4870", "receiver_id": "github", "user_id": 86490, "payload": "{\"action\": \"published\", \"release\": {\"body\": \"Zenodo testing migration\", \"assets_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/releases/121854239/assets\", \"name\": \"more and more\", \"published_at\": \"2023-09-20T11:52:47Z\", \"author\": {\"following_url\": \"https://api.github.com/users/ppanero/following{/other_user}\", \"events_url\": \"https://api.github.com/users/ppanero/events{/privacy}\", \"avatar_url\": \"https://avatars.githubusercontent.com/u/6756943?v=4\", \"url\": \"https://api.github.com/users/ppanero\", \"gists_url\": \"https://api.github.com/users/ppanero/gists{/gist_id}\", \"html_url\": \"https://github.com/ppanero\", \"subscriptions_url\": \"https://api.github.com/users/ppanero/subscriptions\", \"node_id\": \"MDQ6VXNlcjY3NTY5NDM=\", \"repos_url\": \"https://api.github.com/users/ppanero/repos\", \"received_events_url\": \"https://api.github.com/users/ppanero/received_events\", \"gravatar_id\": \"\", \"starred_url\": \"https://api.github.com/users/ppanero/starred{/owner}{/repo}\", \"site_admin\": false, \"login\": \"ppanero\", \"type\": \"User\", \"id\": 6756943, \"followers_url\": \"https://api.github.com/users/ppanero/followers\", \"organizations_url\": \"https://api.github.com/users/ppanero/orgs\"}, \"url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/releases/121854239\", \"created_at\": \"2023-09-20T11:50:17Z\", \"target_commitish\": \"main\", \"tarball_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/tarball/v4\", \"html_url\": \"https://github.com/ppanero/zenodo-release-test/releases/tag/v4\", \"zipball_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/zipball/v4\", \"tag_name\": \"v4\", \"node_id\": \"RE_kwDOGXPK3M4HQ1kf\", \"draft\": false, \"prerelease\": false, \"upload_url\": \"https://uploads.github.com/repos/ppanero/zenodo-release-test/releases/121854239/assets{?name,label}\", \"id\": 121854239, \"assets\": []}, \"sender\": {\"following_url\": \"https://api.github.com/users/ppanero/following{/other_user}\", \"events_url\": \"https://api.github.com/users/ppanero/events{/privacy}\", \"avatar_url\": \"https://avatars.githubusercontent.com/u/6756943?v=4\", \"url\": \"https://api.github.com/users/ppanero\", \"gists_url\": \"https://api.github.com/users/ppanero/gists{/gist_id}\", \"html_url\": \"https://github.com/ppanero\", \"subscriptions_url\": \"https://api.github.com/users/ppanero/subscriptions\", \"node_id\": \"MDQ6VXNlcjY3NTY5NDM=\", \"repos_url\": \"https://api.github.com/users/ppanero/repos\", \"received_events_url\": \"https://api.github.com/users/ppanero/received_events\", \"gravatar_id\": \"\", \"starred_url\": \"https://api.github.com/users/ppanero/starred{/owner}{/repo}\", \"site_admin\": false, \"login\": \"ppanero\", \"type\": \"User\", \"id\": 6756943, \"followers_url\": \"https://api.github.com/users/ppanero/followers\", \"organizations_url\": \"https://api.github.com/users/ppanero/orgs\"}, \"repository\": {\"issues_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/issues{/number}\", \"deployments_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/deployments\", \"stargazers_count\": 0, \"forks_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/forks\", \"mirror_url\": null, \"allow_forking\": true, \"subscription_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/subscription\", \"topics\": [], \"notifications_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/notifications{?since,all,participating}\", \"collaborators_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/collaborators{/collaborator}\", \"updated_at\": \"2021-12-02T13:03:26Z\", \"private\": false, \"pulls_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/pulls{/number}\", \"disabled\": false, \"issue_comment_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/issues/comments{/number}\", \"labels_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/labels{/name}\", \"has_wiki\": true, \"full_name\": \"ppanero/zenodo-release-test\", \"owner\": {\"following_url\": \"https://api.github.com/users/ppanero/following{/other_user}\", \"events_url\": \"https://api.github.com/users/ppanero/events{/privacy}\", \"avatar_url\": \"https://avatars.githubusercontent.com/u/6756943?v=4\", \"url\": \"https://api.github.com/users/ppanero\", \"gists_url\": \"https://api.github.com/users/ppanero/gists{/gist_id}\", \"html_url\": \"https://github.com/ppanero\", \"subscriptions_url\": \"https://api.github.com/users/ppanero/subscriptions\", \"node_id\": \"MDQ6VXNlcjY3NTY5NDM=\", \"repos_url\": \"https://api.github.com/users/ppanero/repos\", \"received_events_url\": \"https://api.github.com/users/ppanero/received_events\", \"gravatar_id\": \"\", \"starred_url\": \"https://api.github.com/users/ppanero/starred{/owner}{/repo}\", \"site_admin\": false, \"login\": \"ppanero\", \"type\": \"User\", \"id\": 6756943, \"followers_url\": \"https://api.github.com/users/ppanero/followers\", \"organizations_url\": \"https://api.github.com/users/ppanero/orgs\"}, \"statuses_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/statuses/{sha}\", \"id\": 427018972, \"keys_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/keys{/key_id}\", \"description\": null, \"tags_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/tags\", \"archived\": false, \"downloads_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/downloads\", \"assignees_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/assignees{/user}\", \"watchers\": 0, \"contents_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/contents/{+path}\", \"has_pages\": false, \"git_refs_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/git/refs{/sha}\", \"has_discussions\": false, \"has_projects\": true, \"clone_url\": \"https://github.com/ppanero/zenodo-release-test.git\", \"watchers_count\": 0, \"git_tags_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/git/tags{/sha}\", \"milestones_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/milestones{/number}\", \"languages_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/languages\", \"size\": 8, \"homepage\": null, \"fork\": false, \"commits_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/commits{/sha}\", \"releases_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/releases{/id}\", \"issue_events_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/issues/events{/number}\", \"archive_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/{archive_format}{/ref}\", \"comments_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/comments{/number}\", \"events_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/events\", \"contributors_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/contributors\", \"html_url\": \"https://github.com/ppanero/zenodo-release-test\", \"visibility\": \"public\", \"forks\": 0, \"compare_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/compare/{base}...{head}\", \"open_issues\": 0, \"node_id\": \"R_kgDOGXPK3A\", \"git_url\": \"git://github.com/ppanero/zenodo-release-test.git\", \"svn_url\": \"https://github.com/ppanero/zenodo-release-test\", \"merges_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/merges\", \"has_issues\": true, \"ssh_url\": \"git@github.com:ppanero/zenodo-release-test.git\", \"blobs_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/git/blobs{/sha}\", \"git_commits_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/git/commits{/sha}\", \"hooks_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/hooks\", \"has_downloads\": true, \"license\": {\"spdx_id\": \"MPL-2.0\", \"url\": \"https://api.github.com/licenses/mpl-2.0\", \"node_id\": \"MDc6TGljZW5zZTE0\", \"name\": \"Mozilla Public License 2.0\", \"key\": \"mpl-2.0\"}, \"name\": \"zenodo-release-test\", \"language\": null, \"url\": \"https://api.github.com/repos/ppanero/zenodo-release-test\", \"created_at\": \"2021-11-11T13:53:02Z\", \"open_issues_count\": 0, \"is_template\": false, \"pushed_at\": \"2023-09-20T11:50:38Z\", \"web_commit_signoff_required\": false, \"forks_count\": 0, \"default_branch\": \"main\", \"teams_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/teams\", \"trees_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/git/trees{/sha}\", \"branches_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/branches{/branch}\", \"subscribers_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/subscribers\", \"stargazers_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/stargazers\"}}", "payload_headers": null, "response": "{\"status\": 409, \"message\": \"The release has already been received.\"}", "response_headers": null, "response_code": 409}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210768932, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225192608\",\"1473225194680\"]", "schema": "public", "table": "webhooks_events", "txId": 563886520, "lsn": 1473225194680, "xmin": null}, "op": "u", "ts_ms": 1695210769295, "transaction": {"id": "563886520:1473225194680", "total_order": 1, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 113, "serialized_value_size": 18825, "serialized_header_size": -1} \ No newline at end of file diff --git a/migrator/tests/actions/github/testdata/release_create.jsonl b/migrator/tests/actions/github/testdata/release_create.jsonl new file mode 100644 index 00000000..29a16eca --- /dev/null +++ b/migrator/tests/actions/github/testdata/release_create.jsonl @@ -0,0 +1,50 @@ + +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20615, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"id": "13994925-4e8f-4c62-a583-6f41a192c8ec", "__dbz__physicalTableIdentifier": "zenodo-qa.public.files_bucket"}, "value": {"before": null, "after": {"created": 1695210770377940, "updated": 1695210770377951, "id": "13994925-4e8f-4c62-a583-6f41a192c8ec", "default_location": 1, "default_storage_class": "S", "size": 0, "quota_size": 50000000000, "max_file_size": 50000000000, "locked": false, "deleted": false}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225216960\"]", "schema": "public", "table": "files_bucket", "txId": 563886523, "lsn": 1473225216960, "xmin": null}, "op": "c", "ts_ms": 1695210773344, "transaction": {"id": "563886523:1473225216960", "total_order": 1, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 110, "serialized_value_size": 660, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20616, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"recid": 1242908, "__dbz__physicalTableIdentifier": "zenodo-qa.public.pidstore_recid"}, "value": {"before": null, "after": {"recid": 1242908}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225232760\"]", "schema": "public", "table": "pidstore_recid", "txId": 563886523, "lsn": 1473225232760, "xmin": null}, "op": "c", "ts_ms": 1695210773344, "transaction": {"id": "563886523:1473225232760", "total_order": 2, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 84, "serialized_value_size": 438, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20617, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"id": 12196572, "__dbz__physicalTableIdentifier": "zenodo-qa.public.pidstore_pid"}, "value": {"before": null, "after": {"created": 1695210770385707, "updated": 1695210770385716, "id": 12196572, "pid_type": "recid", "pid_value": "1242908", "pid_provider": null, "status": "K", "object_type": null, "object_uuid": null}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225239872\"]", "schema": "public", "table": "pidstore_pid", "txId": 563886523, "lsn": 1473225239872, "xmin": null}, "op": "c", "ts_ms": 1695210773344, "transaction": {"id": "563886523:1473225239872", "total_order": 3, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 80, "serialized_value_size": 600, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20618, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"id": 12196573, "__dbz__physicalTableIdentifier": "zenodo-qa.public.pidstore_pid"}, "value": {"before": null, "after": {"created": 1695210770390099, "updated": 1695210770390106, "id": 12196573, "pid_type": "depid", "pid_value": "1242908", "pid_provider": null, "status": "R", "object_type": "rec", "object_uuid": "5770322d-963d-4655-bbf9-c65e0e430fa4"}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225280384\"]", "schema": "public", "table": "pidstore_pid", "txId": 563886523, "lsn": 1473225280384, "xmin": null}, "op": "c", "ts_ms": 1695210773344, "transaction": {"id": "563886523:1473225280384", "total_order": 4, "data_collection_order": 2}}, "headers": [], "checksum": null, "serialized_key_size": 80, "serialized_value_size": 635, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20619, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"id": "5770322d-963d-4655-bbf9-c65e0e430fa4", "__dbz__physicalTableIdentifier": "zenodo-qa.public.records_metadata"}, "value": {"before": null, "after": {"created": 1695210770421881, "updated": 1695210770421889, "id": "5770322d-963d-4655-bbf9-c65e0e430fa4", "json": "{\"doi\": \"\", \"recid\": 1242908, \"title\": \"ppanero/zenodo-release-test: more and more\", \"$schema\": \"https://zenodo.org/schemas/deposits/records/record-v1.0.0.json\", \"license\": {\"$ref\": \"https://dx.zenodo.org/licenses/other-open\"}, \"version\": \"v4\", \"_buckets\": {\"deposit\": \"13994925-4e8f-4c62-a583-6f41a192c8ec\"}, \"_deposit\": {\"id\": \"1242908\", \"owners\": [], \"status\": \"draft\"}, \"creators\": [{\"name\": \"Pablo Panero\", \"affiliation\": \"CERN - Zenodo\"}], \"conceptdoi\": \"10.5072/zenodo.958575\", \"description\": \"

Zenodo testing migration

\", \"access_right\": \"open\", \"conceptrecid\": \"958575\", \"resource_type\": {\"type\": \"software\"}, \"publication_date\": \"2023-09-20\", \"related_identifiers\": [{\"scheme\": \"url\", \"relation\": \"isSupplementTo\", \"identifier\": \"https://github.com/ppanero/zenodo-release-test/tree/v4\"}]}", "version_id": 1}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225324160\"]", "schema": "public", "table": "records_metadata", "txId": 563886523, "lsn": 1473225324160, "xmin": null}, "op": "c", "ts_ms": 1695210773344, "transaction": {"id": "563886523:1473225324160", "total_order": 5, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 114, "serialized_value_size": 1442, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20620, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"record_id": "5770322d-963d-4655-bbf9-c65e0e430fa4", "bucket_id": "13994925-4e8f-4c62-a583-6f41a192c8ec", "__dbz__physicalTableIdentifier": "zenodo-qa.public.records_buckets"}, "value": {"before": null, "after": {"record_id": "5770322d-963d-4655-bbf9-c65e0e430fa4", "bucket_id": "13994925-4e8f-4c62-a583-6f41a192c8ec"}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225364896\"]", "schema": "public", "table": "records_buckets", "txId": 563886523, "lsn": 1473225364896, "xmin": null}, "op": "c", "ts_ms": 1695210773344, "transaction": {"id": "563886523:1473225364896", "total_order": 6, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 171, "serialized_value_size": 525, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20621, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"parent_id": 10671423, "child_id": 12196572, "__dbz__physicalTableIdentifier": "zenodo-qa.public.pidrelations_pidrelation"}, "value": {"before": null, "after": {"created": 1695210770508930, "updated": 1695210770508940, "parent_id": 10671423, "child_id": 12196572, "relation_type": 0, "index": null}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225374736\"]", "schema": "public", "table": "pidrelations_pidrelation", "txId": 563886523, "lsn": 1473225374736, "xmin": null}, "op": "c", "ts_ms": 1695210773344, "transaction": {"id": "563886523:1473225374736", "total_order": 7, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 119, "serialized_value_size": 558, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20622, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"parent_id": 10671423, "child_id": 10671424, "__dbz__physicalTableIdentifier": "zenodo-qa.public.pidrelations_pidrelation"}, "value": {"before": {"created": 1636639862673774, "updated": 1638450235705191, "parent_id": 10671423, "child_id": 10671424, "relation_type": 0, "index": 0}, "after": {"created": 1636639862673774, "updated": 1695210770511804, "parent_id": 10671423, "child_id": 10671424, "relation_type": 0, "index": 0}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225410824\"]", "schema": "public", "table": "pidrelations_pidrelation", "txId": 563886523, "lsn": 1473225410824, "xmin": null}, "op": "u", "ts_ms": 1695210773344, "transaction": {"id": "563886523:1473225410824", "total_order": 8, "data_collection_order": 2}}, "headers": [], "checksum": null, "serialized_key_size": 119, "serialized_value_size": 675, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20623, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"parent_id": 10671423, "child_id": 10671822, "__dbz__physicalTableIdentifier": "zenodo-qa.public.pidrelations_pidrelation"}, "value": {"before": {"created": 1636644933654444, "updated": 1638450235705214, "parent_id": 10671423, "child_id": 10671822, "relation_type": 0, "index": 1}, "after": {"created": 1636644933654444, "updated": 1695210770511822, "parent_id": 10671423, "child_id": 10671822, "relation_type": 0, "index": 1}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225419128\"]", "schema": "public", "table": "pidrelations_pidrelation", "txId": 563886523, "lsn": 1473225419128, "xmin": null}, "op": "u", "ts_ms": 1695210773344, "transaction": {"id": "563886523:1473225419128", "total_order": 9, "data_collection_order": 3}}, "headers": [], "checksum": null, "serialized_key_size": 119, "serialized_value_size": 675, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20624, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"parent_id": 10671423, "child_id": 10709294, "__dbz__physicalTableIdentifier": "zenodo-qa.public.pidrelations_pidrelation"}, "value": {"before": {"created": 1638450235702748, "updated": 1638450235705225, "parent_id": 10671423, "child_id": 10709294, "relation_type": 0, "index": 2}, "after": {"created": 1638450235702748, "updated": 1695210770511832, "parent_id": 10671423, "child_id": 10709294, "relation_type": 0, "index": 2}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225427464\"]", "schema": "public", "table": "pidrelations_pidrelation", "txId": 563886523, "lsn": 1473225427464, "xmin": null}, "op": "u", "ts_ms": 1695210773344, "transaction": {"id": "563886523:1473225427464", "total_order": 10, "data_collection_order": 4}}, "headers": [], "checksum": null, "serialized_key_size": 119, "serialized_value_size": 676, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20625, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"parent_id": 10671423, "child_id": 12196572, "__dbz__physicalTableIdentifier": "zenodo-qa.public.pidrelations_pidrelation"}, "value": {"before": {"created": 1695210770508930, "updated": 1695210770508940, "parent_id": 10671423, "child_id": 12196572, "relation_type": 0, "index": null}, "after": {"created": 1695210770508930, "updated": 1695210770511840, "parent_id": 10671423, "child_id": 12196572, "relation_type": 0, "index": 3}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225435768\"]", "schema": "public", "table": "pidrelations_pidrelation", "txId": 563886523, "lsn": 1473225435768, "xmin": null}, "op": "u", "ts_ms": 1695210773344, "transaction": {"id": "563886523:1473225435768", "total_order": 11, "data_collection_order": 5}}, "headers": [], "checksum": null, "serialized_key_size": 119, "serialized_value_size": 679, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20626, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"parent_id": 12196572, "child_id": 12196573, "__dbz__physicalTableIdentifier": "zenodo-qa.public.pidrelations_pidrelation"}, "value": {"before": null, "after": {"created": 1695210770529336, "updated": 1695210770529346, "parent_id": 12196572, "child_id": 12196573, "relation_type": 1, "index": null}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225435904\"]", "schema": "public", "table": "pidrelations_pidrelation", "txId": 563886523, "lsn": 1473225435904, "xmin": null}, "op": "c", "ts_ms": 1695210773344, "transaction": {"id": "563886523:1473225435904", "total_order": 12, "data_collection_order": 6}}, "headers": [], "checksum": null, "serialized_key_size": 119, "serialized_value_size": 559, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20627, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"id": "13994925-4e8f-4c62-a583-6f41a192c8ec", "__dbz__physicalTableIdentifier": "zenodo-qa.public.files_bucket"}, "value": {"before": {"created": 1695210770377940, "updated": 1695210770377951, "id": "13994925-4e8f-4c62-a583-6f41a192c8ec", "default_location": 1, "default_storage_class": "S", "size": 0, "quota_size": 50000000000, "max_file_size": 50000000000, "locked": false, "deleted": false}, "after": {"created": 1695210770377940, "updated": 1695210771556933, "id": "13994925-4e8f-4c62-a583-6f41a192c8ec", "default_location": 1, "default_storage_class": "S", "size": 0, "quota_size": 50000000000, "max_file_size": 50000000000, "locked": false, "deleted": false}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225445504\"]", "schema": "public", "table": "files_bucket", "txId": 563886523, "lsn": 1473225445504, "xmin": null}, "op": "u", "ts_ms": 1695210773344, "transaction": {"id": "563886523:1473225445504", "total_order": 13, "data_collection_order": 2}}, "headers": [], "checksum": null, "serialized_key_size": 110, "serialized_value_size": 898, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20628, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"bucket_id": "13994925-4e8f-4c62-a583-6f41a192c8ec", "key": "ppanero/zenodo-release-test-v4.zip", "version_id": "6feca8af-bf00-4d8d-ac91-0d162529e9ab", "__dbz__physicalTableIdentifier": "zenodo-qa.public.files_object"}, "value": {"before": null, "after": {"created": 1695210771558224, "updated": 1695210771558233, "bucket_id": "13994925-4e8f-4c62-a583-6f41a192c8ec", "key": "ppanero/zenodo-release-test-v4.zip", "version_id": "6feca8af-bf00-4d8d-ac91-0d162529e9ab", "file_id": null, "_mimetype": "application/zip", "is_head": true}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225445712\"]", "schema": "public", "table": "files_object", "txId": 563886523, "lsn": 1473225445712, "xmin": null}, "op": "c", "ts_ms": 1695210773344, "transaction": {"id": "563886523:1473225445712", "total_order": 14, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 212, "serialized_value_size": 681, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20629, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"id": "8163830b-4913-45b8-8da1-17553bb531f9", "__dbz__physicalTableIdentifier": "zenodo-qa.public.files_files"}, "value": {"before": null, "after": {"created": 1695210771562664, "updated": 1695210771562672, "id": "8163830b-4913-45b8-8da1-17553bb531f9", "uri": null, "storage_class": null, "size": 0, "checksum": null, "readable": false, "writable": true, "last_check_at": null, "last_check": true}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225468024\"]", "schema": "public", "table": "files_files", "txId": 563886523, "lsn": 1473225468024, "xmin": null}, "op": "c", "ts_ms": 1695210773344, "transaction": {"id": "563886523:1473225468024", "total_order": 15, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 109, "serialized_value_size": 647, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20630, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"bucket_id": "13994925-4e8f-4c62-a583-6f41a192c8ec", "key": "ppanero/zenodo-release-test-v4.zip", "version_id": "6feca8af-bf00-4d8d-ac91-0d162529e9ab", "__dbz__physicalTableIdentifier": "zenodo-qa.public.files_object"}, "value": {"before": {"created": 1695210771558224, "updated": 1695210771558233, "bucket_id": "13994925-4e8f-4c62-a583-6f41a192c8ec", "key": "ppanero/zenodo-release-test-v4.zip", "version_id": "6feca8af-bf00-4d8d-ac91-0d162529e9ab", "file_id": null, "_mimetype": "application/zip", "is_head": true}, "after": {"created": 1695210771558224, "updated": 1695210771564715, "bucket_id": "13994925-4e8f-4c62-a583-6f41a192c8ec", "key": "ppanero/zenodo-release-test-v4.zip", "version_id": "6feca8af-bf00-4d8d-ac91-0d162529e9ab", "file_id": "8163830b-4913-45b8-8da1-17553bb531f9", "_mimetype": "application/zip", "is_head": true}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225485848\"]", "schema": "public", "table": "files_object", "txId": 563886523, "lsn": 1473225485848, "xmin": null}, "op": "u", "ts_ms": 1695210773345, "transaction": {"id": "563886523:1473225485848", "total_order": 16, "data_collection_order": 2}}, "headers": [], "checksum": null, "serialized_key_size": 212, "serialized_value_size": 972, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20631, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"id": "8163830b-4913-45b8-8da1-17553bb531f9", "__dbz__physicalTableIdentifier": "zenodo-qa.public.files_files"}, "value": {"before": {"created": 1695210771562664, "updated": 1695210771562672, "id": "8163830b-4913-45b8-8da1-17553bb531f9", "uri": null, "storage_class": null, "size": 0, "checksum": null, "readable": false, "writable": true, "last_check_at": null, "last_check": true}, "after": {"created": 1695210771562664, "updated": 1695210771731330, "id": "8163830b-4913-45b8-8da1-17553bb531f9", "uri": "root://eosmedia.cern.ch//eos/media/zenodo/test/data/81/63/830b-4913-45b8-8da1-17553bb531f9/data", "storage_class": "S", "size": 6379, "checksum": "md5:766c804c222feb2b3bd8b061c2f1899d", "readable": true, "writable": false, "last_check_at": null, "last_check": true}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225494584\"]", "schema": "public", "table": "files_files", "txId": 563886523, "lsn": 1473225494584, "xmin": null}, "op": "u", "ts_ms": 1695210773345, "transaction": {"id": "563886523:1473225494584", "total_order": 17, "data_collection_order": 2}}, "headers": [], "checksum": null, "serialized_key_size": 109, "serialized_value_size": 1000, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20632, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"id": "13994925-4e8f-4c62-a583-6f41a192c8ec", "__dbz__physicalTableIdentifier": "zenodo-qa.public.files_bucket"}, "value": {"before": {"created": 1695210770377940, "updated": 1695210771556933, "id": "13994925-4e8f-4c62-a583-6f41a192c8ec", "default_location": 1, "default_storage_class": "S", "size": 0, "quota_size": 50000000000, "max_file_size": 50000000000, "locked": false, "deleted": false}, "after": {"created": 1695210770377940, "updated": 1695210771763471, "id": "13994925-4e8f-4c62-a583-6f41a192c8ec", "default_location": 1, "default_storage_class": "S", "size": 6379, "quota_size": 50000000000, "max_file_size": 50000000000, "locked": false, "deleted": false}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225499528\"]", "schema": "public", "table": "files_bucket", "txId": 563886523, "lsn": 1473225499528, "xmin": null}, "op": "u", "ts_ms": 1695210773345, "transaction": {"id": "563886523:1473225499528", "total_order": 18, "data_collection_order": 3}}, "headers": [], "checksum": null, "serialized_key_size": 110, "serialized_value_size": 901, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20633, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"id": 12196572, "__dbz__physicalTableIdentifier": "zenodo-qa.public.pidstore_pid"}, "value": {"before": {"created": 1695210770385707, "updated": 1695210770385716, "id": 12196572, "pid_type": "recid", "pid_value": "1242908", "pid_provider": null, "status": "K", "object_type": null, "object_uuid": null}, "after": {"created": 1695210770385707, "updated": 1695210771796226, "id": 12196572, "pid_type": "recid", "pid_value": "1242908", "pid_provider": null, "status": "K", "object_type": "rec", "object_uuid": "a0a2c8be-f375-4913-a77f-45b9ec7c7498"}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225499792\"]", "schema": "public", "table": "pidstore_pid", "txId": 563886523, "lsn": 1473225499792, "xmin": null}, "op": "u", "ts_ms": 1695210773345, "transaction": {"id": "563886523:1473225499792", "total_order": 19, "data_collection_order": 3}}, "headers": [], "checksum": null, "serialized_key_size": 80, "serialized_value_size": 813, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20634, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"id": 12196572, "__dbz__physicalTableIdentifier": "zenodo-qa.public.pidstore_pid"}, "value": {"before": {"created": 1695210770385707, "updated": 1695210771796226, "id": 12196572, "pid_type": "recid", "pid_value": "1242908", "pid_provider": null, "status": "K", "object_type": "rec", "object_uuid": "a0a2c8be-f375-4913-a77f-45b9ec7c7498"}, "after": {"created": 1695210770385707, "updated": 1695210771822742, "id": 12196572, "pid_type": "recid", "pid_value": "1242908", "pid_provider": null, "status": "R", "object_type": "rec", "object_uuid": "a0a2c8be-f375-4913-a77f-45b9ec7c7498"}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225507816\"]", "schema": "public", "table": "pidstore_pid", "txId": 563886523, "lsn": 1473225507816, "xmin": null}, "op": "u", "ts_ms": 1695210773345, "transaction": {"id": "563886523:1473225507816", "total_order": 20, "data_collection_order": 4}}, "headers": [], "checksum": null, "serialized_key_size": 80, "serialized_value_size": 848, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20635, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"id": 12196574, "__dbz__physicalTableIdentifier": "zenodo-qa.public.pidstore_pid"}, "value": {"before": null, "after": {"created": 1695210771826543, "updated": 1695210771826551, "id": 12196574, "pid_type": "doi", "pid_value": "10.5072/zenodo.1242908", "pid_provider": "datacite", "status": "K", "object_type": "rec", "object_uuid": "a0a2c8be-f375-4913-a77f-45b9ec7c7498"}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225508368\"]", "schema": "public", "table": "pidstore_pid", "txId": 563886523, "lsn": 1473225508368, "xmin": null}, "op": "c", "ts_ms": 1695210773345, "transaction": {"id": "563886523:1473225508368", "total_order": 21, "data_collection_order": 5}}, "headers": [], "checksum": null, "serialized_key_size": 80, "serialized_value_size": 655, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20636, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"id": 12196575, "__dbz__physicalTableIdentifier": "zenodo-qa.public.pidstore_pid"}, "value": {"before": null, "after": {"created": 1695210771831138, "updated": 1695210771831146, "id": 12196575, "pid_type": "oai", "pid_value": "oai:zenodo.org:1242908", "pid_provider": "oai", "status": "R", "object_type": "rec", "object_uuid": "a0a2c8be-f375-4913-a77f-45b9ec7c7498"}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225523384\"]", "schema": "public", "table": "pidstore_pid", "txId": 563886523, "lsn": 1473225523384, "xmin": null}, "op": "c", "ts_ms": 1695210773345, "transaction": {"id": "563886523:1473225523384", "total_order": 22, "data_collection_order": 6}}, "headers": [], "checksum": null, "serialized_key_size": 80, "serialized_value_size": 650, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20637, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"id": "8163830b-4913-45b8-8da1-17553bb531f9", "__dbz__physicalTableIdentifier": "zenodo-qa.public.files_files"}, "value": {"before": {"created": 1695210771562664, "updated": 1695210771731330, "id": "8163830b-4913-45b8-8da1-17553bb531f9", "uri": "root://eosmedia.cern.ch//eos/media/zenodo/test/data/81/63/830b-4913-45b8-8da1-17553bb531f9/data", "storage_class": "S", "size": 6379, "checksum": "md5:766c804c222feb2b3bd8b061c2f1899d", "readable": true, "writable": false, "last_check_at": null, "last_check": true}, "after": {"created": 1695210771562664, "updated": 1695210771858340, "id": "8163830b-4913-45b8-8da1-17553bb531f9", "uri": "root://eosmedia.cern.ch//eos/media/zenodo/test/data/81/63/830b-4913-45b8-8da1-17553bb531f9/data", "storage_class": "S", "size": 6379, "checksum": "md5:766c804c222feb2b3bd8b061c2f1899d", "readable": true, "writable": false, "last_check_at": 1695210771857157, "last_check": true}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225548416\"]", "schema": "public", "table": "files_files", "txId": 563886523, "lsn": 1473225548416, "xmin": null}, "op": "u", "ts_ms": 1695210773345, "transaction": {"id": "563886523:1473225548416", "total_order": 23, "data_collection_order": 3}}, "headers": [], "checksum": null, "serialized_key_size": 109, "serialized_value_size": 1141, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20638, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"id": "13994925-4e8f-4c62-a583-6f41a192c8ec", "__dbz__physicalTableIdentifier": "zenodo-qa.public.files_bucket"}, "value": {"before": {"created": 1695210770377940, "updated": 1695210771763471, "id": "13994925-4e8f-4c62-a583-6f41a192c8ec", "default_location": 1, "default_storage_class": "S", "size": 6379, "quota_size": 50000000000, "max_file_size": 50000000000, "locked": false, "deleted": false}, "after": {"created": 1695210770377940, "updated": 1695210771871116, "id": "13994925-4e8f-4c62-a583-6f41a192c8ec", "default_location": 1, "default_storage_class": "S", "size": 6379, "quota_size": 50000000000, "max_file_size": 50000000000, "locked": true, "deleted": false}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225557232\"]", "schema": "public", "table": "files_bucket", "txId": 563886523, "lsn": 1473225557232, "xmin": null}, "op": "u", "ts_ms": 1695210773345, "transaction": {"id": "563886523:1473225557232", "total_order": 24, "data_collection_order": 4}}, "headers": [], "checksum": null, "serialized_key_size": 110, "serialized_value_size": 903, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20639, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"id": "5fb43415-2e99-412a-904b-aa166fc07f27", "__dbz__physicalTableIdentifier": "zenodo-qa.public.files_bucket"}, "value": {"before": null, "after": {"created": 1695210771878707, "updated": 1695210771878716, "id": "5fb43415-2e99-412a-904b-aa166fc07f27", "default_location": 1, "default_storage_class": "S", "size": 0, "quota_size": 50000000000, "max_file_size": null, "locked": false, "deleted": false}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225557440\"]", "schema": "public", "table": "files_bucket", "txId": 563886523, "lsn": 1473225557440, "xmin": null}, "op": "c", "ts_ms": 1695210773345, "transaction": {"id": "563886523:1473225557440", "total_order": 25, "data_collection_order": 5}}, "headers": [], "checksum": null, "serialized_key_size": 110, "serialized_value_size": 654, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20640, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"id": "5fb43415-2e99-412a-904b-aa166fc07f27", "__dbz__physicalTableIdentifier": "zenodo-qa.public.files_bucket"}, "value": {"before": {"created": 1695210771878707, "updated": 1695210771878716, "id": "5fb43415-2e99-412a-904b-aa166fc07f27", "default_location": 1, "default_storage_class": "S", "size": 0, "quota_size": 50000000000, "max_file_size": null, "locked": false, "deleted": false}, "after": {"created": 1695210771878707, "updated": 1695210771886252, "id": "5fb43415-2e99-412a-904b-aa166fc07f27", "default_location": 1, "default_storage_class": "S", "size": 0, "quota_size": 50000000000, "max_file_size": null, "locked": false, "deleted": false}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225565192\"]", "schema": "public", "table": "files_bucket", "txId": 563886523, "lsn": 1473225565192, "xmin": null}, "op": "u", "ts_ms": 1695210773345, "transaction": {"id": "563886523:1473225565192", "total_order": 26, "data_collection_order": 6}}, "headers": [], "checksum": null, "serialized_key_size": 110, "serialized_value_size": 884, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20641, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"bucket_id": "5fb43415-2e99-412a-904b-aa166fc07f27", "key": "ppanero/zenodo-release-test-v4.zip", "version_id": "c13fbe72-9d8c-47b6-8a03-7c075e9abbd5", "__dbz__physicalTableIdentifier": "zenodo-qa.public.files_object"}, "value": {"before": null, "after": {"created": 1695210771887784, "updated": 1695210771887791, "bucket_id": "5fb43415-2e99-412a-904b-aa166fc07f27", "key": "ppanero/zenodo-release-test-v4.zip", "version_id": "c13fbe72-9d8c-47b6-8a03-7c075e9abbd5", "file_id": null, "_mimetype": null, "is_head": true}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225565400\"]", "schema": "public", "table": "files_object", "txId": 563886523, "lsn": 1473225565400, "xmin": null}, "op": "c", "ts_ms": 1695210773345, "transaction": {"id": "563886523:1473225565400", "total_order": 27, "data_collection_order": 3}}, "headers": [], "checksum": null, "serialized_key_size": 212, "serialized_value_size": 668, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20642, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"id": "5fb43415-2e99-412a-904b-aa166fc07f27", "__dbz__physicalTableIdentifier": "zenodo-qa.public.files_bucket"}, "value": {"before": {"created": 1695210771878707, "updated": 1695210771886252, "id": "5fb43415-2e99-412a-904b-aa166fc07f27", "default_location": 1, "default_storage_class": "S", "size": 0, "quota_size": 50000000000, "max_file_size": null, "locked": false, "deleted": false}, "after": {"created": 1695210771878707, "updated": 1695210771919272, "id": "5fb43415-2e99-412a-904b-aa166fc07f27", "default_location": 1, "default_storage_class": "S", "size": 6379, "quota_size": 50000000000, "max_file_size": null, "locked": false, "deleted": false}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225593472\"]", "schema": "public", "table": "files_bucket", "txId": 563886523, "lsn": 1473225593472, "xmin": null}, "op": "u", "ts_ms": 1695210773345, "transaction": {"id": "563886523:1473225593472", "total_order": 28, "data_collection_order": 7}}, "headers": [], "checksum": null, "serialized_key_size": 110, "serialized_value_size": 887, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20643, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"id": "8163830b-4913-45b8-8da1-17553bb531f9", "__dbz__physicalTableIdentifier": "zenodo-qa.public.files_files"}, "value": {"before": {"created": 1695210771562664, "updated": 1695210771858340, "id": "8163830b-4913-45b8-8da1-17553bb531f9", "uri": "root://eosmedia.cern.ch//eos/media/zenodo/test/data/81/63/830b-4913-45b8-8da1-17553bb531f9/data", "storage_class": "S", "size": 6379, "checksum": "md5:766c804c222feb2b3bd8b061c2f1899d", "readable": true, "writable": false, "last_check_at": 1695210771857157, "last_check": true}, "after": {"created": 1695210771562664, "updated": 1695210771920468, "id": "8163830b-4913-45b8-8da1-17553bb531f9", "uri": "root://eosmedia.cern.ch//eos/media/zenodo/test/data/81/63/830b-4913-45b8-8da1-17553bb531f9/data", "storage_class": "S", "size": 6379, "checksum": "md5:766c804c222feb2b3bd8b061c2f1899d", "readable": true, "writable": false, "last_check_at": 1695210771857157, "last_check": true}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225593848\"]", "schema": "public", "table": "files_files", "txId": 563886523, "lsn": 1473225593848, "xmin": null}, "op": "u", "ts_ms": 1695210773345, "transaction": {"id": "563886523:1473225593848", "total_order": 29, "data_collection_order": 4}}, "headers": [], "checksum": null, "serialized_key_size": 109, "serialized_value_size": 1153, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20644, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"bucket_id": "5fb43415-2e99-412a-904b-aa166fc07f27", "key": "ppanero/zenodo-release-test-v4.zip", "version_id": "c13fbe72-9d8c-47b6-8a03-7c075e9abbd5", "__dbz__physicalTableIdentifier": "zenodo-qa.public.files_object"}, "value": {"before": {"created": 1695210771887784, "updated": 1695210771887791, "bucket_id": "5fb43415-2e99-412a-904b-aa166fc07f27", "key": "ppanero/zenodo-release-test-v4.zip", "version_id": "c13fbe72-9d8c-47b6-8a03-7c075e9abbd5", "file_id": null, "_mimetype": null, "is_head": true}, "after": {"created": 1695210771887784, "updated": 1695210771921351, "bucket_id": "5fb43415-2e99-412a-904b-aa166fc07f27", "key": "ppanero/zenodo-release-test-v4.zip", "version_id": "c13fbe72-9d8c-47b6-8a03-7c075e9abbd5", "file_id": "8163830b-4913-45b8-8da1-17553bb531f9", "_mimetype": null, "is_head": true}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225602688\"]", "schema": "public", "table": "files_object", "txId": 563886523, "lsn": 1473225602688, "xmin": null}, "op": "u", "ts_ms": 1695210773345, "transaction": {"id": "563886523:1473225602688", "total_order": 30, "data_collection_order": 4}}, "headers": [], "checksum": null, "serialized_key_size": 212, "serialized_value_size": 946, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20645, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"id": "5fb43415-2e99-412a-904b-aa166fc07f27", "__dbz__physicalTableIdentifier": "zenodo-qa.public.files_bucket"}, "value": {"before": {"created": 1695210771878707, "updated": 1695210771919272, "id": "5fb43415-2e99-412a-904b-aa166fc07f27", "default_location": 1, "default_storage_class": "S", "size": 6379, "quota_size": 50000000000, "max_file_size": null, "locked": false, "deleted": false}, "after": {"created": 1695210771878707, "updated": 1695210771924225, "id": "5fb43415-2e99-412a-904b-aa166fc07f27", "default_location": 1, "default_storage_class": "S", "size": 6379, "quota_size": 50000000000, "max_file_size": null, "locked": true, "deleted": false}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225603368\"]", "schema": "public", "table": "files_bucket", "txId": 563886523, "lsn": 1473225603368, "xmin": null}, "op": "u", "ts_ms": 1695210773345, "transaction": {"id": "563886523:1473225603368", "total_order": 31, "data_collection_order": 8}}, "headers": [], "checksum": null, "serialized_key_size": 110, "serialized_value_size": 889, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20646, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"id": "a0a2c8be-f375-4913-a77f-45b9ec7c7498", "__dbz__physicalTableIdentifier": "zenodo-qa.public.records_metadata"}, "value": {"before": null, "after": {"created": 1695210771936723, "updated": 1695210771936731, "id": "a0a2c8be-f375-4913-a77f-45b9ec7c7498", "json": "{\"doi\": \"10.5072/zenodo.1242908\", \"_oai\": {\"id\": \"oai:zenodo.org:1242908\"}, \"recid\": 1242908, \"title\": \"ppanero/zenodo-release-test: more and more\", \"_files\": [{\"key\": \"ppanero/zenodo-release-test-v4.zip\", \"size\": 6379, \"type\": \"zip\", \"bucket\": \"5fb43415-2e99-412a-904b-aa166fc07f27\", \"file_id\": \"8163830b-4913-45b8-8da1-17553bb531f9\", \"checksum\": \"md5:766c804c222feb2b3bd8b061c2f1899d\", \"version_id\": \"c13fbe72-9d8c-47b6-8a03-7c075e9abbd5\"}], \"owners\": [86490], \"$schema\": \"https://zenodo.org/schemas/records/record-v1.0.0.json\", \"license\": {\"$ref\": \"https://dx.zenodo.org/licenses/other-open\"}, \"version\": \"v4\", \"_buckets\": {\"record\": \"5fb43415-2e99-412a-904b-aa166fc07f27\", \"deposit\": \"13994925-4e8f-4c62-a583-6f41a192c8ec\"}, \"_deposit\": {\"id\": \"1242908\", \"pid\": {\"type\": \"recid\", \"value\": \"1242908\", \"revision_id\": 0}, \"owners\": [86490], \"status\": \"published\", \"created_by\": 86490}, \"creators\": [{\"name\": \"Pablo Panero\", \"affiliation\": \"CERN - Zenodo\"}], \"conceptdoi\": \"10.5072/zenodo.958575\", \"description\": \"

Zenodo testing migration

\", \"access_right\": \"open\", \"conceptrecid\": \"958575\", \"resource_type\": {\"type\": \"software\"}, \"publication_date\": \"2023-09-20\", \"related_identifiers\": [{\"scheme\": \"url\", \"relation\": \"isSupplementTo\", \"identifier\": \"https://github.com/ppanero/zenodo-release-test/tree/v4\"}]}", "version_id": 1}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225603576\"]", "schema": "public", "table": "records_metadata", "txId": 563886523, "lsn": 1473225603576, "xmin": null}, "op": "c", "ts_ms": 1695210773345, "transaction": {"id": "563886523:1473225603576", "total_order": 32, "data_collection_order": 2}}, "headers": [], "checksum": null, "serialized_key_size": 114, "serialized_value_size": 2010, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20647, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"record_id": "a0a2c8be-f375-4913-a77f-45b9ec7c7498", "bucket_id": "5fb43415-2e99-412a-904b-aa166fc07f27", "__dbz__physicalTableIdentifier": "zenodo-qa.public.records_buckets"}, "value": {"before": null, "after": {"record_id": "a0a2c8be-f375-4913-a77f-45b9ec7c7498", "bucket_id": "5fb43415-2e99-412a-904b-aa166fc07f27"}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225618520\"]", "schema": "public", "table": "records_buckets", "txId": 563886523, "lsn": 1473225618520, "xmin": null}, "op": "c", "ts_ms": 1695210773345, "transaction": {"id": "563886523:1473225618520", "total_order": 33, "data_collection_order": 2}}, "headers": [], "checksum": null, "serialized_key_size": 171, "serialized_value_size": 526, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20648, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"id": "7eb249a1-96bb-4c7a-ad90-fa04ac9fe6d8", "__dbz__physicalTableIdentifier": "zenodo-qa.public.records_metadata"}, "value": {"before": {"created": 1636639864296820, "updated": 1638450237732364, "id": "7eb249a1-96bb-4c7a-ad90-fa04ac9fe6d8", "json": "{\"doi\": \"10.5072/zenodo.958576\", \"_oai\": {\"id\": \"oai:zenodo.org:958576\"}, \"recid\": 958576, \"title\": \"ppanero/zenodo-release-test: v1\", \"_files\": [{\"key\": \"ppanero/zenodo-release-test-v1.zip\", \"size\": 433, \"type\": \"zip\", \"bucket\": \"8d780953-7939-45ec-af37-51fba7e533e8\", \"file_id\": \"4599d8c5-ba20-4aff-9ba3-2b15d7599be2\", \"checksum\": \"md5:37a1fc414c9acace82c835ec47753755\", \"version_id\": \"6c697488-8584-479c-b5bc-abb44dec0cda\"}], \"owners\": [86490], \"$schema\": \"https://zenodo.org/schemas/records/record-v1.0.0.json\", \"license\": {\"$ref\": \"https://dx.zenodo.org/licenses/other-open\"}, \"version\": \"v1\", \"_buckets\": {\"record\": \"8d780953-7939-45ec-af37-51fba7e533e8\", \"deposit\": \"4cc9f95c-255f-4bee-b5e2-fc974dab5af2\"}, \"_deposit\": {\"id\": \"958576\", \"pid\": {\"type\": \"recid\", \"value\": \"958576\", \"revision_id\": 0}, \"owners\": [86490], \"status\": \"published\", \"created_by\": 86490}, \"creators\": [{\"name\": \"Pablo Panero\", \"affiliation\": \"CERN - Zenodo\"}], \"conceptdoi\": \"10.5072/zenodo.958575\", \"description\": \"

Test

\", \"access_right\": \"open\", \"conceptrecid\": \"958575\", \"resource_type\": {\"type\": \"software\"}, \"publication_date\": \"2021-11-11\", \"related_identifiers\": [{\"scheme\": \"url\", \"relation\": \"isSupplementTo\", \"identifier\": \"https://github.com/ppanero/zenodo-release-test/tree/v1\"}]}", "version_id": 5}, "after": {"created": 1636639864296820, "updated": 1695210772042760, "id": "7eb249a1-96bb-4c7a-ad90-fa04ac9fe6d8", "json": "{\"doi\": \"10.5072/zenodo.958576\", \"_oai\": {\"id\": \"oai:zenodo.org:958576\"}, \"recid\": 958576, \"title\": \"ppanero/zenodo-release-test: v1\", \"_files\": [{\"key\": \"ppanero/zenodo-release-test-v1.zip\", \"size\": 433, \"type\": \"zip\", \"bucket\": \"8d780953-7939-45ec-af37-51fba7e533e8\", \"file_id\": \"4599d8c5-ba20-4aff-9ba3-2b15d7599be2\", \"checksum\": \"md5:37a1fc414c9acace82c835ec47753755\", \"version_id\": \"6c697488-8584-479c-b5bc-abb44dec0cda\"}], \"owners\": [86490], \"$schema\": \"https://zenodo.org/schemas/records/record-v1.0.0.json\", \"license\": {\"$ref\": \"https://dx.zenodo.org/licenses/other-open\"}, \"version\": \"v1\", \"_buckets\": {\"record\": \"8d780953-7939-45ec-af37-51fba7e533e8\", \"deposit\": \"4cc9f95c-255f-4bee-b5e2-fc974dab5af2\"}, \"_deposit\": {\"id\": \"958576\", \"pid\": {\"type\": \"recid\", \"value\": \"958576\", \"revision_id\": 0}, \"owners\": [86490], \"status\": \"published\", \"created_by\": 86490}, \"creators\": [{\"name\": \"Pablo Panero\", \"affiliation\": \"CERN - Zenodo\"}], \"conceptdoi\": \"10.5072/zenodo.958575\", \"description\": \"

Test

\", \"access_right\": \"open\", \"conceptrecid\": \"958575\", \"resource_type\": {\"type\": \"software\"}, \"publication_date\": \"2021-11-11\", \"related_identifiers\": [{\"scheme\": \"url\", \"relation\": \"isSupplementTo\", \"identifier\": \"https://github.com/ppanero/zenodo-release-test/tree/v1\"}]}", "version_id": 6}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225633304\"]", "schema": "public", "table": "records_metadata", "txId": 563886523, "lsn": 1473225633304, "xmin": null}, "op": "u", "ts_ms": 1695210773345, "transaction": {"id": "563886523:1473225633304", "total_order": 34, "data_collection_order": 3}}, "headers": [], "checksum": null, "serialized_key_size": 114, "serialized_value_size": 3518, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20649, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"id": "a9351593-9c6f-4038-a7da-36b9a87aed85", "__dbz__physicalTableIdentifier": "zenodo-qa.public.records_metadata"}, "value": {"before": {"created": 1636639862599908, "updated": 1638450237771421, "id": "a9351593-9c6f-4038-a7da-36b9a87aed85", "json": "{\"doi\": \"10.5072/zenodo.958576\", \"_oai\": {\"id\": \"oai:zenodo.org:958576\"}, \"recid\": 958576, \"title\": \"ppanero/zenodo-release-test: v1\", \"owners\": [86490], \"$schema\": \"https://zenodo.org/schemas/deposits/records/record-v1.0.0.json\", \"license\": {\"$ref\": \"https://dx.zenodo.org/licenses/other-open\"}, \"version\": \"v1\", \"_buckets\": {\"deposit\": \"4cc9f95c-255f-4bee-b5e2-fc974dab5af2\"}, \"_deposit\": {\"id\": \"958576\", \"pid\": {\"type\": \"recid\", \"value\": \"958576\", \"revision_id\": 0}, \"owners\": [86490], \"status\": \"published\", \"created_by\": 86490}, \"creators\": [{\"name\": \"Pablo Panero\", \"affiliation\": \"CERN - Zenodo\"}], \"conceptdoi\": \"10.5072/zenodo.958575\", \"communities\": [\"zenodo\"], \"description\": \"

Test

\", \"access_right\": \"open\", \"conceptrecid\": \"958575\", \"resource_type\": {\"type\": \"software\"}, \"publication_date\": \"2021-11-11\", \"related_identifiers\": [{\"scheme\": \"url\", \"relation\": \"isSupplementTo\", \"identifier\": \"https://github.com/ppanero/zenodo-release-test/tree/v1\"}]}", "version_id": 4}, "after": {"created": 1636639862599908, "updated": 1695210772129063, "id": "a9351593-9c6f-4038-a7da-36b9a87aed85", "json": "{\"doi\": \"10.5072/zenodo.958576\", \"_oai\": {\"id\": \"oai:zenodo.org:958576\"}, \"recid\": 958576, \"title\": \"ppanero/zenodo-release-test: v1\", \"owners\": [86490], \"$schema\": \"https://zenodo.org/schemas/deposits/records/record-v1.0.0.json\", \"license\": {\"$ref\": \"https://dx.zenodo.org/licenses/other-open\"}, \"version\": \"v1\", \"_buckets\": {\"deposit\": \"4cc9f95c-255f-4bee-b5e2-fc974dab5af2\"}, \"_deposit\": {\"id\": \"958576\", \"pid\": {\"type\": \"recid\", \"value\": \"958576\", \"revision_id\": 0}, \"owners\": [86490], \"status\": \"published\", \"created_by\": 86490}, \"creators\": [{\"name\": \"Pablo Panero\", \"affiliation\": \"CERN - Zenodo\"}], \"conceptdoi\": \"10.5072/zenodo.958575\", \"communities\": [\"zenodo\"], \"description\": \"

Test

\", \"access_right\": \"open\", \"conceptrecid\": \"958575\", \"resource_type\": {\"type\": \"software\"}, \"publication_date\": \"2021-11-11\", \"related_identifiers\": [{\"scheme\": \"url\", \"relation\": \"isSupplementTo\", \"identifier\": \"https://github.com/ppanero/zenodo-release-test/tree/v1\"}]}", "version_id": 5}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225684816\"]", "schema": "public", "table": "records_metadata", "txId": 563886523, "lsn": 1473225684816, "xmin": null}, "op": "u", "ts_ms": 1695210773345, "transaction": {"id": "563886523:1473225684816", "total_order": 35, "data_collection_order": 4}}, "headers": [], "checksum": null, "serialized_key_size": 114, "serialized_value_size": 2846, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20650, "timestamp": 1695210773620, "timestamp_type": 0, "key": {"id": "b72fb4b0-96d3-4a4e-bc89-77ce72886882", "__dbz__physicalTableIdentifier": "zenodo-qa.public.records_metadata"}, "value": {"before": {"created": 1636644934958551, "updated": 1638450237838667, "id": "b72fb4b0-96d3-4a4e-bc89-77ce72886882", "json": "{\"doi\": \"10.5072/zenodo.958711\", \"_oai\": {\"id\": \"oai:zenodo.org:958711\"}, \"recid\": 958711, \"title\": \"ppanero/zenodo-release-test: v2-alpha\", \"_files\": [{\"key\": \"ppanero/zenodo-release-test-v2alpha.zip\", \"size\": 441, \"type\": \"zip\", \"bucket\": \"f564a040-5e76-41c8-a999-0760f907f4cb\", \"file_id\": \"fd5828bc-a6b3-4d69-ba2f-2370aa5ef60f\", \"checksum\": \"md5:8693a962b1bab2411d9129c889c677d7\", \"version_id\": \"dc583b46-3579-4352-8e71-bbd99af74322\"}], \"owners\": [86490], \"$schema\": \"https://zenodo.org/schemas/records/record-v1.0.0.json\", \"license\": {\"$ref\": \"https://dx.zenodo.org/licenses/other-open\"}, \"version\": \"v2alpha\", \"_buckets\": {\"record\": \"f564a040-5e76-41c8-a999-0760f907f4cb\", \"deposit\": \"7c23c856-5617-4fba-bf7f-997d24b9097c\"}, \"_deposit\": {\"id\": \"958711\", \"pid\": {\"type\": \"recid\", \"value\": \"958711\", \"revision_id\": 0}, \"owners\": [86490], \"status\": \"published\", \"created_by\": 86490}, \"creators\": [{\"name\": \"Pablo Panero\", \"affiliation\": \"CERN - Zenodo\"}], \"conceptdoi\": \"10.5072/zenodo.958575\", \"description\": \"

Test again

\", \"access_right\": \"open\", \"conceptrecid\": \"958575\", \"resource_type\": {\"type\": \"software\"}, \"publication_date\": \"2021-11-11\", \"related_identifiers\": [{\"scheme\": \"url\", \"relation\": \"isSupplementTo\", \"identifier\": \"https://github.com/ppanero/zenodo-release-test/tree/v2alpha\"}]}", "version_id": 4}, "after": {"created": 1636644934958551, "updated": 1695210772211201, "id": "b72fb4b0-96d3-4a4e-bc89-77ce72886882", "json": "{\"doi\": \"10.5072/zenodo.958711\", \"_oai\": {\"id\": \"oai:zenodo.org:958711\"}, \"recid\": 958711, \"title\": \"ppanero/zenodo-release-test: v2-alpha\", \"_files\": [{\"key\": \"ppanero/zenodo-release-test-v2alpha.zip\", \"size\": 441, \"type\": \"zip\", \"bucket\": \"f564a040-5e76-41c8-a999-0760f907f4cb\", \"file_id\": \"fd5828bc-a6b3-4d69-ba2f-2370aa5ef60f\", \"checksum\": \"md5:8693a962b1bab2411d9129c889c677d7\", \"version_id\": \"dc583b46-3579-4352-8e71-bbd99af74322\"}], \"owners\": [86490], \"$schema\": \"https://zenodo.org/schemas/records/record-v1.0.0.json\", \"license\": {\"$ref\": \"https://dx.zenodo.org/licenses/other-open\"}, \"version\": \"v2alpha\", \"_buckets\": {\"record\": \"f564a040-5e76-41c8-a999-0760f907f4cb\", \"deposit\": \"7c23c856-5617-4fba-bf7f-997d24b9097c\"}, \"_deposit\": {\"id\": \"958711\", \"pid\": {\"type\": \"recid\", \"value\": \"958711\", \"revision_id\": 0}, \"owners\": [86490], \"status\": \"published\", \"created_by\": 86490}, \"creators\": [{\"name\": \"Pablo Panero\", \"affiliation\": \"CERN - Zenodo\"}], \"conceptdoi\": \"10.5072/zenodo.958575\", \"description\": \"

Test again

\", \"access_right\": \"open\", \"conceptrecid\": \"958575\", \"resource_type\": {\"type\": \"software\"}, \"publication_date\": \"2021-11-11\", \"related_identifiers\": [{\"scheme\": \"url\", \"relation\": \"isSupplementTo\", \"identifier\": \"https://github.com/ppanero/zenodo-release-test/tree/v2alpha\"}]}", "version_id": 5}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225710208\"]", "schema": "public", "table": "records_metadata", "txId": 563886523, "lsn": 1473225710208, "xmin": null}, "op": "u", "ts_ms": 1695210773345, "transaction": {"id": "563886523:1473225710208", "total_order": 36, "data_collection_order": 5}}, "headers": [], "checksum": null, "serialized_key_size": 114, "serialized_value_size": 3572, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20651, "timestamp": 1695210773621, "timestamp_type": 0, "key": {"id": "23e0ded6-9b05-4407-8738-624e8ff26344", "__dbz__physicalTableIdentifier": "zenodo-qa.public.records_metadata"}, "value": {"before": {"created": 1636644933586835, "updated": 1638450237852874, "id": "23e0ded6-9b05-4407-8738-624e8ff26344", "json": "{\"doi\": \"10.5072/zenodo.958711\", \"_oai\": {\"id\": \"oai:zenodo.org:958711\"}, \"recid\": 958711, \"title\": \"ppanero/zenodo-release-test: v2-alpha\", \"owners\": [86490], \"$schema\": \"https://zenodo.org/schemas/deposits/records/record-v1.0.0.json\", \"license\": {\"$ref\": \"https://dx.zenodo.org/licenses/other-open\"}, \"version\": \"v2alpha\", \"_buckets\": {\"deposit\": \"7c23c856-5617-4fba-bf7f-997d24b9097c\"}, \"_deposit\": {\"id\": \"958711\", \"pid\": {\"type\": \"recid\", \"value\": \"958711\", \"revision_id\": 0}, \"owners\": [86490], \"status\": \"published\", \"created_by\": 86490}, \"creators\": [{\"name\": \"Pablo Panero\", \"affiliation\": \"CERN - Zenodo\"}], \"conceptdoi\": \"10.5072/zenodo.958575\", \"communities\": [\"zenodo\"], \"description\": \"

Test again

\", \"access_right\": \"open\", \"conceptrecid\": \"958575\", \"resource_type\": {\"type\": \"software\"}, \"publication_date\": \"2021-11-11\", \"related_identifiers\": [{\"scheme\": \"url\", \"relation\": \"isSupplementTo\", \"identifier\": \"https://github.com/ppanero/zenodo-release-test/tree/v2alpha\"}]}", "version_id": 3}, "after": {"created": 1636644933586835, "updated": 1695210772225186, "id": "23e0ded6-9b05-4407-8738-624e8ff26344", "json": "{\"doi\": \"10.5072/zenodo.958711\", \"_oai\": {\"id\": \"oai:zenodo.org:958711\"}, \"recid\": 958711, \"title\": \"ppanero/zenodo-release-test: v2-alpha\", \"owners\": [86490], \"$schema\": \"https://zenodo.org/schemas/deposits/records/record-v1.0.0.json\", \"license\": {\"$ref\": \"https://dx.zenodo.org/licenses/other-open\"}, \"version\": \"v2alpha\", \"_buckets\": {\"deposit\": \"7c23c856-5617-4fba-bf7f-997d24b9097c\"}, \"_deposit\": {\"id\": \"958711\", \"pid\": {\"type\": \"recid\", \"value\": \"958711\", \"revision_id\": 0}, \"owners\": [86490], \"status\": \"published\", \"created_by\": 86490}, \"creators\": [{\"name\": \"Pablo Panero\", \"affiliation\": \"CERN - Zenodo\"}], \"conceptdoi\": \"10.5072/zenodo.958575\", \"communities\": [\"zenodo\"], \"description\": \"

Test again

\", \"access_right\": \"open\", \"conceptrecid\": \"958575\", \"resource_type\": {\"type\": \"software\"}, \"publication_date\": \"2021-11-11\", \"related_identifiers\": [{\"scheme\": \"url\", \"relation\": \"isSupplementTo\", \"identifier\": \"https://github.com/ppanero/zenodo-release-test/tree/v2alpha\"}]}", "version_id": 4}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225737096\"]", "schema": "public", "table": "records_metadata", "txId": 563886523, "lsn": 1473225737096, "xmin": null}, "op": "u", "ts_ms": 1695210773345, "transaction": {"id": "563886523:1473225737096", "total_order": 37, "data_collection_order": 6}}, "headers": [], "checksum": null, "serialized_key_size": 114, "serialized_value_size": 2890, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20652, "timestamp": 1695210773621, "timestamp_type": 0, "key": {"id": "0d03a623-84b5-411b-aea0-c86b345f38b7", "__dbz__physicalTableIdentifier": "zenodo-qa.public.records_metadata"}, "value": {"before": {"created": 1638450237646440, "updated": 1638450238490412, "id": "0d03a623-84b5-411b-aea0-c86b345f38b7", "json": "{\"doi\": \"10.5072/zenodo.973693\", \"_oai\": {\"id\": \"oai:zenodo.org:973693\"}, \"recid\": 973693, \"title\": \"ppanero/zenodo-release-test: with citation and license\", \"_files\": [{\"key\": \"ppanero/zenodo-release-test-v3.zip\", \"size\": 6373, \"type\": \"zip\", \"bucket\": \"d5f38f75-357c-4c11-96c5-84ca24b66070\", \"file_id\": \"d9ebff4d-2a93-45fd-bf63-f3bc541e6a08\", \"checksum\": \"md5:bde738bfa1f68d35a365d28b8eedc113\", \"version_id\": \"95e4a112-95ca-46d0-83a8-a84152fab65b\"}], \"owners\": [86490], \"$schema\": \"https://zenodo.org/schemas/records/record-v1.0.0.json\", \"license\": {\"$ref\": \"https://dx.zenodo.org/licenses/other-open\"}, \"version\": \"v3\", \"_buckets\": {\"record\": \"d5f38f75-357c-4c11-96c5-84ca24b66070\", \"deposit\": \"895f78ad-cff1-416e-bc95-5835f6cfa14b\"}, \"_deposit\": {\"id\": \"973693\", \"pid\": {\"type\": \"recid\", \"value\": \"973693\", \"revision_id\": 0}, \"owners\": [86490], \"status\": \"published\", \"created_by\": 86490}, \"creators\": [{\"name\": \"Pablo Panero\", \"affiliation\": \"CERN - Zenodo\"}], \"conceptdoi\": \"10.5072/zenodo.958575\", \"description\": \"No description provided.\", \"access_right\": \"open\", \"conceptrecid\": \"958575\", \"resource_type\": {\"type\": \"software\"}, \"publication_date\": \"2021-12-02\", \"related_identifiers\": [{\"scheme\": \"url\", \"relation\": \"isSupplementTo\", \"identifier\": \"https://github.com/ppanero/zenodo-release-test/tree/v3\"}]}", "version_id": 3}, "after": {"created": 1638450237646440, "updated": 1695210772361308, "id": "0d03a623-84b5-411b-aea0-c86b345f38b7", "json": "{\"doi\": \"10.5072/zenodo.973693\", \"_oai\": {\"id\": \"oai:zenodo.org:973693\"}, \"recid\": 973693, \"title\": \"ppanero/zenodo-release-test: with citation and license\", \"_files\": [{\"key\": \"ppanero/zenodo-release-test-v3.zip\", \"size\": 6373, \"type\": \"zip\", \"bucket\": \"d5f38f75-357c-4c11-96c5-84ca24b66070\", \"file_id\": \"d9ebff4d-2a93-45fd-bf63-f3bc541e6a08\", \"checksum\": \"md5:bde738bfa1f68d35a365d28b8eedc113\", \"version_id\": \"95e4a112-95ca-46d0-83a8-a84152fab65b\"}], \"owners\": [86490], \"$schema\": \"https://zenodo.org/schemas/records/record-v1.0.0.json\", \"license\": {\"$ref\": \"https://dx.zenodo.org/licenses/other-open\"}, \"version\": \"v3\", \"_buckets\": {\"record\": \"d5f38f75-357c-4c11-96c5-84ca24b66070\", \"deposit\": \"895f78ad-cff1-416e-bc95-5835f6cfa14b\"}, \"_deposit\": {\"id\": \"973693\", \"pid\": {\"type\": \"recid\", \"value\": \"973693\", \"revision_id\": 0}, \"owners\": [86490], \"status\": \"published\", \"created_by\": 86490}, \"creators\": [{\"name\": \"Pablo Panero\", \"affiliation\": \"CERN - Zenodo\"}], \"conceptdoi\": \"10.5072/zenodo.958575\", \"description\": \"No description provided.\", \"access_right\": \"open\", \"conceptrecid\": \"958575\", \"resource_type\": {\"type\": \"software\"}, \"publication_date\": \"2021-12-02\", \"related_identifiers\": [{\"scheme\": \"url\", \"relation\": \"isSupplementTo\", \"identifier\": \"https://github.com/ppanero/zenodo-release-test/tree/v3\"}]}", "version_id": 4}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225785312\"]", "schema": "public", "table": "records_metadata", "txId": 563886523, "lsn": 1473225785312, "xmin": null}, "op": "u", "ts_ms": 1695210773345, "transaction": {"id": "563886523:1473225785312", "total_order": 38, "data_collection_order": 7}}, "headers": [], "checksum": null, "serialized_key_size": 114, "serialized_value_size": 3592, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20653, "timestamp": 1695210773621, "timestamp_type": 0, "key": {"id": "7dbf85a6-d126-4e2f-8c20-bd44b5845d15", "__dbz__physicalTableIdentifier": "zenodo-qa.public.records_metadata"}, "value": {"before": {"created": 1638450235632242, "updated": 1658920485317857, "id": "7dbf85a6-d126-4e2f-8c20-bd44b5845d15", "json": "{\"doi\": \"10.5072/zenodo.973693\", \"_oai\": {\"id\": \"oai:zenodo.org:973693\"}, \"recid\": 973693, \"title\": \"ppanero/zenodo-release-test: with citation and license\", \"_files\": [{\"key\": \"ppanero/zenodo-release-test-v3.zip\", \"size\": 6373, \"type\": \"zip\", \"bucket\": \"d5f38f75-357c-4c11-96c5-84ca24b66070\", \"file_id\": \"d9ebff4d-2a93-45fd-bf63-f3bc541e6a08\", \"checksum\": \"md5:bde738bfa1f68d35a365d28b8eedc113\", \"version_id\": \"95e4a112-95ca-46d0-83a8-a84152fab65b\"}], \"owners\": [86490], \"$schema\": \"https://zenodo.org/schemas/deposits/records/record-v1.0.0.json\", \"license\": {\"$ref\": \"https://dx.zenodo.org/licenses/other-open\"}, \"version\": \"v3\", \"_buckets\": {\"record\": \"d5f38f75-357c-4c11-96c5-84ca24b66070\", \"deposit\": \"895f78ad-cff1-416e-bc95-5835f6cfa14b\"}, \"_deposit\": {\"id\": \"973693\", \"pid\": {\"type\": \"recid\", \"value\": \"973693\", \"revision_id\": 2}, \"owners\": [86490], \"status\": \"draft\", \"created_by\": 86490}, \"creators\": [{\"name\": \"Pablo Panero\", \"affiliation\": \"CERN - Zenodo\"}], \"conceptdoi\": \"10.5072/zenodo.958575\", \"description\": \"No description provided.\", \"access_right\": \"open\", \"conceptrecid\": \"958575\", \"resource_type\": {\"type\": \"software\"}, \"publication_date\": \"2021-12-02\", \"related_identifiers\": [{\"scheme\": \"url\", \"relation\": \"isSupplementTo\", \"identifier\": \"https://github.com/ppanero/zenodo-release-test/tree/v3\"}]}", "version_id": 3}, "after": {"created": 1638450235632242, "updated": 1695210772391860, "id": "7dbf85a6-d126-4e2f-8c20-bd44b5845d15", "json": "{\"doi\": \"10.5072/zenodo.973693\", \"_oai\": {\"id\": \"oai:zenodo.org:973693\"}, \"recid\": 973693, \"title\": \"ppanero/zenodo-release-test: with citation and license\", \"_files\": [{\"key\": \"ppanero/zenodo-release-test-v3.zip\", \"size\": 6373, \"type\": \"zip\", \"bucket\": \"d5f38f75-357c-4c11-96c5-84ca24b66070\", \"file_id\": \"d9ebff4d-2a93-45fd-bf63-f3bc541e6a08\", \"checksum\": \"md5:bde738bfa1f68d35a365d28b8eedc113\", \"version_id\": \"95e4a112-95ca-46d0-83a8-a84152fab65b\"}], \"owners\": [86490], \"$schema\": \"https://zenodo.org/schemas/deposits/records/record-v1.0.0.json\", \"license\": {\"$ref\": \"https://dx.zenodo.org/licenses/other-open\"}, \"version\": \"v3\", \"_buckets\": {\"record\": \"d5f38f75-357c-4c11-96c5-84ca24b66070\", \"deposit\": \"895f78ad-cff1-416e-bc95-5835f6cfa14b\"}, \"_deposit\": {\"id\": \"973693\", \"pid\": {\"type\": \"recid\", \"value\": \"973693\", \"revision_id\": 2}, \"owners\": [86490], \"status\": \"draft\", \"created_by\": 86490}, \"creators\": [{\"name\": \"Pablo Panero\", \"affiliation\": \"CERN - Zenodo\"}], \"conceptdoi\": \"10.5072/zenodo.958575\", \"communities\": [\"zenodo\"], \"description\": \"No description provided.\", \"access_right\": \"open\", \"conceptrecid\": \"958575\", \"resource_type\": {\"type\": \"software\"}, \"publication_date\": \"2021-12-02\", \"related_identifiers\": [{\"scheme\": \"url\", \"relation\": \"isSupplementTo\", \"identifier\": \"https://github.com/ppanero/zenodo-release-test/tree/v3\"}]}", "version_id": 4}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225827160\"]", "schema": "public", "table": "records_metadata", "txId": 563886523, "lsn": 1473225827160, "xmin": null}, "op": "u", "ts_ms": 1695210773345, "transaction": {"id": "563886523:1473225827160", "total_order": 39, "data_collection_order": 8}}, "headers": [], "checksum": null, "serialized_key_size": 114, "serialized_value_size": 3633, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20654, "timestamp": 1695210773621, "timestamp_type": 0, "key": {"id": "a0a2c8be-f375-4913-a77f-45b9ec7c7498", "__dbz__physicalTableIdentifier": "zenodo-qa.public.records_metadata"}, "value": {"before": {"created": 1695210771936723, "updated": 1695210771936731, "id": "a0a2c8be-f375-4913-a77f-45b9ec7c7498", "json": "{\"doi\": \"10.5072/zenodo.1242908\", \"_oai\": {\"id\": \"oai:zenodo.org:1242908\"}, \"recid\": 1242908, \"title\": \"ppanero/zenodo-release-test: more and more\", \"_files\": [{\"key\": \"ppanero/zenodo-release-test-v4.zip\", \"size\": 6379, \"type\": \"zip\", \"bucket\": \"5fb43415-2e99-412a-904b-aa166fc07f27\", \"file_id\": \"8163830b-4913-45b8-8da1-17553bb531f9\", \"checksum\": \"md5:766c804c222feb2b3bd8b061c2f1899d\", \"version_id\": \"c13fbe72-9d8c-47b6-8a03-7c075e9abbd5\"}], \"owners\": [86490], \"$schema\": \"https://zenodo.org/schemas/records/record-v1.0.0.json\", \"license\": {\"$ref\": \"https://dx.zenodo.org/licenses/other-open\"}, \"version\": \"v4\", \"_buckets\": {\"record\": \"5fb43415-2e99-412a-904b-aa166fc07f27\", \"deposit\": \"13994925-4e8f-4c62-a583-6f41a192c8ec\"}, \"_deposit\": {\"id\": \"1242908\", \"pid\": {\"type\": \"recid\", \"value\": \"1242908\", \"revision_id\": 0}, \"owners\": [86490], \"status\": \"published\", \"created_by\": 86490}, \"creators\": [{\"name\": \"Pablo Panero\", \"affiliation\": \"CERN - Zenodo\"}], \"conceptdoi\": \"10.5072/zenodo.958575\", \"description\": \"

Zenodo testing migration

\", \"access_right\": \"open\", \"conceptrecid\": \"958575\", \"resource_type\": {\"type\": \"software\"}, \"publication_date\": \"2023-09-20\", \"related_identifiers\": [{\"scheme\": \"url\", \"relation\": \"isSupplementTo\", \"identifier\": \"https://github.com/ppanero/zenodo-release-test/tree/v4\"}]}", "version_id": 1}, "after": {"created": 1695210771936723, "updated": 1695210772626861, "id": "a0a2c8be-f375-4913-a77f-45b9ec7c7498", "json": "{\"doi\": \"10.5072/zenodo.1242908\", \"_oai\": {\"id\": \"oai:zenodo.org:1242908\"}, \"recid\": 1242908, \"title\": \"ppanero/zenodo-release-test: more and more\", \"_files\": [{\"key\": \"ppanero/zenodo-release-test-v4.zip\", \"size\": 6379, \"type\": \"zip\", \"bucket\": \"5fb43415-2e99-412a-904b-aa166fc07f27\", \"file_id\": \"8163830b-4913-45b8-8da1-17553bb531f9\", \"checksum\": \"md5:766c804c222feb2b3bd8b061c2f1899d\", \"version_id\": \"c13fbe72-9d8c-47b6-8a03-7c075e9abbd5\"}], \"owners\": [86490], \"$schema\": \"https://zenodo.org/schemas/records/record-v1.0.0.json\", \"license\": {\"$ref\": \"https://dx.zenodo.org/licenses/other-open\"}, \"version\": \"v4\", \"_buckets\": {\"record\": \"5fb43415-2e99-412a-904b-aa166fc07f27\", \"deposit\": \"13994925-4e8f-4c62-a583-6f41a192c8ec\"}, \"_deposit\": {\"id\": \"1242908\", \"pid\": {\"type\": \"recid\", \"value\": \"1242908\", \"revision_id\": 0}, \"owners\": [86490], \"status\": \"published\", \"created_by\": 86490}, \"creators\": [{\"name\": \"Pablo Panero\", \"affiliation\": \"CERN - Zenodo\"}], \"conceptdoi\": \"10.5072/zenodo.958575\", \"description\": \"

Zenodo testing migration

\", \"access_right\": \"open\", \"conceptrecid\": \"958575\", \"resource_type\": {\"type\": \"software\"}, \"publication_date\": \"2023-09-20\", \"related_identifiers\": [{\"scheme\": \"url\", \"relation\": \"isSupplementTo\", \"identifier\": \"https://github.com/ppanero/zenodo-release-test/tree/v4\"}]}", "version_id": 2}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225867648\"]", "schema": "public", "table": "records_metadata", "txId": 563886523, "lsn": 1473225867648, "xmin": null}, "op": "u", "ts_ms": 1695210773346, "transaction": {"id": "563886523:1473225867648", "total_order": 40, "data_collection_order": 9}}, "headers": [], "checksum": null, "serialized_key_size": 114, "serialized_value_size": 3592, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20655, "timestamp": 1695210773621, "timestamp_type": 0, "key": {"id": 10671423, "__dbz__physicalTableIdentifier": "zenodo-qa.public.pidstore_pid"}, "value": {"before": {"created": 1636639862574383, "updated": 1638450238047510, "id": 10671423, "pid_type": "recid", "pid_value": "958575", "pid_provider": null, "status": "M", "object_type": null, "object_uuid": "22693524-3c95-4623-9d90-1bfec41c575c"}, "after": {"created": 1636639862574383, "updated": 1695210772644938, "id": 10671423, "pid_type": "recid", "pid_value": "958575", "pid_provider": null, "status": "M", "object_type": null, "object_uuid": "22693524-3c95-4623-9d90-1bfec41c575c"}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225872648\"]", "schema": "public", "table": "pidstore_pid", "txId": 563886523, "lsn": 1473225872648, "xmin": null}, "op": "u", "ts_ms": 1695210773346, "transaction": {"id": "563886523:1473225872648", "total_order": 41, "data_collection_order": 7}}, "headers": [], "checksum": null, "serialized_key_size": 80, "serialized_value_size": 844, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20656, "timestamp": 1695210773621, "timestamp_type": 0, "key": {"id": 10709294, "__dbz__physicalTableIdentifier": "zenodo-qa.public.pidstore_pid"}, "value": {"before": {"created": 1638450235615307, "updated": 1638450238047543, "id": 10709294, "pid_type": "recid", "pid_value": "973693", "pid_provider": null, "status": "R", "object_type": "rec", "object_uuid": "0d03a623-84b5-411b-aea0-c86b345f38b7"}, "after": {"created": 1638450235615307, "updated": 1695210772644960, "id": 10709294, "pid_type": "recid", "pid_value": "973693", "pid_provider": null, "status": "R", "object_type": "rec", "object_uuid": "0d03a623-84b5-411b-aea0-c86b345f38b7"}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225881032\"]", "schema": "public", "table": "pidstore_pid", "txId": 563886523, "lsn": 1473225881032, "xmin": null}, "op": "u", "ts_ms": 1695210773346, "transaction": {"id": "563886523:1473225881032", "total_order": 42, "data_collection_order": 8}}, "headers": [], "checksum": null, "serialized_key_size": 80, "serialized_value_size": 846, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20657, "timestamp": 1695210773621, "timestamp_type": 0, "key": {"id": 12196572, "__dbz__physicalTableIdentifier": "zenodo-qa.public.pidstore_pid"}, "value": {"before": {"created": 1695210770385707, "updated": 1695210771822742, "id": 12196572, "pid_type": "recid", "pid_value": "1242908", "pid_provider": null, "status": "R", "object_type": "rec", "object_uuid": "a0a2c8be-f375-4913-a77f-45b9ec7c7498"}, "after": {"created": 1695210770385707, "updated": 1695210772644970, "id": 12196572, "pid_type": "recid", "pid_value": "1242908", "pid_provider": null, "status": "R", "object_type": "rec", "object_uuid": "a0a2c8be-f375-4913-a77f-45b9ec7c7498"}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225911272\"]", "schema": "public", "table": "pidstore_pid", "txId": 563886523, "lsn": 1473225911272, "xmin": null}, "op": "u", "ts_ms": 1695210773346, "transaction": {"id": "563886523:1473225911272", "total_order": 43, "data_collection_order": 9}}, "headers": [], "checksum": null, "serialized_key_size": 80, "serialized_value_size": 848, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20658, "timestamp": 1695210773621, "timestamp_type": 0, "key": {"id": "22693524-3c95-4623-9d90-1bfec41c575c", "__dbz__physicalTableIdentifier": "zenodo-qa.public.pidstore_redirect"}, "value": {"before": {"created": 1636639864476288, "updated": 1638450238049517, "id": "22693524-3c95-4623-9d90-1bfec41c575c", "pid_id": 10709294}, "after": {"created": 1636639864476288, "updated": 1695210772707148, "id": "22693524-3c95-4623-9d90-1bfec41c575c", "pid_id": 12196572}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225911472\"]", "schema": "public", "table": "pidstore_redirect", "txId": 563886523, "lsn": 1473225911472, "xmin": null}, "op": "u", "ts_ms": 1695210773346, "transaction": {"id": "563886523:1473225911472", "total_order": 44, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 115, "serialized_value_size": 655, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20659, "timestamp": 1695210773621, "timestamp_type": 0, "key": {"parent_id": 12196572, "child_id": 12196573, "__dbz__physicalTableIdentifier": "zenodo-qa.public.pidrelations_pidrelation"}, "value": {"before": {"created": 1695210770529336, "updated": 1695210770529346, "parent_id": 12196572, "child_id": 12196573, "relation_type": 1, "index": null}, "after": null, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225916080\"]", "schema": "public", "table": "pidrelations_pidrelation", "txId": 563886523, "lsn": 1473225916080, "xmin": null}, "op": "d", "ts_ms": 1695210773346, "transaction": {"id": "563886523:1473225916080", "total_order": 45, "data_collection_order": 7}}, "headers": [], "checksum": null, "serialized_key_size": 119, "serialized_value_size": 559, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20661, "timestamp": 1695210773621, "timestamp_type": 0, "key": {"id": "5770322d-963d-4655-bbf9-c65e0e430fa4", "__dbz__physicalTableIdentifier": "zenodo-qa.public.records_metadata"}, "value": {"before": {"created": 1695210770421881, "updated": 1695210770421889, "id": "5770322d-963d-4655-bbf9-c65e0e430fa4", "json": "{\"doi\": \"\", \"recid\": 1242908, \"title\": \"ppanero/zenodo-release-test: more and more\", \"$schema\": \"https://zenodo.org/schemas/deposits/records/record-v1.0.0.json\", \"license\": {\"$ref\": \"https://dx.zenodo.org/licenses/other-open\"}, \"version\": \"v4\", \"_buckets\": {\"deposit\": \"13994925-4e8f-4c62-a583-6f41a192c8ec\"}, \"_deposit\": {\"id\": \"1242908\", \"owners\": [], \"status\": \"draft\"}, \"creators\": [{\"name\": \"Pablo Panero\", \"affiliation\": \"CERN - Zenodo\"}], \"conceptdoi\": \"10.5072/zenodo.958575\", \"description\": \"

Zenodo testing migration

\", \"access_right\": \"open\", \"conceptrecid\": \"958575\", \"resource_type\": {\"type\": \"software\"}, \"publication_date\": \"2023-09-20\", \"related_identifiers\": [{\"scheme\": \"url\", \"relation\": \"isSupplementTo\", \"identifier\": \"https://github.com/ppanero/zenodo-release-test/tree/v4\"}]}", "version_id": 1}, "after": {"created": 1695210770421881, "updated": 1695210772934378, "id": "5770322d-963d-4655-bbf9-c65e0e430fa4", "json": "{\"doi\": \"10.5072/zenodo.1242908\", \"_oai\": {\"id\": \"oai:zenodo.org:1242908\"}, \"recid\": 1242908, \"title\": \"ppanero/zenodo-release-test: more and more\", \"owners\": [86490], \"$schema\": \"https://zenodo.org/schemas/deposits/records/record-v1.0.0.json\", \"license\": {\"$ref\": \"https://dx.zenodo.org/licenses/other-open\"}, \"version\": \"v4\", \"_buckets\": {\"deposit\": \"13994925-4e8f-4c62-a583-6f41a192c8ec\"}, \"_deposit\": {\"id\": \"1242908\", \"pid\": {\"type\": \"recid\", \"value\": \"1242908\", \"revision_id\": 0}, \"owners\": [86490], \"status\": \"published\", \"created_by\": 86490}, \"creators\": [{\"name\": \"Pablo Panero\", \"affiliation\": \"CERN - Zenodo\"}], \"conceptdoi\": \"10.5072/zenodo.958575\", \"communities\": [\"zenodo\"], \"description\": \"

Zenodo testing migration

\", \"access_right\": \"open\", \"conceptrecid\": \"958575\", \"resource_type\": {\"type\": \"software\"}, \"publication_date\": \"2023-09-20\", \"related_identifiers\": [{\"scheme\": \"url\", \"relation\": \"isSupplementTo\", \"identifier\": \"https://github.com/ppanero/zenodo-release-test/tree/v4\"}]}", "version_id": 2}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225916336\"]", "schema": "public", "table": "records_metadata", "txId": 563886523, "lsn": 1473225916336, "xmin": null}, "op": "u", "ts_ms": 1695210773346, "transaction": {"id": "563886523:1473225916336", "total_order": 46, "data_collection_order": 10}}, "headers": [], "checksum": null, "serialized_key_size": 114, "serialized_value_size": 2689, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20662, "timestamp": 1695210773621, "timestamp_type": 0, "key": {"id": "a0a2c8be-f375-4913-a77f-45b9ec7c7498", "__dbz__physicalTableIdentifier": "zenodo-qa.public.records_metadata"}, "value": {"before": {"created": 1695210771936723, "updated": 1695210772626861, "id": "a0a2c8be-f375-4913-a77f-45b9ec7c7498", "json": "{\"doi\": \"10.5072/zenodo.1242908\", \"_oai\": {\"id\": \"oai:zenodo.org:1242908\"}, \"recid\": 1242908, \"title\": \"ppanero/zenodo-release-test: more and more\", \"_files\": [{\"key\": \"ppanero/zenodo-release-test-v4.zip\", \"size\": 6379, \"type\": \"zip\", \"bucket\": \"5fb43415-2e99-412a-904b-aa166fc07f27\", \"file_id\": \"8163830b-4913-45b8-8da1-17553bb531f9\", \"checksum\": \"md5:766c804c222feb2b3bd8b061c2f1899d\", \"version_id\": \"c13fbe72-9d8c-47b6-8a03-7c075e9abbd5\"}], \"owners\": [86490], \"$schema\": \"https://zenodo.org/schemas/records/record-v1.0.0.json\", \"license\": {\"$ref\": \"https://dx.zenodo.org/licenses/other-open\"}, \"version\": \"v4\", \"_buckets\": {\"record\": \"5fb43415-2e99-412a-904b-aa166fc07f27\", \"deposit\": \"13994925-4e8f-4c62-a583-6f41a192c8ec\"}, \"_deposit\": {\"id\": \"1242908\", \"pid\": {\"type\": \"recid\", \"value\": \"1242908\", \"revision_id\": 0}, \"owners\": [86490], \"status\": \"published\", \"created_by\": 86490}, \"creators\": [{\"name\": \"Pablo Panero\", \"affiliation\": \"CERN - Zenodo\"}], \"conceptdoi\": \"10.5072/zenodo.958575\", \"description\": \"

Zenodo testing migration

\", \"access_right\": \"open\", \"conceptrecid\": \"958575\", \"resource_type\": {\"type\": \"software\"}, \"publication_date\": \"2023-09-20\", \"related_identifiers\": [{\"scheme\": \"url\", \"relation\": \"isSupplementTo\", \"identifier\": \"https://github.com/ppanero/zenodo-release-test/tree/v4\"}]}", "version_id": 2}, "after": {"created": 1695210771936723, "updated": 1695210773128243, "id": "a0a2c8be-f375-4913-a77f-45b9ec7c7498", "json": "{\"doi\": \"10.5072/zenodo.1242908\", \"_oai\": {\"id\": \"oai:zenodo.org:1242908\"}, \"recid\": 1242908, \"title\": \"ppanero/zenodo-release-test: more and more\", \"_files\": [{\"key\": \"ppanero/zenodo-release-test-v4.zip\", \"size\": 6379, \"type\": \"zip\", \"bucket\": \"5fb43415-2e99-412a-904b-aa166fc07f27\", \"file_id\": \"8163830b-4913-45b8-8da1-17553bb531f9\", \"checksum\": \"md5:766c804c222feb2b3bd8b061c2f1899d\", \"version_id\": \"c13fbe72-9d8c-47b6-8a03-7c075e9abbd5\"}], \"owners\": [86490], \"$schema\": \"https://zenodo.org/schemas/records/record-v1.0.0.json\", \"license\": {\"$ref\": \"https://dx.zenodo.org/licenses/other-open\"}, \"version\": \"v4\", \"_buckets\": {\"record\": \"5fb43415-2e99-412a-904b-aa166fc07f27\", \"deposit\": \"13994925-4e8f-4c62-a583-6f41a192c8ec\"}, \"_deposit\": {\"id\": \"1242908\", \"pid\": {\"type\": \"recid\", \"value\": \"1242908\", \"revision_id\": 0}, \"owners\": [86490], \"status\": \"published\", \"created_by\": 86490}, \"creators\": [{\"name\": \"Pablo Panero\", \"affiliation\": \"CERN - Zenodo\"}], \"conceptdoi\": \"10.5072/zenodo.958575\", \"description\": \"

Zenodo testing migration

\", \"access_right\": \"open\", \"conceptrecid\": \"958575\", \"resource_type\": {\"type\": \"software\"}, \"publication_date\": \"2023-09-20\", \"related_identifiers\": [{\"scheme\": \"url\", \"relation\": \"isSupplementTo\", \"identifier\": \"https://github.com/ppanero/zenodo-release-test/tree/v4\"}]}", "version_id": 3}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225977912\"]", "schema": "public", "table": "records_metadata", "txId": 563886523, "lsn": 1473225977912, "xmin": null}, "op": "u", "ts_ms": 1695210773346, "transaction": {"id": "563886523:1473225977912", "total_order": 47, "data_collection_order": 11}}, "headers": [], "checksum": null, "serialized_key_size": 114, "serialized_value_size": 3593, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20663, "timestamp": 1695210773621, "timestamp_type": 0, "key": {"id": "c9fc85cd-e8ec-4ba0-9a13-75a590f3fd15", "__dbz__physicalTableIdentifier": "zenodo-qa.public.github_releases"}, "value": {"before": {"created": 1695210768812484, "updated": 1695210768870629, "id": "c9fc85cd-e8ec-4ba0-9a13-75a590f3fd15", "release_id": 121854239, "tag": "v4", "errors": null, "repository_id": "0d1b629d-7992-4650-b0b0-8908a0322bca", "event_id": "1e596bad-1bb3-4749-8a5e-dd9f1552ebc2", "record_id": null, "status": "P"}, "after": {"created": 1695210768812484, "updated": 1695210773130108, "id": "c9fc85cd-e8ec-4ba0-9a13-75a590f3fd15", "release_id": 121854239, "tag": "v4", "errors": null, "repository_id": "0d1b629d-7992-4650-b0b0-8908a0322bca", "event_id": "1e596bad-1bb3-4749-8a5e-dd9f1552ebc2", "record_id": "a0a2c8be-f375-4913-a77f-45b9ec7c7498", "status": "P"}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225987400\"]", "schema": "public", "table": "github_releases", "txId": 563886523, "lsn": 1473225987400, "xmin": null}, "op": "u", "ts_ms": 1695210773346, "transaction": {"id": "563886523:1473225987400", "total_order": 48, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 113, "serialized_value_size": 1017, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20664, "timestamp": 1695210773621, "timestamp_type": 0, "key": {"id": "c9fc85cd-e8ec-4ba0-9a13-75a590f3fd15", "__dbz__physicalTableIdentifier": "zenodo-qa.public.github_releases"}, "value": {"before": {"created": 1695210768812484, "updated": 1695210773130108, "id": "c9fc85cd-e8ec-4ba0-9a13-75a590f3fd15", "release_id": 121854239, "tag": "v4", "errors": null, "repository_id": "0d1b629d-7992-4650-b0b0-8908a0322bca", "event_id": "1e596bad-1bb3-4749-8a5e-dd9f1552ebc2", "record_id": "a0a2c8be-f375-4913-a77f-45b9ec7c7498", "status": "P"}, "after": {"created": 1695210768812484, "updated": 1695210773265633, "id": "c9fc85cd-e8ec-4ba0-9a13-75a590f3fd15", "release_id": 121854239, "tag": "v4", "errors": null, "repository_id": "0d1b629d-7992-4650-b0b0-8908a0322bca", "event_id": "1e596bad-1bb3-4749-8a5e-dd9f1552ebc2", "record_id": "a0a2c8be-f375-4913-a77f-45b9ec7c7498", "status": "D"}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210773267, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225216920\",\"1473225989432\"]", "schema": "public", "table": "github_releases", "txId": 563886523, "lsn": 1473225989432, "xmin": null}, "op": "u", "ts_ms": 1695210773346, "transaction": {"id": "563886523:1473225989432", "total_order": 49, "data_collection_order": 2}}, "headers": [], "checksum": null, "serialized_key_size": 113, "serialized_value_size": 1051, "serialized_header_size": -1} \ No newline at end of file diff --git a/migrator/tests/actions/github/testdata/release_update.jsonl b/migrator/tests/actions/github/testdata/release_update.jsonl new file mode 100644 index 00000000..267eb22a --- /dev/null +++ b/migrator/tests/actions/github/testdata/release_update.jsonl @@ -0,0 +1 @@ +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20608, "timestamp": 1695210769612, "timestamp_type": 0, "key": {"id": "c9fc85cd-e8ec-4ba0-9a13-75a590f3fd15", "__dbz__physicalTableIdentifier": "zenodo-qa.public.github_releases"}, "value": {"before": {"created": 1695210768812484, "updated": 1695210768812492, "id": "c9fc85cd-e8ec-4ba0-9a13-75a590f3fd15", "release_id": 121854239, "tag": "v4", "errors": null, "repository_id": "0d1b629d-7992-4650-b0b0-8908a0322bca", "event_id": "1e596bad-1bb3-4749-8a5e-dd9f1552ebc2", "record_id": null, "status": "R"}, "after": {"created": 1695210768812484, "updated": 1695210768870629, "id": "c9fc85cd-e8ec-4ba0-9a13-75a590f3fd15", "release_id": 121854239, "tag": "v4", "errors": null, "repository_id": "0d1b629d-7992-4650-b0b0-8908a0322bca", "event_id": "1e596bad-1bb3-4749-8a5e-dd9f1552ebc2", "record_id": null, "status": "P"}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210768872, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225184520\",\"1473225184968\"]", "schema": "public", "table": "github_releases", "txId": 563886519, "lsn": 1473225184968, "xmin": null}, "op": "u", "ts_ms": 1695210769295, "transaction": {"id": "563886519:1473225184968", "total_order": 1, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 113, "serialized_value_size": 982, "serialized_header_size": -1} \ No newline at end of file diff --git a/migrator/tests/actions/github/testdata/repo_release.jsonl b/migrator/tests/actions/github/testdata/repo_release.jsonl new file mode 100644 index 00000000..11dae02b --- /dev/null +++ b/migrator/tests/actions/github/testdata/repo_release.jsonl @@ -0,0 +1,2 @@ +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20606, "timestamp": 1695210769612, "timestamp_type": 0, "key": {"id": "0d1b629d-7992-4650-b0b0-8908a0322bca", "__dbz__physicalTableIdentifier": "zenodo-qa.public.github_repositories"}, "value": {"before": {"created": 1636638953162187, "updated": 1695210684525973, "id": "0d1b629d-7992-4650-b0b0-8908a0322bca", "github_id": 427018972, "name": "ppanero/zenodo-release-test", "user_id": 86490, "ping": 1695210684525290, "hook": 434419984}, "after": {"created": 1636638953162187, "updated": 1695210768809652, "id": "0d1b629d-7992-4650-b0b0-8908a0322bca", "github_id": 427018972, "name": "ppanero/zenodo-release-test", "user_id": 86490, "ping": 1695210684525290, "hook": 434419984}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210768815, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225170536\",\"1473225170576\"]", "schema": "public", "table": "github_repositories", "txId": 563886516, "lsn": 1473225170576, "xmin": null}, "op": "u", "ts_ms": 1695210769295, "transaction": {"id": "563886516:1473225170576", "total_order": 1, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 117, "serialized_value_size": 852, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20607, "timestamp": 1695210769612, "timestamp_type": 0, "key": {"id": "c9fc85cd-e8ec-4ba0-9a13-75a590f3fd15", "__dbz__physicalTableIdentifier": "zenodo-qa.public.github_releases"}, "value": {"before": null, "after": {"created": 1695210768812484, "updated": 1695210768812492, "id": "c9fc85cd-e8ec-4ba0-9a13-75a590f3fd15", "release_id": 121854239, "tag": "v4", "errors": null, "repository_id": "0d1b629d-7992-4650-b0b0-8908a0322bca", "event_id": "1e596bad-1bb3-4749-8a5e-dd9f1552ebc2", "record_id": null, "status": "R"}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210768815, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473225170536\",\"1473225170816\"]", "schema": "public", "table": "github_releases", "txId": 563886516, "lsn": 1473225170816, "xmin": null}, "op": "c", "ts_ms": 1695210769295, "transaction": {"id": "563886516:1473225170816", "total_order": 2, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 113, "serialized_value_size": 704, "serialized_header_size": -1} \ No newline at end of file diff --git a/migrator/tests/actions/github/testdata/repo_update.jsonl b/migrator/tests/actions/github/testdata/repo_update.jsonl new file mode 100644 index 00000000..f5251551 --- /dev/null +++ b/migrator/tests/actions/github/testdata/repo_update.jsonl @@ -0,0 +1 @@ +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20678, "timestamp": 1695210863300, "timestamp_type": 0, "key": {"id": "0d1b629d-7992-4650-b0b0-8908a0322bca", "__dbz__physicalTableIdentifier": "zenodo-qa.public.github_repositories"}, "value": {"before": {"created": 1636638953162187, "updated": 1695210861469381, "id": "0d1b629d-7992-4650-b0b0-8908a0322bca", "github_id": 427018972, "name": "ppanero/zenodo-release-test", "user_id": 86490, "ping": 1695210684525290, "hook": 434420608}, "after": {"created": 1636638953162187, "updated": 1695210862598338, "id": "0d1b629d-7992-4650-b0b0-8908a0322bca", "github_id": 427018972, "name": "ppanero/zenodo-release-test", "user_id": 86490, "ping": 1695210862597759, "hook": 434420608}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210862599, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473226177936\",\"1473226178000\"]", "schema": "public", "table": "github_repositories", "txId": 563886580, "lsn": 1473226178000, "xmin": null}, "op": "u", "ts_ms": 1695210862882, "transaction": {"id": "563886580:1473226178000", "total_order": 1, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 117, "serialized_value_size": 852, "serialized_header_size": -1} \ No newline at end of file From 92361a0ce64155d4129d90b0d221ce23ec32bc3c Mon Sep 17 00:00:00 2001 From: Pablo Panero Date: Wed, 20 Sep 2023 16:29:27 +0200 Subject: [PATCH 02/35] migrator: add gh hook enable/disable actions --- migrator/tests/actions/conftest.py | 3 + migrator/tests/actions/github/conftest.py | 21 ++ .../actions/github/test_github_actions.py | 287 ++++++++++++++++++ .../github/test_github_actions_stream.py | 139 +++++++++ ...ble_step1.json => hook_enable_step1.jsonl} | 0 .../github/testdata/hook_enable_step2.jsonl | 2 +- .../actions/transform/__init__.py | 4 + .../actions/transform/github.py | 136 +++++++++ .../transform/transactions.py | 6 + 9 files changed, 597 insertions(+), 1 deletion(-) create mode 100644 migrator/tests/actions/github/conftest.py create mode 100644 migrator/tests/actions/github/test_github_actions.py create mode 100644 migrator/tests/actions/github/test_github_actions_stream.py rename migrator/tests/actions/github/testdata/{hook_enable_step1.json => hook_enable_step1.jsonl} (100%) create mode 100644 migrator/zenodo_rdm_migrator/actions/transform/github.py diff --git a/migrator/tests/actions/conftest.py b/migrator/tests/actions/conftest.py index 400d0589..901da021 100644 --- a/migrator/tests/actions/conftest.py +++ b/migrator/tests/actions/conftest.py @@ -28,6 +28,7 @@ FilesInstance, FilesObjectVersion, ) +from invenio_rdm_migrator.streams.models.github import Repository, WebhookEvent from invenio_rdm_migrator.streams.models.oai import OAISet from invenio_rdm_migrator.streams.models.oauth import ServerClient, ServerToken from invenio_rdm_migrator.streams.models.pids import PersistentIdentifier @@ -161,10 +162,12 @@ def database(engine): RDMParentMetadata, RDMVersionState, RDMParentCommunityMetadata, + Repository, ServerClient, ServerToken, SessionActivity, User, + WebhookEvent, ] # create tables diff --git a/migrator/tests/actions/github/conftest.py b/migrator/tests/actions/github/conftest.py new file mode 100644 index 00000000..33c8bbaa --- /dev/null +++ b/migrator/tests/actions/github/conftest.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2023 CERN. +# +# ZenodoRDM is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +"""Migrator community actions tests configuration.""" + +from pathlib import Path + +import pytest + + +# FIXME: deduplicate from actions/communities tests +@pytest.fixture() +def tx_files(): + """Transactions file paths.""" + testdata_dir = Path(__file__).parent / "testdata" + assert testdata_dir.exists() + return {f.stem: f for f in testdata_dir.iterdir() if f.is_file()} diff --git a/migrator/tests/actions/github/test_github_actions.py b/migrator/tests/actions/github/test_github_actions.py new file mode 100644 index 00000000..8fa44d05 --- /dev/null +++ b/migrator/tests/actions/github/test_github_actions.py @@ -0,0 +1,287 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2023 CERN. +# +# ZenodoRDM is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +"""Test GitHub actions for RDM migration.""" + +from pathlib import Path + +import orjson +import pytest +from invenio_rdm_migrator.extract import Tx +from invenio_rdm_migrator.load.postgresql.transactions.operations import OperationType +from invenio_rdm_migrator.streams.actions import load + +from zenodo_rdm_migrator.actions.transform import ( + HookEventCreateAction, + HookEventUpdateAction, + HookRepoUpdateAction, +) + +## +# TOKENS +## + + +@pytest.fixture() +def hook_enable_step1_tx(): + """Transaction enable a hook. + + As it would be after the extraction step. + """ + datafile = Path(__file__).parent / "testdata" / "hook_enable_step1.jsonl" + with open(datafile, "rb") as reader: + ops = [orjson.loads(line)["value"] for line in reader] + + return {"tx_id": 1, "operations": ops} + + +class TestHookRepoUpdateAction: + """Create OAuth server token action tests.""" + + def test_matches_with_valid_data(self): + assert ( + HookRepoUpdateAction.matches_action( + Tx( + id=1, + operations=[ + { + "op": OperationType.UPDATE, + "source": {"table": "github_repositories"}, + "after": {}, + }, + ], + ) + ) + is True + ) + + def test_matches_with_invalid_data(self): + empty = [] + + wrong_op = [ + { + "op": OperationType.INSERT, + "source": {"table": "github_repositories"}, + "after": {}, + }, + ] + + extra_op = [ + { + "op": OperationType.UPDATE, + "source": {"table": "github_repositories"}, + "after": {}, + }, + {"op": OperationType.INSERT, "source": {"table": "another"}, "after": {}}, + ] + + for invalid_ops in [ + empty, + wrong_op, + extra_op, + ]: + assert ( + HookRepoUpdateAction.matches_action(Tx(id=1, operations=invalid_ops)) + is False + ) + + def test_transform_with_valid_data(self, hook_enable_step1_tx): + action = HookRepoUpdateAction( + Tx( + id=hook_enable_step1_tx["tx_id"], + operations=hook_enable_step1_tx["operations"], + ) + ) + assert isinstance(action.transform(), load.HookRepoUpdateAction) + + +@pytest.fixture() +def hook_enable_step2_tx(): + """Transaction enable a hook. + + As it would be after the extraction step. + """ + datafile = Path(__file__).parent / "testdata" / "hook_enable_step2.jsonl" + with open(datafile, "rb") as reader: + ops = [orjson.loads(line)["value"] for line in reader] + + return {"tx_id": 1, "operations": ops} + + +class TestHookEventCreateAction: + """Create OAuth server token action tests.""" + + def test_matches_with_valid_data(self): + with_token = [ + { + "op": OperationType.INSERT, + "source": {"table": "webhooks_events"}, + "after": {}, + }, + { + "op": OperationType.UPDATE, + "source": {"table": "oauth2server_token"}, + "after": {}, + }, + ] + without_token = [ + { + "op": OperationType.INSERT, + "source": {"table": "webhooks_events"}, + "after": {}, + } + ] + for valid_ops in [ + with_token, + without_token, + ]: + assert ( + HookEventCreateAction.matches_action(Tx(id=1, operations=valid_ops)) + is True + ) + + def test_matches_with_invalid_data(self): + empty = [] + + wrong_op = [ + { + "op": OperationType.UPDATE, + "source": {"table": "webhooks_events"}, + "after": {}, + } + ] + + extra_op = [ + { + "op": OperationType.UPDATE, + "source": {"table": "webhooks_events"}, + "after": {}, + }, + {"op": OperationType.INSERT, "source": {"table": "another"}, "after": {}}, + ] + only_token = [ + { + "op": OperationType.UPDATE, + "source": {"table": "oauth2server_token"}, + "after": {}, + } + ] + + for invalid_ops in [ + empty, + wrong_op, + extra_op, + only_token, + ]: + assert ( + HookEventCreateAction.matches_action(Tx(id=1, operations=invalid_ops)) + is False + ) + + def test_transform_with_valid_data(self, hook_enable_step2_tx): + action = HookEventCreateAction( + Tx( + id=hook_enable_step2_tx["tx_id"], + operations=hook_enable_step2_tx["operations"], + ) + ) + assert isinstance(action.transform(), load.HookEventCreateAction) + + +@pytest.fixture() +def hook_disable_tx(): + """Transaction disable a hook. + + As it would be after the extraction step. + """ + datafile = Path(__file__).parent / "testdata" / "hook_disable.jsonl" + with open(datafile, "rb") as reader: + ops = [orjson.loads(line)["value"] for line in reader] + + return {"tx_id": 1, "operations": ops} + + +@pytest.fixture() +def hook_update_tx(): + """Transaction update a hook. + + As it would be after the extraction step. + """ + datafile = Path(__file__).parent / "testdata" / "hook_update.jsonl" + with open(datafile, "rb") as reader: + ops = [orjson.loads(line)["value"] for line in reader] + + return {"tx_id": 1, "operations": ops} + + +class TestHookEventUpdateAction: + """Create OAuth server token action tests.""" + + def test_matches_with_valid_data(self): + assert ( + HookEventUpdateAction.matches_action( + Tx( + id=1, + operations=[ + { + "op": OperationType.UPDATE, + "source": {"table": "webhooks_events"}, + "after": {}, + }, + ], + ) + ) + is True + ) + + def test_matches_with_invalid_data(self): + empty = [] + + wrong_op = [ + { + "op": OperationType.INSERT, + "source": {"table": "webhooks_events"}, + "after": {}, + } + ] + + extra_op = [ + { + "op": OperationType.UPDATE, + "source": {"table": "webhooks_events"}, + "after": {}, + }, + {"op": OperationType.INSERT, "source": {"table": "another"}, "after": {}}, + ] + + for invalid_ops in [ + empty, + wrong_op, + extra_op, + ]: + assert ( + HookEventUpdateAction.matches_action(Tx(id=1, operations=invalid_ops)) + is False + ) + + def test_transform_with_valid_data_disable(self, hook_disable_tx): + action = HookEventUpdateAction( + Tx( + id=hook_disable_tx["tx_id"], + operations=hook_disable_tx["operations"], + ) + ) + assert isinstance(action.transform(), load.HookEventUpdateAction) + + def test_transform_with_valid_data_update(self, hook_update_tx): + action = HookEventUpdateAction( + Tx( + id=hook_update_tx["tx_id"], + operations=hook_update_tx["operations"], + ) + ) + assert isinstance(action.transform(), load.HookEventUpdateAction) diff --git a/migrator/tests/actions/github/test_github_actions_stream.py b/migrator/tests/actions/github/test_github_actions_stream.py new file mode 100644 index 00000000..27686888 --- /dev/null +++ b/migrator/tests/actions/github/test_github_actions_stream.py @@ -0,0 +1,139 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2023 CERN. +# +# ZenodoRDM is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +"""Test GitHub action stream for RDM migration.""" + +import pytest +import sqlalchemy as sa +from invenio_rdm_migrator.streams import Stream +from invenio_rdm_migrator.streams.models.github import Repository, WebhookEvent +from invenio_rdm_migrator.streams.models.oauth import ServerToken + +from zenodo_rdm_migrator.transform.transactions import ZenodoTxTransform + + +@pytest.fixture(scope="function") +def db_repository(database, session): + repo = Repository( + created="2022-01-01T00:00:00", + updated="2022-01-01T00:00:00", + id="0d1b629d-7992-4650-b0b0-8908a0322bca", + github_id=427018972, + name="ppanero/zenodo-release-test", + # the following None means the hook was removed, but it does not affect the tests + user_id=None, + hook=None, + ) + + session.add(repo) + session.commit() + + return session + + +def test_github_hook_repo_update( + database, db_repository, pg_tx_load, test_extract_cls, tx_files +): + stream = Stream( + name="action", + extract=test_extract_cls(tx_files["hook_enable_step1"]), + transform=ZenodoTxTransform(), + load=pg_tx_load, + ) + stream.run() + + repo = db_repository.scalars(sa.select(Repository)).one() + assert repo.hook == 434420608 + assert repo.user_id == 86490 + + +@pytest.fixture(scope="function") +def db_token(database, session): + token = ServerToken( + id=156666, + client_id="SZLrR8ApZPeBjqj7uMB1JWXavhxebu6V0mwMtvMr", + user_id=123456, + token_type="bearer", + access_token="cH4ng3DzbXd4QTcrRjFMcTVMRHl3QlY2Rkdib0VwREY4aDhPcHo2dUt2ZnZ3OVVPa1BvRDl0L1NRZmFrdXNIU2hJR2JWc0NHZDZSVEhVT2JQcmdjS1E9PQ==", + refresh_token=None, + expires=None, + _scopes="tokens:generate user:email", + is_personal=True, + is_internal=False, + ) + + session.add(token) + session.commit() + + return session + + +def test_github_hook_event_create( + database, db_token, pg_tx_load, test_extract_cls, tx_files +): + stream = Stream( + name="action", + extract=test_extract_cls(tx_files["hook_enable_step2"]), + transform=ZenodoTxTransform(), + load=pg_tx_load, + ) + stream.run() + + token = db_token.scalars(sa.select(ServerToken)).one() + assert token.expires + assert db_token.scalars(sa.select(WebhookEvent)).one() + + +def test_github_hook_disable( + database, db_repository, pg_tx_load, test_extract_cls, tx_files +): + stream = Stream( + name="action", + extract=test_extract_cls(tx_files["hook_disable"]), + transform=ZenodoTxTransform(), + load=pg_tx_load, + ) + stream.run() + + repo = db_repository.scalars(sa.select(Repository)).one() + assert not repo.hook + + +@pytest.fixture(scope="function") +def db_hook_event(database, session): + token = WebhookEvent( + created="2022-01-01T00:00:00", + updated="2022-01-01T00:00:00", + id="189d88dd-22d9-40d1-b3af-9da4b2bc4870", + receiver_id="github", + user_id=86490, + payload='{"action": "published", "release": {"body": "Zenodo testing migration"}}', + payload_headers=None, + response='{"status": 202, "message": "Accepted."}', + response_headers=None, + response_code=202, + ) + + session.add(token) + session.commit() + + return session + + +def test_github_hook_event_update( + database, db_hook_event, pg_tx_load, test_extract_cls, tx_files +): + stream = Stream( + name="action", + extract=test_extract_cls(tx_files["hook_update"]), + transform=ZenodoTxTransform(), + load=pg_tx_load, + ) + stream.run() + + hook = db_hook_event.scalars(sa.select(WebhookEvent)).one() + assert hook.response_code == 409 diff --git a/migrator/tests/actions/github/testdata/hook_enable_step1.json b/migrator/tests/actions/github/testdata/hook_enable_step1.jsonl similarity index 100% rename from migrator/tests/actions/github/testdata/hook_enable_step1.json rename to migrator/tests/actions/github/testdata/hook_enable_step1.jsonl diff --git a/migrator/tests/actions/github/testdata/hook_enable_step2.jsonl b/migrator/tests/actions/github/testdata/hook_enable_step2.jsonl index 7546d803..048f6de8 100644 --- a/migrator/tests/actions/github/testdata/hook_enable_step2.jsonl +++ b/migrator/tests/actions/github/testdata/hook_enable_step2.jsonl @@ -1,2 +1,2 @@ -{"topic": "zenodo-qa.public", "partition": 0, "offset": 20676, "timestamp": 1695210863300, "timestamp_type": 0, "key": {"id": 157872, "__dbz__physicalTableIdentifier": "zenodo-qa.public.oauth2server_token"}, "value": {"before": {"id": 157872, "client_id": "LwpyqQN5G32O1XFuPK2DdI0B5MiEjgMsQwKmtgKm", "user_id": 86490, "token_type": "bearer", "access_token": "aHNPTlJQR2VIQldqZE5tZzE5cnJIdzdyWkhRRk5DVVdJSzJtallYVXJIVklnMkFjQlh3WVZ0Vnk5TkFVWU8xcFJMd2NlZWRyRDhqMEdERmU0NWpVcVE9PQ==", "refresh_token": null, "expires": 1695214368940479, "_scopes": "webhooks:event", "is_personal": true, "is_internal": true}, "after": {"id": 157872, "client_id": "LwpyqQN5G32O1XFuPK2DdI0B5MiEjgMsQwKmtgKm", "user_id": 86490, "token_type": "bearer", "access_token": "aHNPTlJQR2VIQldqZE5tZzE5cnJIdzdyWkhRRk5DVVdJSzJtallYVXJIVklnMkFjQlh3WVZ0Vnk5TkFVWU8xcFJMd2NlZWRyRDhqMEdERmU0NWpVcVE9PQ==", "refresh_token": null, "expires": 1695214462557405, "_scopes": "webhooks:event", "is_personal": true, "is_internal": true}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210862573, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473226162280\",\"1473226162280\"]", "schema": "public", "table": "oauth2server_token", "txId": 563886579, "lsn": 1473226162280, "xmin": null}, "op": "u", "ts_ms": 1695210862882, "transaction": {"id": "563886579:1473226162280", "total_order": 1, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 84, "serialized_value_size": 1135, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 20676, "timestamp": 1695210863300, "timestamp_type": 0, "key": {"id": 156666, "__dbz__physicalTableIdentifier": "zenodo-qa.public.oauth2server_token"}, "value": {"before": {"id": 156666, "client_id": "LwpyqQN5G32O1XFuPK2DdI0B5MiEjgMsQwKmtgKm", "user_id": 86490, "token_type": "bearer", "access_token": "aHNPTlJQR2VIQldqZE5tZzE5cnJIdzdyWkhRRk5DVVdJSzJtallYVXJIVklnMkFjQlh3WVZ0Vnk5TkFVWU8xcFJMd2NlZWRyRDhqMEdERmU0NWpVcVE9PQ==", "refresh_token": null, "expires": 1695214368940479, "_scopes": "webhooks:event", "is_personal": true, "is_internal": true}, "after": {"id": 156666, "client_id": "LwpyqQN5G32O1XFuPK2DdI0B5MiEjgMsQwKmtgKm", "user_id": 86490, "token_type": "bearer", "access_token": "aHNPTlJQR2VIQldqZE5tZzE5cnJIdzdyWkhRRk5DVVdJSzJtallYVXJIVklnMkFjQlh3WVZ0Vnk5TkFVWU8xcFJMd2NlZWRyRDhqMEdERmU0NWpVcVE9PQ==", "refresh_token": null, "expires": 1695214462557405, "_scopes": "webhooks:event", "is_personal": true, "is_internal": true}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210862573, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473226162280\",\"1473226162280\"]", "schema": "public", "table": "oauth2server_token", "txId": 563886579, "lsn": 1473226162280, "xmin": null}, "op": "u", "ts_ms": 1695210862882, "transaction": {"id": "563886579:1473226162280", "total_order": 1, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 84, "serialized_value_size": 1135, "serialized_header_size": -1} {"topic": "zenodo-qa.public", "partition": 0, "offset": 20677, "timestamp": 1695210863300, "timestamp_type": 0, "key": {"id": "f4dc4bc2-e157-4612-8600-0e9b4ba0f450", "__dbz__physicalTableIdentifier": "zenodo-qa.public.webhooks_events"}, "value": {"before": null, "after": {"created": 1695210862569231, "updated": 1695210862569242, "id": "f4dc4bc2-e157-4612-8600-0e9b4ba0f450", "receiver_id": "github", "user_id": 86490, "payload": "{\"hook_id\": 434420608, \"hook\": {\"deliveries_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/hooks/434420608/deliveries\", \"name\": \"web\", \"url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/hooks/434420608\", \"type\": \"Repository\", \"created_at\": \"2023-09-20T11:54:21Z\", \"updated_at\": \"2023-09-20T11:54:21Z\", \"test_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/hooks/434420608/test\", \"id\": 434420608, \"ping_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/hooks/434420608/pings\", \"active\": true, \"config\": {\"url\": \"https://sandbox.zenodo.org/api/hooks/receivers/github/events/?access_token=blX2X1zR6N8M0eN3yCdKS6lJ39ZbprdMgvz65rVfkTgy6WCpdDgL7bhiIj1S\", \"insecure_ssl\": \"0\", \"secret\": \"********\", \"content_type\": \"json\"}, \"events\": [\"release\"], \"last_response\": {\"status\": \"unused\", \"message\": null, \"code\": null}}, \"zen\": \"Design for failure.\", \"sender\": {\"following_url\": \"https://api.github.com/users/ppanero/following{/other_user}\", \"events_url\": \"https://api.github.com/users/ppanero/events{/privacy}\", \"avatar_url\": \"https://avatars.githubusercontent.com/u/6756943?v=4\", \"url\": \"https://api.github.com/users/ppanero\", \"gists_url\": \"https://api.github.com/users/ppanero/gists{/gist_id}\", \"html_url\": \"https://github.com/ppanero\", \"subscriptions_url\": \"https://api.github.com/users/ppanero/subscriptions\", \"node_id\": \"MDQ6VXNlcjY3NTY5NDM=\", \"repos_url\": \"https://api.github.com/users/ppanero/repos\", \"received_events_url\": \"https://api.github.com/users/ppanero/received_events\", \"gravatar_id\": \"\", \"starred_url\": \"https://api.github.com/users/ppanero/starred{/owner}{/repo}\", \"site_admin\": false, \"login\": \"ppanero\", \"type\": \"User\", \"id\": 6756943, \"followers_url\": \"https://api.github.com/users/ppanero/followers\", \"organizations_url\": \"https://api.github.com/users/ppanero/orgs\"}, \"repository\": {\"issues_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/issues{/number}\", \"deployments_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/deployments\", \"stargazers_count\": 0, \"forks_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/forks\", \"mirror_url\": null, \"allow_forking\": true, \"subscription_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/subscription\", \"topics\": [], \"notifications_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/notifications{?since,all,participating}\", \"collaborators_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/collaborators{/collaborator}\", \"updated_at\": \"2021-12-02T13:03:26Z\", \"private\": false, \"pulls_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/pulls{/number}\", \"disabled\": false, \"issue_comment_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/issues/comments{/number}\", \"labels_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/labels{/name}\", \"has_wiki\": true, \"full_name\": \"ppanero/zenodo-release-test\", \"owner\": {\"following_url\": \"https://api.github.com/users/ppanero/following{/other_user}\", \"events_url\": \"https://api.github.com/users/ppanero/events{/privacy}\", \"avatar_url\": \"https://avatars.githubusercontent.com/u/6756943?v=4\", \"url\": \"https://api.github.com/users/ppanero\", \"gists_url\": \"https://api.github.com/users/ppanero/gists{/gist_id}\", \"html_url\": \"https://github.com/ppanero\", \"subscriptions_url\": \"https://api.github.com/users/ppanero/subscriptions\", \"node_id\": \"MDQ6VXNlcjY3NTY5NDM=\", \"repos_url\": \"https://api.github.com/users/ppanero/repos\", \"received_events_url\": \"https://api.github.com/users/ppanero/received_events\", \"gravatar_id\": \"\", \"starred_url\": \"https://api.github.com/users/ppanero/starred{/owner}{/repo}\", \"site_admin\": false, \"login\": \"ppanero\", \"type\": \"User\", \"id\": 6756943, \"followers_url\": \"https://api.github.com/users/ppanero/followers\", \"organizations_url\": \"https://api.github.com/users/ppanero/orgs\"}, \"statuses_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/statuses/{sha}\", \"id\": 427018972, \"keys_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/keys{/key_id}\", \"description\": null, \"tags_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/tags\", \"archived\": false, \"downloads_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/downloads\", \"assignees_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/assignees{/user}\", \"watchers\": 0, \"contents_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/contents/{+path}\", \"has_pages\": false, \"git_refs_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/git/refs{/sha}\", \"has_discussions\": false, \"has_projects\": true, \"clone_url\": \"https://github.com/ppanero/zenodo-release-test.git\", \"watchers_count\": 0, \"git_tags_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/git/tags{/sha}\", \"milestones_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/milestones{/number}\", \"languages_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/languages\", \"size\": 8, \"homepage\": null, \"fork\": false, \"commits_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/commits{/sha}\", \"releases_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/releases{/id}\", \"issue_events_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/issues/events{/number}\", \"archive_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/{archive_format}{/ref}\", \"comments_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/comments{/number}\", \"events_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/events\", \"contributors_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/contributors\", \"html_url\": \"https://github.com/ppanero/zenodo-release-test\", \"visibility\": \"public\", \"forks\": 0, \"compare_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/compare/{base}...{head}\", \"open_issues\": 0, \"node_id\": \"R_kgDOGXPK3A\", \"git_url\": \"git://github.com/ppanero/zenodo-release-test.git\", \"svn_url\": \"https://github.com/ppanero/zenodo-release-test\", \"merges_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/merges\", \"has_issues\": true, \"ssh_url\": \"git@github.com:ppanero/zenodo-release-test.git\", \"blobs_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/git/blobs{/sha}\", \"git_commits_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/git/commits{/sha}\", \"hooks_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/hooks\", \"has_downloads\": true, \"license\": {\"spdx_id\": \"MPL-2.0\", \"url\": \"https://api.github.com/licenses/mpl-2.0\", \"node_id\": \"MDc6TGljZW5zZTE0\", \"name\": \"Mozilla Public License 2.0\", \"key\": \"mpl-2.0\"}, \"name\": \"zenodo-release-test\", \"language\": null, \"url\": \"https://api.github.com/repos/ppanero/zenodo-release-test\", \"created_at\": \"2021-11-11T13:53:02Z\", \"open_issues_count\": 0, \"is_template\": false, \"pushed_at\": \"2023-09-20T11:50:38Z\", \"web_commit_signoff_required\": false, \"forks_count\": 0, \"default_branch\": \"main\", \"teams_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/teams\", \"trees_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/git/trees{/sha}\", \"branches_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/branches{/branch}\", \"subscribers_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/subscribers\", \"stargazers_url\": \"https://api.github.com/repos/ppanero/zenodo-release-test/stargazers\"}}", "payload_headers": null, "response": "{\"status\": 202, \"message\": \"Accepted.\"}", "response_headers": null, "response_code": 202}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210862573, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473226162280\",\"1473226164816\"]", "schema": "public", "table": "webhooks_events", "txId": 563886579, "lsn": 1473226164816, "xmin": null}, "op": "c", "ts_ms": 1695210862882, "transaction": {"id": "563886579:1473226164816", "total_order": 2, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 113, "serialized_value_size": 8636, "serialized_header_size": -1} \ No newline at end of file diff --git a/migrator/zenodo_rdm_migrator/actions/transform/__init__.py b/migrator/zenodo_rdm_migrator/actions/transform/__init__.py index dd6e6eb0..a450ef9c 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/__init__.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/__init__.py @@ -14,6 +14,7 @@ ) from .drafts import DraftCreateAction, DraftEditAction, DraftPublishAction from .files import DraftFileUploadAction +from .github import HookEventCreateAction, HookEventUpdateAction, HookRepoUpdateAction from .oauth import ( OAuthApplicationCreateAction, OAuthApplicationDeleteAction, @@ -32,6 +33,9 @@ "DraftEditAction", "DraftFileUploadAction", "DraftPublishAction", + "HookEventCreateAction", + "HookEventUpdateAction", + "HookRepoUpdateAction", "OAuthApplicationCreateAction", "OAuthApplicationDeleteAction", "OAuthApplicationUpdateAction", diff --git a/migrator/zenodo_rdm_migrator/actions/transform/github.py b/migrator/zenodo_rdm_migrator/actions/transform/github.py new file mode 100644 index 00000000..4b38b06d --- /dev/null +++ b/migrator/zenodo_rdm_migrator/actions/transform/github.py @@ -0,0 +1,136 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2023 CERN. +# +# ZenodoRDM is free software; you can redistribute it and/or modify +# it under the terms of the MIT License; see LICENSE file for more details. + +"""Invenio RDM migration github actions module.""" + +from invenio_rdm_migrator.actions import TransformAction +from invenio_rdm_migrator.load.postgresql.transactions.operations import OperationType +from invenio_rdm_migrator.streams.actions import load +from invenio_rdm_migrator.streams.github import GitHubRepositoryTransform +from invenio_rdm_migrator.streams.oauth import OAuthServerTokenTransform +from invenio_rdm_migrator.transform import IdentityTransform + + +class HookRepoUpdateAction(TransformAction): + """Zenodo to RDM GitHub repository update of a webhook. + + This will serve for hook enabling first phase and for disabling, as well as for + normal repository updates. + """ + + name = "gh-hook-repo-update" + load_cls = load.HookRepoUpdateAction + + @classmethod + def matches_action(cls, tx): + """Checks if the data corresponds with that required by the action.""" + if len(tx.operations) != 1: + return False + + op = tx.operations[0] + + return ( + op["source"]["table"] == "github_repositories" + and op["op"] == OperationType.UPDATE + ) + + def _transform_data(self): + """Transforms the data and returns dictionary.""" + op = self.tx.operations[0] + + result = { + "tx_id": self.tx.id, + "gh_repository": GitHubRepositoryTransform()._transform(op["after"]), + } + + return result + + +class HookEventCreateAction(TransformAction): + """Zenodo to RDM webhook create action. + + This will serve for hook enabling first phase and for disabling, as well as for + normal repository updates. + """ + + name = "gh-hook-event-create" + load_cls = load.HookEventCreateAction + + @classmethod + def matches_action(cls, tx): + """Checks if the data corresponds with that required by the action.""" + if len(tx.operations) == 1: + op = tx.operations[0] + return ( + op["source"]["table"] == "webhooks_events" + and op["op"] == OperationType.INSERT + ) + + if len(tx.operations) == 2: + rules = { + "webhooks_events": OperationType.INSERT, + "oauth2server_token": OperationType.UPDATE, + } + + for op in tx.operations: + rule = rules.pop(op["source"]["table"], None) + if not rule or rule != op["op"]: + return False + + return True + + return False + + def _transform_data(self): + """Transforms the data and returns dictionary.""" + webhook_event = None + server_token = None + for op in self.tx.operations: + if op["source"]["table"] == "webhooks_events": + webhook_event = op["after"] + elif op["source"]["table"] == "oauth2server_token": + server_token = op["after"] + + result = { + "tx_id": self.tx.id, + "webhook_event": IdentityTransform()._transform(webhook_event), + } + if server_token: + result["oauth_token"] = OAuthServerTokenTransform()._transform(server_token) + + return result + + +class HookEventUpdateAction(TransformAction): + """Zenodo to RDM webhook event update.""" + + name = "gh-hook-event-update" + load_cls = load.HookEventUpdateAction + + @classmethod + def matches_action(cls, tx): + """Checks if the data corresponds with that required by the action.""" + if len(tx.operations) != 1: + return False + + op = tx.operations[0] + + return ( + op["source"]["table"] == "webhooks_events" + and op["op"] == OperationType.UPDATE + ) + + def _transform_data(self): + """Transforms the data and returns dictionary.""" + op = self.tx.operations[0] + + result = { + "tx_id": self.tx.id, + "webhook_event": IdentityTransform()._transform(op["after"]), + } + + return result diff --git a/migrator/zenodo_rdm_migrator/transform/transactions.py b/migrator/zenodo_rdm_migrator/transform/transactions.py index 2a7b84f5..e5026aaa 100644 --- a/migrator/zenodo_rdm_migrator/transform/transactions.py +++ b/migrator/zenodo_rdm_migrator/transform/transactions.py @@ -17,6 +17,9 @@ DraftCreateAction, DraftEditAction, DraftFileUploadAction, + HookEventCreateAction, + HookEventUpdateAction, + HookRepoUpdateAction, OAuthApplicationCreateAction, OAuthApplicationDeleteAction, OAuthApplicationUpdateAction, @@ -39,6 +42,9 @@ class ZenodoTxTransform(BaseTxTransform): DraftCreateAction, DraftEditAction, DraftFileUploadAction, + HookEventCreateAction, + HookEventUpdateAction, + HookRepoUpdateAction, OAuthApplicationCreateAction, OAuthApplicationDeleteAction, OAuthApplicationUpdateAction, From 99175956f6ff14024043935ca801108d4697550f Mon Sep 17 00:00:00 2001 From: Pablo Panero Date: Thu, 21 Sep 2023 11:15:05 +0200 Subject: [PATCH 03/35] migrator: add gh release reception and update actions --- migrator/tests/actions/conftest.py | 3 +- .../actions/github/test_github_actions.py | 178 +++++++++++++++++- .../github/test_github_actions_stream.py | 61 +++++- ...ase_create.jsonl => release_process.jsonl} | 0 ...po_release.jsonl => release_receive.jsonl} | 0 .../actions/github/testdata/repo_update.jsonl | 1 - .../actions/transform/__init__.py | 10 +- .../actions/transform/github.py | 86 +++++++++ .../transform/transactions.py | 4 + 9 files changed, 338 insertions(+), 5 deletions(-) rename migrator/tests/actions/github/testdata/{release_create.jsonl => release_process.jsonl} (100%) rename migrator/tests/actions/github/testdata/{repo_release.jsonl => release_receive.jsonl} (100%) delete mode 100644 migrator/tests/actions/github/testdata/repo_update.jsonl diff --git a/migrator/tests/actions/conftest.py b/migrator/tests/actions/conftest.py index 901da021..69a9301c 100644 --- a/migrator/tests/actions/conftest.py +++ b/migrator/tests/actions/conftest.py @@ -28,7 +28,7 @@ FilesInstance, FilesObjectVersion, ) -from invenio_rdm_migrator.streams.models.github import Repository, WebhookEvent +from invenio_rdm_migrator.streams.models.github import Release, Repository, WebhookEvent from invenio_rdm_migrator.streams.models.oai import OAISet from invenio_rdm_migrator.streams.models.oauth import ServerClient, ServerToken from invenio_rdm_migrator.streams.models.pids import PersistentIdentifier @@ -162,6 +162,7 @@ def database(engine): RDMParentMetadata, RDMVersionState, RDMParentCommunityMetadata, + Release, Repository, ServerClient, ServerToken, diff --git a/migrator/tests/actions/github/test_github_actions.py b/migrator/tests/actions/github/test_github_actions.py index 8fa44d05..5bd49a70 100644 --- a/migrator/tests/actions/github/test_github_actions.py +++ b/migrator/tests/actions/github/test_github_actions.py @@ -19,10 +19,12 @@ HookEventCreateAction, HookEventUpdateAction, HookRepoUpdateAction, + ReleaseReceiveAction, + ReleaseUpdateAction, ) ## -# TOKENS +# Hooks ## @@ -285,3 +287,177 @@ def test_transform_with_valid_data_update(self, hook_update_tx): ) ) assert isinstance(action.transform(), load.HookEventUpdateAction) + + +## +# Releases +## + + +@pytest.fixture() +def release_receive_tx(): + """Transaction receive a release. + + As it would be after the extraction step. + """ + datafile = Path(__file__).parent / "testdata" / "release_receive.jsonl" + with open(datafile, "rb") as reader: + ops = [orjson.loads(line)["value"] for line in reader] + + return {"tx_id": 1, "operations": ops} + + +class TestReleaseReceiveAction: + """Release reception action tests.""" + + def test_matches_with_valid_data(self): + assert ( + ReleaseReceiveAction.matches_action( + Tx( + id=1, + operations=[ + { + "op": OperationType.UPDATE, + "source": {"table": "github_repositories"}, + "after": {}, + }, + { + "op": OperationType.INSERT, + "source": {"table": "github_releases"}, + "after": {}, + }, + ], + ) + ) + is True + ) + + def test_matches_with_invalid_data(self): + empty = [] + + no_release = [ + { + "op": OperationType.UPDATE, + "source": {"table": "github_repositories"}, + "after": {}, + }, + ] + + no_repository = [ + { + "op": OperationType.INSERT, + "source": {"table": "github_releases"}, + "after": {}, + }, + ] + + wrong_op = [ + { + "op": OperationType.INSERT, + "source": {"table": "github_repositories"}, + "after": {}, + }, + { + "op": OperationType.INSERT, + "source": {"table": "github_releases"}, + "after": {}, + }, + ] + + extra_op = [ + { + "op": OperationType.UPDATE, + "source": {"table": "github_repositories"}, + "after": {}, + }, + { + "op": OperationType.INSERT, + "source": {"table": "github_releases"}, + "after": {}, + }, + {"op": OperationType.INSERT, "source": {"table": "another"}, "after": {}}, + ] + + for invalid_ops in [empty, no_release, no_repository, wrong_op, extra_op]: + assert ( + ReleaseReceiveAction.matches_action(Tx(id=1, operations=invalid_ops)) + is False + ) + + def test_transform_with_valid_data(self, release_receive_tx): + action = ReleaseReceiveAction( + Tx( + id=release_receive_tx["tx_id"], + operations=release_receive_tx["operations"], + ) + ) + assert isinstance(action.transform(), load.ReleaseReceiveAction) + + +@pytest.fixture() +def release_update_tx(): + """Transaction update a release. + + As it would be after the extraction step. + """ + datafile = Path(__file__).parent / "testdata" / "release_update.jsonl" + with open(datafile, "rb") as reader: + ops = [orjson.loads(line)["value"] for line in reader] + + return {"tx_id": 1, "operations": ops} + + +class TestReleaseUpdateAction: + """Release update action tests.""" + + def test_matches_with_valid_data(self): + assert ( + ReleaseUpdateAction.matches_action( + Tx( + id=1, + operations=[ + { + "op": OperationType.UPDATE, + "source": {"table": "github_releases"}, + "after": {}, + }, + ], + ) + ) + is True + ) + + def test_matches_with_invalid_data(self): + empty = [] + + wrong_op = [ + { + "op": OperationType.INSERT, + "source": {"table": "github_releases"}, + "after": {}, + }, + ] + + extra_op = [ + { + "op": OperationType.INSERT, + "source": {"table": "github_releases"}, + "after": {}, + }, + {"op": OperationType.INSERT, "source": {"table": "another"}, "after": {}}, + ] + + for invalid_ops in [empty, wrong_op, extra_op]: + assert ( + ReleaseUpdateAction.matches_action(Tx(id=1, operations=invalid_ops)) + is False + ) + + def test_transform_with_valid_data(self, release_update_tx): + action = ReleaseUpdateAction( + Tx( + id=release_update_tx["tx_id"], + operations=release_update_tx["operations"], + ) + ) + assert isinstance(action.transform(), load.ReleaseUpdateAction) diff --git a/migrator/tests/actions/github/test_github_actions_stream.py b/migrator/tests/actions/github/test_github_actions_stream.py index 27686888..db967a57 100644 --- a/migrator/tests/actions/github/test_github_actions_stream.py +++ b/migrator/tests/actions/github/test_github_actions_stream.py @@ -10,11 +10,15 @@ import pytest import sqlalchemy as sa from invenio_rdm_migrator.streams import Stream -from invenio_rdm_migrator.streams.models.github import Repository, WebhookEvent +from invenio_rdm_migrator.streams.models.github import Release, Repository, WebhookEvent from invenio_rdm_migrator.streams.models.oauth import ServerToken from zenodo_rdm_migrator.transform.transactions import ZenodoTxTransform +# +# Hooks +# + @pytest.fixture(scope="function") def db_repository(database, session): @@ -137,3 +141,58 @@ def test_github_hook_event_update( hook = db_hook_event.scalars(sa.select(WebhookEvent)).one() assert hook.response_code == 409 + + +# +# Release +# + + +def test_receive_release( + database, db_repository, pg_tx_load, test_extract_cls, tx_files +): + stream = Stream( + name="action", + extract=test_extract_cls(tx_files["release_receive"]), + transform=ZenodoTxTransform(), + load=pg_tx_load, + ) + stream.run() + + repo = db_repository.scalars(sa.select(Repository)).one() + assert repo.updated == "2023-09-20T11:52:48.809652" + assert db_repository.scalars(sa.select(Release)).one() + + +@pytest.fixture(scope="function") +def db_release(database, session): + repo = Release( + created="2022-01-01T00:00:00", + updated="2022-01-01T00:00:00", + id="c9fc85cd-e8ec-4ba0-9a13-75a590f3fd15", + release_id=121854239, + tag="v4", + errors=None, + repository_id="0d1b629d-7992-4650-b0b0-8908a0322bca", + event_id="1e596bad-1bb3-4749-8a5e-dd9f1552ebc2", + record_id=None, + status="R", + ) + + session.add(repo) + session.commit() + + return session + + +def test_update_release(database, db_release, pg_tx_load, test_extract_cls, tx_files): + stream = Stream( + name="action", + extract=test_extract_cls(tx_files["release_update"]), + transform=ZenodoTxTransform(), + load=pg_tx_load, + ) + stream.run() + + release = db_release.scalars(sa.select(Release)).one() + assert release.status == "P" diff --git a/migrator/tests/actions/github/testdata/release_create.jsonl b/migrator/tests/actions/github/testdata/release_process.jsonl similarity index 100% rename from migrator/tests/actions/github/testdata/release_create.jsonl rename to migrator/tests/actions/github/testdata/release_process.jsonl diff --git a/migrator/tests/actions/github/testdata/repo_release.jsonl b/migrator/tests/actions/github/testdata/release_receive.jsonl similarity index 100% rename from migrator/tests/actions/github/testdata/repo_release.jsonl rename to migrator/tests/actions/github/testdata/release_receive.jsonl diff --git a/migrator/tests/actions/github/testdata/repo_update.jsonl b/migrator/tests/actions/github/testdata/repo_update.jsonl deleted file mode 100644 index f5251551..00000000 --- a/migrator/tests/actions/github/testdata/repo_update.jsonl +++ /dev/null @@ -1 +0,0 @@ -{"topic": "zenodo-qa.public", "partition": 0, "offset": 20678, "timestamp": 1695210863300, "timestamp_type": 0, "key": {"id": "0d1b629d-7992-4650-b0b0-8908a0322bca", "__dbz__physicalTableIdentifier": "zenodo-qa.public.github_repositories"}, "value": {"before": {"created": 1636638953162187, "updated": 1695210861469381, "id": "0d1b629d-7992-4650-b0b0-8908a0322bca", "github_id": 427018972, "name": "ppanero/zenodo-release-test", "user_id": 86490, "ping": 1695210684525290, "hook": 434420608}, "after": {"created": 1636638953162187, "updated": 1695210862598338, "id": "0d1b629d-7992-4650-b0b0-8908a0322bca", "github_id": 427018972, "name": "ppanero/zenodo-release-test", "user_id": 86490, "ping": 1695210862597759, "hook": 434420608}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695210862599, "snapshot": "false", "db": "zenodo", "sequence": "[\"1473226177936\",\"1473226178000\"]", "schema": "public", "table": "github_repositories", "txId": 563886580, "lsn": 1473226178000, "xmin": null}, "op": "u", "ts_ms": 1695210862882, "transaction": {"id": "563886580:1473226178000", "total_order": 1, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 117, "serialized_value_size": 852, "serialized_header_size": -1} \ No newline at end of file diff --git a/migrator/zenodo_rdm_migrator/actions/transform/__init__.py b/migrator/zenodo_rdm_migrator/actions/transform/__init__.py index a450ef9c..62522b5f 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/__init__.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/__init__.py @@ -14,7 +14,13 @@ ) from .drafts import DraftCreateAction, DraftEditAction, DraftPublishAction from .files import DraftFileUploadAction -from .github import HookEventCreateAction, HookEventUpdateAction, HookRepoUpdateAction +from .github import ( + HookEventCreateAction, + HookEventUpdateAction, + HookRepoUpdateAction, + ReleaseReceiveAction, + ReleaseUpdateAction, +) from .oauth import ( OAuthApplicationCreateAction, OAuthApplicationDeleteAction, @@ -42,6 +48,8 @@ "OAuthServerTokenCreateAction", "OAuthServerTokenDeleteAction", "OAuthServerTokenUpdateAction", + "ReleaseReceiveAction", + "ReleaseUpdateAction", "UserDeactivationAction", "UserEditAction", "UserRegistrationAction", diff --git a/migrator/zenodo_rdm_migrator/actions/transform/github.py b/migrator/zenodo_rdm_migrator/actions/transform/github.py index 4b38b06d..7e43658e 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/github.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/github.py @@ -14,6 +14,10 @@ from invenio_rdm_migrator.streams.oauth import OAuthServerTokenTransform from invenio_rdm_migrator.transform import IdentityTransform +# +# Hooks +# + class HookRepoUpdateAction(TransformAction): """Zenodo to RDM GitHub repository update of a webhook. @@ -134,3 +138,85 @@ def _transform_data(self): } return result + + +# +# Releases +# + + +class ReleaseReceiveAction(TransformAction): + """Zenodo to RDM receive/create a GitHub release action.""" + + name = "gh-release-receive" + load_cls = load.ReleaseReceiveAction + + @classmethod + def matches_action(cls, tx): + """Checks if the data corresponds with that required by the action.""" + if len(tx.operations) != 2: + return False + + rules = { + "github_repositories": OperationType.UPDATE, + "github_releases": OperationType.INSERT, + } + + for op in tx.operations: + rule = rules.pop(op["source"]["table"], None) + if not rule or rule != op["op"]: + return False + + return True + + def _transform_data(self): + """Transforms the data and returns dictionary.""" + repo = None + release = None + for op in self.tx.operations: + self._microseconds_to_isodate( + data=op["after"], fields=["created", "updated"] + ) + if op["source"]["table"] == "github_repositories": + repo = op["after"] + elif op["source"]["table"] == "github_releases": + release = op["after"] + + return { + "tx_id": self.tx.id, + "gh_repository": GitHubRepositoryTransform()._transform(repo), + # using identity because it accounts for partial updates + "gh_release": IdentityTransform()._transform(release), + } + + +class ReleaseUpdateAction(TransformAction): + """Zenodo to RDM update a GitHub release action.""" + + name = "gh-release-update" + load_cls = load.ReleaseUpdateAction + + @classmethod + def matches_action(cls, tx): + """Checks if the data corresponds with that required by the action.""" + if len(tx.operations) != 1: + return False + + op = tx.operations[0] + + return ( + op["source"]["table"] == "github_releases" + and op["op"] == OperationType.UPDATE + ) + + def _transform_data(self): + """Transforms the data and returns dictionary.""" + op = self.tx.operations[0] + + self._microseconds_to_isodate(data=op["after"], fields=["created", "updated"]) + + return { + "tx_id": self.tx.id, + # using identity because it accounts for partial updates + "gh_release": IdentityTransform()._transform(op["after"]), + } diff --git a/migrator/zenodo_rdm_migrator/transform/transactions.py b/migrator/zenodo_rdm_migrator/transform/transactions.py index e5026aaa..653c1ace 100644 --- a/migrator/zenodo_rdm_migrator/transform/transactions.py +++ b/migrator/zenodo_rdm_migrator/transform/transactions.py @@ -26,6 +26,8 @@ OAuthServerTokenCreateAction, OAuthServerTokenDeleteAction, OAuthServerTokenUpdateAction, + ReleaseReceiveAction, + ReleaseUpdateAction, UserDeactivationAction, UserEditAction, UserRegistrationAction, @@ -51,6 +53,8 @@ class ZenodoTxTransform(BaseTxTransform): OAuthServerTokenCreateAction, OAuthServerTokenDeleteAction, OAuthServerTokenUpdateAction, + ReleaseReceiveAction, + ReleaseUpdateAction, UserDeactivationAction, UserEditAction, UserRegistrationAction, From 0be71449796aa4ff4f44b6fc2e08cb177369f814 Mon Sep 17 00:00:00 2001 From: Pablo Panero Date: Thu, 21 Sep 2023 14:25:30 +0200 Subject: [PATCH 04/35] migrator: transform dates in github actions --- migrator/zenodo_rdm_migrator/actions/transform/github.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/migrator/zenodo_rdm_migrator/actions/transform/github.py b/migrator/zenodo_rdm_migrator/actions/transform/github.py index 7e43658e..52745a16 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/github.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/github.py @@ -46,6 +46,8 @@ def _transform_data(self): """Transforms the data and returns dictionary.""" op = self.tx.operations[0] + self._microseconds_to_isodate(data=op["after"], fields=["created", "updated"]) + result = { "tx_id": self.tx.id, "gh_repository": GitHubRepositoryTransform()._transform(op["after"]), @@ -95,8 +97,12 @@ def _transform_data(self): server_token = None for op in self.tx.operations: if op["source"]["table"] == "webhooks_events": + self._microseconds_to_isodate( + data=op["after"], fields=["created", "updated"] + ) webhook_event = op["after"] elif op["source"]["table"] == "oauth2server_token": + self._microseconds_to_isodate(data=op["after"], fields=["expires"]) server_token = op["after"] result = { @@ -132,6 +138,8 @@ def _transform_data(self): """Transforms the data and returns dictionary.""" op = self.tx.operations[0] + self._microseconds_to_isodate(data=op["after"], fields=["created", "updated"]) + result = { "tx_id": self.tx.id, "webhook_event": IdentityTransform()._transform(op["after"]), From 6801db741bd30abdc6e0d737d4f314b12ca354c4 Mon Sep 17 00:00:00 2001 From: Pablo Panero Date: Thu, 21 Sep 2023 18:25:24 +0200 Subject: [PATCH 05/35] migrator: gh release process action --- .../actions/transform/github.py | 188 ++++++++++++++++++ 1 file changed, 188 insertions(+) diff --git a/migrator/zenodo_rdm_migrator/actions/transform/github.py b/migrator/zenodo_rdm_migrator/actions/transform/github.py index 52745a16..b9d73590 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/github.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/github.py @@ -7,6 +7,7 @@ """Invenio RDM migration github actions module.""" +import orjson from invenio_rdm_migrator.actions import TransformAction from invenio_rdm_migrator.load.postgresql.transactions.operations import OperationType from invenio_rdm_migrator.streams.actions import load @@ -14,6 +15,9 @@ from invenio_rdm_migrator.streams.oauth import OAuthServerTokenTransform from invenio_rdm_migrator.transform import IdentityTransform +from ...transform.entries.parents import ParentRecordEntry +from ...transform.entries.records.records import ZenodoRecordEntry + # # Hooks # @@ -228,3 +232,187 @@ def _transform_data(self): # using identity because it accounts for partial updates "gh_release": IdentityTransform()._transform(op["after"]), } + + +class ReleaseProcess(TransformAction): + """Zenodo to RDM process a GitHub release action.""" + + name = "gh-release-process" + load_cls = load.ReleaseProcess + + @staticmethod + def _patch_data(original, patch): + for key, value in patch.items(): + original[key] = value + + @classmethod + def matches_action(cls, tx): + """Checks if the data corresponds with that required by the action.""" + if len(tx.operations) != 1: + return False + + op = tx.operations[0] + + return ( + op["source"]["table"] == "github_releases" + and op["op"] == OperationType.UPDATE + ) + + def _transform_data(self): + """Transforms the data and returns dictionary.""" + # the logic behind having dictionaries is that there are many rows of the same + # table (e.g record and deposit bucket, many pids), and they cannot be discarded + # until the record metadata is final and complete. then for example we can discard + # the deposit bucket, however, until that point we had to patch the updates on all + # rows. + records = {} + pids = {} + dois = {} + buckets = {} + fos = {} # file objects + + record_oai = None + release = None + + for op in self.tx.operations: + table = op["source"]["table"] + if table == "pidstore_pid": + if op["before"] == None: # create + # ignore depid + if op["after"]["pid_type"] == "doi": + dois[op["after"]["object_uuid"]] = op["after"] + elif op["after"]["pid_type"] == "oai": + record_oai = op["after"] + elif op["after"]["pid_type"] == "recid": + # add pids then find which is which once the rec meta is processed + # there might be more than two, but excluded by the rec meta + pids[op["after"]["id"]] = op["after"] + else: # updates only to recids (not asserted in the code though) + recid = op["after"]["id"] + # not pid -> e.g. when the parent already exists + pid = pids.get(recid) + pids[recid] = ( + self._patch_data(pid, op["after"]) if pid else op["after"] + ) + + elif table == "files_bucket": + # should be only two, deposit and record + bucket_id = op["after"]["id"] + bucket = buckets.get(bucket_id) + buckets[bucket_id] = ( + self._patch_data(bucket, op["after"]) if bucket else op["after"] + ) + + elif table == "files_object": + # should be only two, deposit and record + bucket_id = op["after"]["bucket_id"] + fo = fos.get(bucket_id) + fos[bucket_id] = ( + self._patch_data(fo, op["after"]) if fo else op["after"] + ) + + elif table == "files_files": + # there is only one, both fo point to it + fi = self._patch_data(fi, op["after"]) if fi else op["after"] + + elif table == "records_metadata": + # there are two created: record and deposit + # updates to other records are discarded + # deposit is discarded after transformation + if op["before"] == None: # create + records[op["after"]["id"]] = op["after"] + else: + records[op["after"]["id"]] = self._patch_data( + records[op["after"]["id"]], op["after"] + ) + + elif table == "github_releases": + if not release: + release = op["after"] + else: + self._patch_data(release, op["after"]) + + else: + if table not in {"pidstore_redirect", "pidrelations_pidrelation"}: + # ignoring a table that was not considered, might be a bug + raise + + # transform records and discard deposit + record = None + for _, rec_meta in records.items(): + rec_meta["json"] = orjson.loads(rec_meta["json"]) + if not "deposits" in rec_meta["json"]["$schema"]: + # this code assumes there is only one record and one deposit + record = rec_meta + + record = ZenodoRecordEntry(partial=True).transform(rec_meta) + parent = ParentRecordEntry(partial=True).transform(record) + # communities should not be needed to clear since the GH releases + # don't belong to one + # parent["json"]["communities"] = {} + + # choose pid and parent pid based on record metadata + record_pid = pids[record["json"]["id"]] + try: + parent_pid = pids[parent["json"]["id"]] + except KeyError: + # when it's not the first release the parent was already created + parent_pid = { + "id": parent["json"]["pid"]["pk"], # change from state on load + "pid_type": "recid", + "pid_value": parent["json"]["id"], + "status": "R", + "object_type": "rec", + # object_uuid is assigned by the pks generation of the load action + } + + # calculate parent doi + record_doi = dois[record["id"]] + try: + parent_doi = dois[parent["id"]] + except KeyError: + # if not present it was already registered in a previous release + pass + + # choose the bucket according to the record bucket_id + bucket = buckets[record["bucket_id"]] + fo = fos[bucket["id"]] # file object + + # transform datetime fields + self._microseconds_to_isodate(data=record_pid, fields=["created", "updated"]) + self._microseconds_to_isodate(data=record_doi, fields=["created", "updated"]) + self._microseconds_to_isodate(data=record_oai, fields=["created", "updated"]) + self._microseconds_to_isodate(data=parent_pid, fields=["created", "updated"]) + self._microseconds_to_isodate(data=bucket, fields=["created", "updated"]) + self._microseconds_to_isodate(data=fo, fields=["created", "updated"]) + self._microseconds_to_isodate( + data=fi, fields=["created", "updated", "last_checked_at"] + ) + self._microseconds_to_isodate( + data=record, fields=["created", "updated", "expires_at"] + ) + self._microseconds_to_isodate(data=parent, fields=["created", "updated"]) + self._microseconds_to_isodate(data=release, fields=["created", "updated"]) + + # note: the load action needs to store the pids in state because the parent + # could be in the first release and this be the second, need to find it + result = { + "tx_id": self.tx.id, + "record_pid": record_pid, + "parent_pid": parent_pid, + "record_doi": record_doi, + "record_oai": record_oai, + "file_bucket": bucket, + "file_object": fo, + "file_instance": fi, + "parent": record, + "record": parent, + # using identity because it accounts for partial updates + "gh_release": IdentityTransform()._transform(release), + } + + if parent_doi: + self._microseconds_to_isodate( + data=parent_doi, fields=["created", "updated"] + ) + result["parent_doi"] = parent_doi From 3373e1f20130165e3347f87c52bb4f173180b3f2 Mon Sep 17 00:00:00 2001 From: Pablo Panero Date: Fri, 22 Sep 2023 10:01:19 +0200 Subject: [PATCH 06/35] migrator: load json fields in gh actions --- .../actions/transform/github.py | 30 +++++++++++++++---- 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/migrator/zenodo_rdm_migrator/actions/transform/github.py b/migrator/zenodo_rdm_migrator/actions/transform/github.py index b9d73590..cb30a01d 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/github.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/github.py @@ -13,7 +13,7 @@ from invenio_rdm_migrator.streams.actions import load from invenio_rdm_migrator.streams.github import GitHubRepositoryTransform from invenio_rdm_migrator.streams.oauth import OAuthServerTokenTransform -from invenio_rdm_migrator.transform import IdentityTransform +from invenio_rdm_migrator.transform import IdentityTransform, JSONTransformMixin from ...transform.entries.parents import ParentRecordEntry from ...transform.entries.records.records import ZenodoRecordEntry @@ -60,7 +60,7 @@ def _transform_data(self): return result -class HookEventCreateAction(TransformAction): +class HookEventCreateAction(TransformAction, JSONTransformMixin): """Zenodo to RDM webhook create action. This will serve for hook enabling first phase and for disabling, as well as for @@ -104,6 +104,15 @@ def _transform_data(self): self._microseconds_to_isodate( data=op["after"], fields=["created", "updated"] ) + self._load_json_fields( + data=op["after"], + fields=[ + "payload", + "payload_headers", + "response", + "response_headers", + ], + ) webhook_event = op["after"] elif op["source"]["table"] == "oauth2server_token": self._microseconds_to_isodate(data=op["after"], fields=["expires"]) @@ -119,7 +128,7 @@ def _transform_data(self): return result -class HookEventUpdateAction(TransformAction): +class HookEventUpdateAction(TransformAction, JSONTransformMixin): """Zenodo to RDM webhook event update.""" name = "gh-hook-event-update" @@ -142,6 +151,10 @@ def _transform_data(self): """Transforms the data and returns dictionary.""" op = self.tx.operations[0] + self._load_json_fields( + data=op["after"], + fields=["payload", "payload_headers", "response", "response_headers"], + ) self._microseconds_to_isodate(data=op["after"], fields=["created", "updated"]) result = { @@ -157,7 +170,7 @@ def _transform_data(self): # -class ReleaseReceiveAction(TransformAction): +class ReleaseReceiveAction(TransformAction, JSONTransformMixin): """Zenodo to RDM receive/create a GitHub release action.""" name = "gh-release-receive" @@ -193,6 +206,7 @@ def _transform_data(self): repo = op["after"] elif op["source"]["table"] == "github_releases": release = op["after"] + self._load_json_fields(data=release, fields=["errors"]) return { "tx_id": self.tx.id, @@ -202,7 +216,7 @@ def _transform_data(self): } -class ReleaseUpdateAction(TransformAction): +class ReleaseUpdateAction(TransformAction, JSONTransformMixin): """Zenodo to RDM update a GitHub release action.""" name = "gh-release-update" @@ -225,6 +239,7 @@ def _transform_data(self): """Transforms the data and returns dictionary.""" op = self.tx.operations[0] + self._load_json_fields(data=op["after"], fields=["errors"]) self._microseconds_to_isodate(data=op["after"], fields=["created", "updated"]) return { @@ -234,7 +249,7 @@ def _transform_data(self): } -class ReleaseProcess(TransformAction): +class ReleaseProcess(TransformAction, JSONTransformMixin): """Zenodo to RDM process a GitHub release action.""" name = "gh-release-process" @@ -378,6 +393,9 @@ def _transform_data(self): bucket = buckets[record["bucket_id"]] fo = fos[bucket["id"]] # file object + # load json fields + self._load_json_fields(data=release, fields=["errors"]) + # transform datetime fields self._microseconds_to_isodate(data=record_pid, fields=["created", "updated"]) self._microseconds_to_isodate(data=record_doi, fields=["created", "updated"]) From 3ecef8e443092a07746b04e2d88c4d1f59d160c0 Mon Sep 17 00:00:00 2001 From: Pablo Panero Date: Fri, 22 Sep 2023 11:46:10 +0200 Subject: [PATCH 07/35] migrator: fix gh release process action --- .../actions/transform/github.py | 66 ++++++++++++------- 1 file changed, 44 insertions(+), 22 deletions(-) diff --git a/migrator/zenodo_rdm_migrator/actions/transform/github.py b/migrator/zenodo_rdm_migrator/actions/transform/github.py index cb30a01d..7d069677 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/github.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/github.py @@ -249,11 +249,11 @@ def _transform_data(self): } -class ReleaseProcess(TransformAction, JSONTransformMixin): +class ReleaseProcessAction(TransformAction, JSONTransformMixin): """Zenodo to RDM process a GitHub release action.""" name = "gh-release-process" - load_cls = load.ReleaseProcess + load_cls = load.ReleaseProcessAction @staticmethod def _patch_data(original, patch): @@ -263,15 +263,21 @@ def _patch_data(original, patch): @classmethod def matches_action(cls, tx): """Checks if the data corresponds with that required by the action.""" - if len(tx.operations) != 1: - return False + rules = { + "records_metadata", + "pidstore_pid", + "files_bucket", + "files_object", + "files_files", + "github_releases", + } - op = tx.operations[0] + for op in tx.operations: + if op["source"]["table"] in rules and op["op"] == OperationType.INSERT: + rules.remove(op["source"]["table"]) - return ( - op["source"]["table"] == "github_releases" - and op["op"] == OperationType.UPDATE - ) + # there is at least one creation of each of the tables needed + return len(rules) == 0 def _transform_data(self): """Transforms the data and returns dictionary.""" @@ -371,15 +377,8 @@ def _transform_data(self): try: parent_pid = pids[parent["json"]["id"]] except KeyError: - # when it's not the first release the parent was already created - parent_pid = { - "id": parent["json"]["pid"]["pk"], # change from state on load - "pid_type": "recid", - "pid_value": parent["json"]["id"], - "status": "R", - "object_type": "rec", - # object_uuid is assigned by the pks generation of the load action - } + # if not present it was already registered in a previous release + pass # calculate parent doi record_doi = dois[record["id"]] @@ -393,6 +392,18 @@ def _transform_data(self): bucket = buckets[record["bucket_id"]] fo = fos[bucket["id"]] # file object + # calculate file record + fr = { + "id": None, # generated by the load action + "json": {}, + "created": fo["created"], + "updated": fo["updated"], + "version_id": 1, + "key": fo["key"], + "record_id": None, # calculated by the load action + "object_version_id": fo["version_id"], + } + # load json fields self._load_json_fields(data=release, fields=["errors"]) @@ -400,7 +411,6 @@ def _transform_data(self): self._microseconds_to_isodate(data=record_pid, fields=["created", "updated"]) self._microseconds_to_isodate(data=record_doi, fields=["created", "updated"]) self._microseconds_to_isodate(data=record_oai, fields=["created", "updated"]) - self._microseconds_to_isodate(data=parent_pid, fields=["created", "updated"]) self._microseconds_to_isodate(data=bucket, fields=["created", "updated"]) self._microseconds_to_isodate(data=fo, fields=["created", "updated"]) self._microseconds_to_isodate( @@ -412,25 +422,37 @@ def _transform_data(self): self._microseconds_to_isodate(data=parent, fields=["created", "updated"]) self._microseconds_to_isodate(data=release, fields=["created", "updated"]) - # note: the load action needs to store the pids in state because the parent + # TODO: the load action needs to store the pids in state because the parent # could be in the first release and this be the second, need to find it + + # note: record and parent need to be transformed (done in lines above) + # the rest are simple "identity" and since we already have a dict they can be + # passed as they are result = { "tx_id": self.tx.id, "record_pid": record_pid, - "parent_pid": parent_pid, "record_doi": record_doi, "record_oai": record_oai, "file_bucket": bucket, "file_object": fo, "file_instance": fi, + "file_record": fr, "parent": record, "record": parent, # using identity because it accounts for partial updates - "gh_release": IdentityTransform()._transform(release), + "gh_release": release, } + if parent_pid: + self._microseconds_to_isodate( + data=parent_pid, fields=["created", "updated"] + ) + result["parent_pid"] = parent_pid + if parent_doi: self._microseconds_to_isodate( data=parent_doi, fields=["created", "updated"] ) result["parent_doi"] = parent_doi + + return result From 62e1cb3845b4cc36a81214cd43c8979990b0bf10 Mon Sep 17 00:00:00 2001 From: Pablo Panero Date: Tue, 19 Sep 2023 14:37:57 +0200 Subject: [PATCH 08/35] migrator: oauth2server client transform bugfix --- .../tests/actions/oauth/test_oauth_actions.py | 386 ++++++++++++++++++ .../linked_accounts/connect_github.jsonl | 0 .../linked_accounts/connect_orcid.jsonl | 4 + .../linked_accounts/disconnect_github.jsonl | 0 .../linked_accounts/disconnect_orcid.jsonl | 3 + .../actions/transform/__init__.py | 4 + .../actions/transform/oauth.py | 119 +++++- .../transform/transactions.py | 4 + 8 files changed, 514 insertions(+), 6 deletions(-) create mode 100644 migrator/tests/actions/oauth/testdata/linked_accounts/connect_github.jsonl create mode 100644 migrator/tests/actions/oauth/testdata/linked_accounts/connect_orcid.jsonl create mode 100644 migrator/tests/actions/oauth/testdata/linked_accounts/disconnect_github.jsonl create mode 100644 migrator/tests/actions/oauth/testdata/linked_accounts/disconnect_orcid.jsonl diff --git a/migrator/tests/actions/oauth/test_oauth_actions.py b/migrator/tests/actions/oauth/test_oauth_actions.py index 72a38330..35030e76 100644 --- a/migrator/tests/actions/oauth/test_oauth_actions.py +++ b/migrator/tests/actions/oauth/test_oauth_actions.py @@ -19,6 +19,8 @@ OAuthApplicationCreateAction, OAuthApplicationDeleteAction, OAuthApplicationUpdateAction, + OAuthLinkedAccountConnectAction, + OAuthLinkedAccountDisconnectAction, OAuthServerTokenCreateAction, OAuthServerTokenDeleteAction, OAuthServerTokenUpdateAction, @@ -635,3 +637,387 @@ def test_transform_with_valid_data(self, delete_oauth_application_tx): ) ) assert isinstance(action.transform(), load.OAuthApplicationDeleteAction) + + +## +# LINKED ACCOUNTS +## + + +@pytest.fixture() +def connect_orcid_oauth_application_tx(): + """Transaction data to connect an OAuth ORCID account. + + As it would be after the extraction step. + """ + datafile = ( + Path(__file__).parent / "testdata" / "linked_accounts" / "connect_orcid.jsonl" + ) + with open(datafile, "rb") as reader: + ops = [orjson.loads(line)["value"] for line in reader] + + return {"tx_id": 1, "operations": ops} + + +class TestOAuthLinkedAccountConnectAction: + """Connect an OAuth account action tests.""" + + def test_matches_with_valid_data(self): + assert ( + OAuthLinkedAccountConnectAction.matches_action( + Tx( + id=1, + operations=[ + { + "op": OperationType.INSERT, + "source": {"table": "oauthclient_remoteaccount"}, + "after": {}, + }, + { + "op": OperationType.INSERT, + "source": {"table": "oauthclient_remotetoken"}, + "after": {}, + }, + { + "op": OperationType.INSERT, + "source": {"table": "accounts_useridentity"}, + "after": {}, + }, + { + "op": OperationType.UPDATE, + "source": {"table": "oauthclient_remoteaccount"}, + "after": {}, + }, + ], + ) + ) + is True + ) + + def test_matches_with_invalid_data(self): + empty = [] + + no_account = [ + { + "op": OperationType.INSERT, + "source": {"table": "oauthclient_remotetoken"}, + "after": {}, + }, + { + "op": OperationType.INSERT, + "source": {"table": "accounts_useridentity"}, + "after": {}, + }, + { + "op": OperationType.UPDATE, + "source": {"table": "oauthclient_remoteaccount"}, + "after": {}, + }, + ] + + no_account_update = [ + { + "op": OperationType.INSERT, + "source": {"table": "oauthclient_remoteaccount"}, + "after": {}, + }, + { + "op": OperationType.INSERT, + "source": {"table": "oauthclient_remotetoken"}, + "after": {}, + }, + { + "op": OperationType.INSERT, + "source": {"table": "accounts_useridentity"}, + "after": {}, + }, + ] + + double_insert = [ + { + "op": OperationType.INSERT, + "source": {"table": "oauthclient_remoteaccount"}, + "after": {}, + }, + { + "op": OperationType.INSERT, + "source": {"table": "oauthclient_remotetoken"}, + "after": {}, + }, + { + "op": OperationType.INSERT, + "source": {"table": "accounts_useridentity"}, + "after": {}, + }, + { + "op": OperationType.INSERT, + "source": {"table": "oauthclient_remoteaccount"}, + "after": {}, + }, + ] + + double_update = [ + { + "op": OperationType.UPDATE, + "source": {"table": "oauthclient_remoteaccount"}, + "after": {}, + }, + { + "op": OperationType.INSERT, + "source": {"table": "oauthclient_remotetoken"}, + "after": {}, + }, + { + "op": OperationType.INSERT, + "source": {"table": "accounts_useridentity"}, + "after": {}, + }, + { + "op": OperationType.UPDATE, + "source": {"table": "oauthclient_remoteaccount"}, + "after": {}, + }, + ] + + no_token = [ + { + "op": OperationType.INSERT, + "source": {"table": "oauthclient_remoteaccount"}, + "after": {}, + }, + { + "op": OperationType.INSERT, + "source": {"table": "accounts_useridentity"}, + "after": {}, + }, + { + "op": OperationType.UPDATE, + "source": {"table": "oauthclient_remoteaccount"}, + "after": {}, + }, + ] + + no_user_identity = [ + { + "op": OperationType.INSERT, + "source": {"table": "oauthclient_remoteaccount"}, + "after": {}, + }, + { + "op": OperationType.INSERT, + "source": {"table": "oauthclient_remotetoken"}, + "after": {}, + }, + { + "op": OperationType.UPDATE, + "source": {"table": "oauthclient_remoteaccount"}, + "after": {}, + }, + ] + + wrong_op = [ + { + "op": OperationType.INSERT, + "source": {"table": "oauthclient_remoteaccount"}, + "after": {}, + }, + { + "op": OperationType.INSERT, + "source": {"table": "oauthclient_remotetoken"}, + "after": {}, + }, + { + "op": OperationType.UPDATE, + "source": {"table": "accounts_useridentity"}, + "after": {}, + }, + { + "op": OperationType.UPDATE, + "source": {"table": "oauthclient_remoteaccount"}, + "after": {}, + }, + ] + + for invalid_ops in [ + empty, + no_account, + no_account_update, + double_insert, + double_update, + no_token, + no_user_identity, + wrong_op, + ]: + assert ( + OAuthLinkedAccountConnectAction.matches_action( + Tx(id=1, operations=invalid_ops) + ) + is False + ) + + def test_transform_with_valid_data(self, connect_orcid_oauth_application_tx): + action = OAuthLinkedAccountConnectAction( + Tx( + id=connect_orcid_oauth_application_tx["tx_id"], + operations=connect_orcid_oauth_application_tx["operations"], + ) + ) + assert isinstance(action.transform(), load.OAuthLinkedAccountConnectAction) + + +@pytest.fixture() +def disconnect_orcid_oauth_application_tx(): + """Transaction data to connect an OAuth ORCID account. + + As it would be after the extraction step. + """ + datafile = ( + Path(__file__).parent + / "testdata" + / "linked_accounts" + / "disconnect_orcid.jsonl" + ) + with open(datafile, "rb") as reader: + ops = [orjson.loads(line)["value"] for line in reader] + + return {"tx_id": 1, "operations": ops} + + +class TestOAuthLinkedAccountDisconnectAction: + """Disonnect an OAuth account action tests.""" + + def test_matches_with_valid_data(self): + assert ( + OAuthLinkedAccountDisconnectAction.matches_action( + Tx( + id=1, + operations=[ + { + "op": OperationType.DELETE, + "source": {"table": "oauthclient_remoteaccount"}, + "after": {}, + }, + { + "op": OperationType.DELETE, + "source": {"table": "oauthclient_remotetoken"}, + "after": {}, + }, + { + "op": OperationType.DELETE, + "source": {"table": "accounts_useridentity"}, + "after": {}, + }, + ], + ) + ) + is True + ) + + def test_matches_with_invalid_data(self): + empty = [] + + no_account = [ + { + "op": OperationType.DELETE, + "source": {"table": "oauthclient_remotetoken"}, + "after": {}, + }, + { + "op": OperationType.DELETE, + "source": {"table": "accounts_useridentity"}, + "after": {}, + }, + ] + + no_token = [ + { + "op": OperationType.DELETE, + "source": {"table": "oauthclient_remoteaccount"}, + "after": {}, + }, + { + "op": OperationType.DELETE, + "source": {"table": "accounts_useridentity"}, + "after": {}, + }, + ] + + no_user_identity = [ + { + "op": OperationType.DELETE, + "source": {"table": "oauthclient_remoteaccount"}, + "after": {}, + }, + { + "op": OperationType.DELETE, + "source": {"table": "oauthclient_remotetoken"}, + "after": {}, + }, + ] + + wrong_op = [ + { + "op": OperationType.INSERT, + "source": {"table": "oauthclient_remoteaccount"}, + "after": {}, + }, + { + "op": OperationType.DELETE, + "source": {"table": "oauthclient_remotetoken"}, + "after": {}, + }, + { + "op": OperationType.DELETE, + "source": {"table": "accounts_useridentity"}, + "after": {}, + }, + ] + + extra_op = ( + [ + { + "op": OperationType.DELETE, + "source": {"table": "another"}, + "after": {}, + }, + { + "op": OperationType.DELETE, + "source": {"table": "oauthclient_remoteaccount"}, + "after": {}, + }, + { + "op": OperationType.DELETE, + "source": {"table": "oauthclient_remotetoken"}, + "after": {}, + }, + { + "op": OperationType.DELETE, + "source": {"table": "accounts_useridentity"}, + "after": {}, + }, + ], + ) + + for invalid_ops in [ + empty, + no_account, + no_token, + no_user_identity, + wrong_op, + extra_op, + ]: + assert ( + OAuthLinkedAccountDisconnectAction.matches_action( + Tx(id=1, operations=invalid_ops) + ) + is False + ) + + def test_transform_with_valid_data(self, disconnect_orcid_oauth_application_tx): + action = OAuthLinkedAccountDisconnectAction( + Tx( + id=disconnect_orcid_oauth_application_tx["tx_id"], + operations=disconnect_orcid_oauth_application_tx["operations"], + ) + ) + assert isinstance(action.transform(), load.OAuthLinkedAccountDisconnectAction) diff --git a/migrator/tests/actions/oauth/testdata/linked_accounts/connect_github.jsonl b/migrator/tests/actions/oauth/testdata/linked_accounts/connect_github.jsonl new file mode 100644 index 00000000..e69de29b diff --git a/migrator/tests/actions/oauth/testdata/linked_accounts/connect_orcid.jsonl b/migrator/tests/actions/oauth/testdata/linked_accounts/connect_orcid.jsonl new file mode 100644 index 00000000..4216846b --- /dev/null +++ b/migrator/tests/actions/oauth/testdata/linked_accounts/connect_orcid.jsonl @@ -0,0 +1,4 @@ +{"topic": "zenodo-qa.public", "partition": 0, "offset": 2042, "timestamp": 1695052639168, "timestamp_type": 0, "key": {"id": 8546, "__dbz__physicalTableIdentifier": "zenodo-qa.public.oauthclient_remoteaccount"}, "value": {"before": null, "after": {"id": 8546, "user_id": 22858, "client_id": "APP-MAX7XCD8Q98X4VT6", "extra_data": "{}", "created": 1695052639131534, "updated": 1695052639131549}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695052639145, "snapshot": "false", "db": "zenodo", "sequence": "[\"1472725538536\",\"1472725538736\"]", "schema": "public", "table": "oauthclient_remoteaccount", "txId": 563864048, "lsn": 1472725538736, "xmin": null}, "op": "c", "ts_ms": 1695052639160, "transaction": {"id": "563864048:1472725538736", "total_order": 1, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 89, "serialized_value_size": 566, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 2043, "timestamp": 1695052639168, "timestamp_type": 0, "key": {"id_remote_account": 8546, "token_type": "", "__dbz__physicalTableIdentifier": "zenodo-qa.public.oauthclient_remotetoken"}, "value": {"before": null, "after": {"id_remote_account": 8546, "token_type": "", "access_token": "R3RVeGc3K0RrM25rbXc4ZWxGM3oxYVA4LzcwVWpCNkM4aG8vRy9CNWxkZFFCMk9OR1d2d29lN3dKdWk2eEVTQQ==", "secret": "", "created": 1695052639134252, "updated": 1695052639134260}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695052639145, "snapshot": "false", "db": "zenodo", "sequence": "[\"1472725538536\",\"1472725553864\"]", "schema": "public", "table": "oauthclient_remotetoken", "txId": 563864048, "lsn": 1472725553864, "xmin": null}, "op": "c", "ts_ms": 1695052639160, "transaction": {"id": "563864048:1472725553864", "total_order": 2, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 118, "serialized_value_size": 644, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 2044, "timestamp": 1695052639168, "timestamp_type": 0, "key": {"id": 8546, "__dbz__physicalTableIdentifier": "zenodo-qa.public.oauthclient_remoteaccount"}, "value": {"before": {"id": 8546, "user_id": 22858, "client_id": "APP-MAX7XCD8Q98X4VT6", "extra_data": "{}", "created": 1695052639131534, "updated": 1695052639131549}, "after": {"id": 8546, "user_id": 22858, "client_id": "APP-MAX7XCD8Q98X4VT6", "extra_data": "{\"orcid\": \"0000-0002-5676-5956\", \"full_name\": \"Alex Ioannidis\"}", "created": 1695052639131534, "updated": 1695052639138958}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695052639145, "snapshot": "false", "db": "zenodo", "sequence": "[\"1472725538536\",\"1472725565200\"]", "schema": "public", "table": "oauthclient_remoteaccount", "txId": 563864048, "lsn": 1472725565200, "xmin": null}, "op": "u", "ts_ms": 1695052639160, "transaction": {"id": "563864048:1472725565200", "total_order": 3, "data_collection_order": 2}}, "headers": [], "checksum": null, "serialized_key_size": 89, "serialized_value_size": 765, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 2045, "timestamp": 1695052639168, "timestamp_type": 0, "key": {"id": "0000-0002-5676-5956", "method": "orcid", "__dbz__physicalTableIdentifier": "zenodo-qa.public.oauthclient_useridentity"}, "value": {"before": null, "after": {"id": "0000-0002-5676-5956", "method": "orcid", "id_user": 22858, "created": 1695052639141710, "updated": 1695052639141718}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695052639145, "snapshot": "false", "db": "zenodo", "sequence": "[\"1472725538536\",\"1472725565432\"]", "schema": "public", "table": "oauthclient_useridentity", "txId": 563864048, "lsn": 1472725565432, "xmin": null}, "op": "c", "ts_ms": 1695052639160, "transaction": {"id": "563864048:1472725565432", "total_order": 4, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 122, "serialized_value_size": 546, "serialized_header_size": -1} \ No newline at end of file diff --git a/migrator/tests/actions/oauth/testdata/linked_accounts/disconnect_github.jsonl b/migrator/tests/actions/oauth/testdata/linked_accounts/disconnect_github.jsonl new file mode 100644 index 00000000..e69de29b diff --git a/migrator/tests/actions/oauth/testdata/linked_accounts/disconnect_orcid.jsonl b/migrator/tests/actions/oauth/testdata/linked_accounts/disconnect_orcid.jsonl new file mode 100644 index 00000000..4b3fe5a7 --- /dev/null +++ b/migrator/tests/actions/oauth/testdata/linked_accounts/disconnect_orcid.jsonl @@ -0,0 +1,3 @@ +{"topic": "zenodo-qa.public", "partition": 0, "offset": 2046, "timestamp": 1695052669729, "timestamp_type": 0, "key": {"id": "0000-0002-5676-5956", "method": "orcid", "__dbz__physicalTableIdentifier": "zenodo-qa.public.oauthclient_useridentity"}, "value": {"before": {"id": "0000-0002-5676-5956", "method": "orcid", "id_user": 22858, "created": 1695052639141710, "updated": 1695052639141718}, "after": null, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695052669252, "snapshot": "false", "db": "zenodo", "sequence": "[\"1472725584624\",\"1472725584936\"]", "schema": "public", "table": "oauthclient_useridentity", "txId": 563864054, "lsn": 1472725584936, "xmin": null}, "op": "d", "ts_ms": 1695052669509, "transaction": {"id": "563864054:1472725584936", "total_order": 1, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 122, "serialized_value_size": 546, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 2048, "timestamp": 1695052669729, "timestamp_type": 0, "key": {"id_remote_account": 8546, "token_type": "", "__dbz__physicalTableIdentifier": "zenodo-qa.public.oauthclient_remotetoken"}, "value": {"before": {"id_remote_account": 8546, "token_type": "", "access_token": "R3RVeGc3K0RrM25rbXc4ZWxGM3oxYVA4LzcwVWpCNkM4aG8vRy9CNWxkZFFCMk9OR1d2d29lN3dKdWk2eEVTQQ==", "secret": "", "created": 1695052639134252, "updated": 1695052639134260}, "after": null, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695052669252, "snapshot": "false", "db": "zenodo", "sequence": "[\"1472725584624\",\"1472725585048\"]", "schema": "public", "table": "oauthclient_remotetoken", "txId": 563864054, "lsn": 1472725585048, "xmin": null}, "op": "d", "ts_ms": 1695052669509, "transaction": {"id": "563864054:1472725585048", "total_order": 2, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 118, "serialized_value_size": 644, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 2050, "timestamp": 1695052669729, "timestamp_type": 0, "key": {"id": 8546, "__dbz__physicalTableIdentifier": "zenodo-qa.public.oauthclient_remoteaccount"}, "value": {"before": {"id": 8546, "user_id": 22858, "client_id": "APP-MAX7XCD8Q98X4VT6", "extra_data": "{\"orcid\": \"0000-0002-5676-5956\", \"full_name\": \"Alex Ioannidis\"}", "created": 1695052639131534, "updated": 1695052639138958}, "after": null, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695052669252, "snapshot": "false", "db": "zenodo", "sequence": "[\"1472725584624\",\"1472725585200\"]", "schema": "public", "table": "oauthclient_remoteaccount", "txId": 563864054, "lsn": 1472725585200, "xmin": null}, "op": "d", "ts_ms": 1695052669509, "transaction": {"id": "563864054:1472725585200", "total_order": 3, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 89, "serialized_value_size": 635, "serialized_header_size": -1} \ No newline at end of file diff --git a/migrator/zenodo_rdm_migrator/actions/transform/__init__.py b/migrator/zenodo_rdm_migrator/actions/transform/__init__.py index 62522b5f..30422580 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/__init__.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/__init__.py @@ -25,6 +25,8 @@ OAuthApplicationCreateAction, OAuthApplicationDeleteAction, OAuthApplicationUpdateAction, + OAuthLinkedAccountConnectAction, + OAuthLinkedAccountDisconnectAction, OAuthServerTokenCreateAction, OAuthServerTokenDeleteAction, OAuthServerTokenUpdateAction, @@ -45,6 +47,8 @@ "OAuthApplicationCreateAction", "OAuthApplicationDeleteAction", "OAuthApplicationUpdateAction", + "OAuthLinkedAccountConnectAction", + "OAuthLinkedAccountDisconnectAction", "OAuthServerTokenCreateAction", "OAuthServerTokenDeleteAction", "OAuthServerTokenUpdateAction", diff --git a/migrator/zenodo_rdm_migrator/actions/transform/oauth.py b/migrator/zenodo_rdm_migrator/actions/transform/oauth.py index 3cbcab55..c5c76bbd 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/oauth.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/oauth.py @@ -10,7 +10,10 @@ from invenio_rdm_migrator.actions import TransformAction from invenio_rdm_migrator.load.postgresql.transactions.operations import OperationType from invenio_rdm_migrator.streams.actions import load -from invenio_rdm_migrator.streams.oauth import OAuthServerTokenTransform +from invenio_rdm_migrator.streams.oauth import ( + OAuthServerClientTransform, + OAuthServerTokenTransform, +) from invenio_rdm_migrator.transform import IdentityTransform @@ -52,7 +55,7 @@ def _transform_data(self): result = { "tx_id": self.tx.id, - "client": IdentityTransform()._transform(client_src), + "client": OAuthServerClientTransform()._transform(client_src), "token": OAuthServerTokenTransform()._transform(token_src), } @@ -99,7 +102,7 @@ def _transform_data(self): for op in self.tx.operations: if op["source"]["table"] == "oauth2server_client": - result["client"] = IdentityTransform()._transform(op["after"]) + result["client"] = OAuthServerClientTransform()._transform(op["after"]) elif op["source"]["table"] == "oauth2server_token": result["token"] = OAuthServerTokenTransform()._transform(op["after"]) @@ -161,7 +164,7 @@ def _transform_data(self): return { "tx_id": self.tx.id, - "client": IdentityTransform()._transform(op["after"]), + "client": OAuthServerClientTransform()._transform(op["after"]), } @@ -193,7 +196,7 @@ def _transform_data(self): return { "tx_id": self.tx.id, - "client": IdentityTransform()._transform(op["after"]), + "client": OAuthServerClientTransform()._transform(op["after"]), } @@ -222,5 +225,109 @@ def _transform_data(self): return { "tx_id": self.tx.id, - "client": IdentityTransform()._transform(op["before"]), + "client": OAuthServerClientTransform()._transform(op["before"]), + } + + +class OAuthLinkedAccountConnectAction(TransformAction): + """Zenodo to RDM OAuth client linked account connect action.""" + + name = "oauth-application-connect" + load_cls = load.OAuthLinkedAccountConnectAction + + @classmethod + def matches_action(cls, tx): + """Checks if the data corresponds with that required by the action.""" + if len(tx.operations) != 4: + return False + + rules = { + # OpType is not hashable (to use a dict), the list won't be long in any case + # so the worst case scenario is not too bad + "oauthclient_remoteaccount": [OperationType.INSERT, OperationType.UPDATE], + "oauthclient_remotetoken": [OperationType.INSERT], + "accounts_useridentity": [OperationType.INSERT], + } + + for op in tx.operations: + rule = rules.get(op["source"]["table"]) + if not rule: + return False + try: + rule.remove(op["op"]) # prevents double update/insert in sets + except ValueError: + return False + + return True + + def _transform_data(self): + """Transforms the data and returns dictionary.""" + remote_account = None + remote_token = None + user_identity = None + + for op in self.tx.operations: + if op["source"]["table"] == "oauthclient_remoteaccount": + if op["op"] == OperationType.INSERT: + remote_account = op["after"] + else: + for key, value in op["after"].items(): + remote_account[key] = value + elif op["source"]["table"] == "oauthclient_remotetoken": + remote_token = op["after"] + elif op["source"]["table"] == "accounts_useridentity": + user_identity = op["after"] + + return { + "tx_id": self.tx.id, + "remote_account": IdentityTransform()._transform(remote_account), + "remote_token": IdentityTransform()._transform(remote_token), + "user_identity": IdentityTransform()._transform(user_identity), + } + + +class OAuthLinkedAccountDisconnectAction(TransformAction): + """Zenodo to RDM OAuth client linked account disconnect action.""" + + name = "oauth-application-disconnect" + load_cls = load.OAuthLinkedAccountDisconnectAction + + @classmethod + def matches_action(cls, tx): + """Checks if the data corresponds with that required by the action.""" + if len(tx.operations) != 3: + return False + + rules = { + "oauthclient_remoteaccount": OperationType.DELETE, + "oauthclient_remotetoken": OperationType.DELETE, + "accounts_useridentity": OperationType.DELETE, + } + + for op in tx.operations: + rule = rules.pop(op["source"]["table"], None) + if not rule or rule != op["op"]: + return False + + return True + + def _transform_data(self): + """Transforms the data and returns dictionary.""" + remote_account = None + remote_token = None + user_identity = None + + for op in self.tx.operations: + if op["source"]["table"] == "oauthclient_remoteaccount": + remote_account = op["before"] + elif op["source"]["table"] == "oauthclient_remotetoken": + remote_token = op["before"] + elif op["source"]["table"] == "accounts_useridentity": + user_identity = op["before"] + + return { + "tx_id": self.tx.id, + "remote_account": IdentityTransform()._transform(remote_account), + "remote_token": IdentityTransform()._transform(remote_token), + "user_identity": IdentityTransform()._transform(user_identity), } diff --git a/migrator/zenodo_rdm_migrator/transform/transactions.py b/migrator/zenodo_rdm_migrator/transform/transactions.py index 653c1ace..b3ee84a0 100644 --- a/migrator/zenodo_rdm_migrator/transform/transactions.py +++ b/migrator/zenodo_rdm_migrator/transform/transactions.py @@ -23,6 +23,8 @@ OAuthApplicationCreateAction, OAuthApplicationDeleteAction, OAuthApplicationUpdateAction, + OAuthLinkedAccountConnectAction, + OAuthLinkedAccountDisconnectAction, OAuthServerTokenCreateAction, OAuthServerTokenDeleteAction, OAuthServerTokenUpdateAction, @@ -50,6 +52,8 @@ class ZenodoTxTransform(BaseTxTransform): OAuthApplicationCreateAction, OAuthApplicationDeleteAction, OAuthApplicationUpdateAction, + OAuthLinkedAccountConnectAction, + OAuthLinkedAccountDisconnectAction, OAuthServerTokenCreateAction, OAuthServerTokenDeleteAction, OAuthServerTokenUpdateAction, From 27055d97b79f45981f3c2415c4443108e3cf8592 Mon Sep 17 00:00:00 2001 From: Pablo Panero Date: Tue, 19 Sep 2023 15:05:10 +0200 Subject: [PATCH 09/35] migrator: oautch client linked applications, dis/connect orcid --- migrator/tests/actions/conftest.py | 11 ++- migrator/tests/actions/oauth/conftest.py | 8 ++ .../tests/actions/oauth/test_oauth_actions.py | 24 +++--- .../oauth/test_oauth_actions_stream.py | 76 ++++++++++++++++++- .../actions/transform/oauth.py | 8 +- 5 files changed, 109 insertions(+), 18 deletions(-) diff --git a/migrator/tests/actions/conftest.py b/migrator/tests/actions/conftest.py index 69a9301c..34e5ff1c 100644 --- a/migrator/tests/actions/conftest.py +++ b/migrator/tests/actions/conftest.py @@ -30,7 +30,12 @@ ) from invenio_rdm_migrator.streams.models.github import Release, Repository, WebhookEvent from invenio_rdm_migrator.streams.models.oai import OAISet -from invenio_rdm_migrator.streams.models.oauth import ServerClient, ServerToken +from invenio_rdm_migrator.streams.models.oauth import ( + RemoteAccount, + RemoteToken, + ServerClient, + ServerToken, +) from invenio_rdm_migrator.streams.models.pids import PersistentIdentifier from invenio_rdm_migrator.streams.models.records import ( RDMDraftFile, @@ -42,6 +47,7 @@ LoginInformation, SessionActivity, User, + UserIdentity, ) from invenio_rdm_migrator.streams.records.state import ParentModelValidator @@ -162,12 +168,15 @@ def database(engine): RDMParentMetadata, RDMVersionState, RDMParentCommunityMetadata, + RemoteAccount, + RemoteToken, Release, Repository, ServerClient, ServerToken, SessionActivity, User, + UserIdentity, WebhookEvent, ] diff --git a/migrator/tests/actions/oauth/conftest.py b/migrator/tests/actions/oauth/conftest.py index 4f222dd3..634cef5f 100644 --- a/migrator/tests/actions/oauth/conftest.py +++ b/migrator/tests/actions/oauth/conftest.py @@ -27,3 +27,11 @@ def tx_files_applications(): testdata_dir = Path(__file__).parent / "testdata" / "applications" assert testdata_dir.exists() return {f.stem: f for f in testdata_dir.iterdir() if f.is_file()} + + +@pytest.fixture() +def tx_files_linked_accounts(): + """Transactions file paths.""" + testdata_dir = Path(__file__).parent / "testdata" / "linked_accounts" + assert testdata_dir.exists() + return {f.stem: f for f in testdata_dir.iterdir() if f.is_file()} diff --git a/migrator/tests/actions/oauth/test_oauth_actions.py b/migrator/tests/actions/oauth/test_oauth_actions.py index 35030e76..bdff414d 100644 --- a/migrator/tests/actions/oauth/test_oauth_actions.py +++ b/migrator/tests/actions/oauth/test_oauth_actions.py @@ -680,7 +680,7 @@ def test_matches_with_valid_data(self): }, { "op": OperationType.INSERT, - "source": {"table": "accounts_useridentity"}, + "source": {"table": "oauthclient_useridentity"}, "after": {}, }, { @@ -705,7 +705,7 @@ def test_matches_with_invalid_data(self): }, { "op": OperationType.INSERT, - "source": {"table": "accounts_useridentity"}, + "source": {"table": "oauthclient_useridentity"}, "after": {}, }, { @@ -728,7 +728,7 @@ def test_matches_with_invalid_data(self): }, { "op": OperationType.INSERT, - "source": {"table": "accounts_useridentity"}, + "source": {"table": "oauthclient_useridentity"}, "after": {}, }, ] @@ -746,7 +746,7 @@ def test_matches_with_invalid_data(self): }, { "op": OperationType.INSERT, - "source": {"table": "accounts_useridentity"}, + "source": {"table": "oauthclient_useridentity"}, "after": {}, }, { @@ -769,7 +769,7 @@ def test_matches_with_invalid_data(self): }, { "op": OperationType.INSERT, - "source": {"table": "accounts_useridentity"}, + "source": {"table": "oauthclient_useridentity"}, "after": {}, }, { @@ -787,7 +787,7 @@ def test_matches_with_invalid_data(self): }, { "op": OperationType.INSERT, - "source": {"table": "accounts_useridentity"}, + "source": {"table": "oauthclient_useridentity"}, "after": {}, }, { @@ -828,7 +828,7 @@ def test_matches_with_invalid_data(self): }, { "op": OperationType.UPDATE, - "source": {"table": "accounts_useridentity"}, + "source": {"table": "oauthclient_useridentity"}, "after": {}, }, { @@ -904,7 +904,7 @@ def test_matches_with_valid_data(self): }, { "op": OperationType.DELETE, - "source": {"table": "accounts_useridentity"}, + "source": {"table": "oauthclient_useridentity"}, "after": {}, }, ], @@ -924,7 +924,7 @@ def test_matches_with_invalid_data(self): }, { "op": OperationType.DELETE, - "source": {"table": "accounts_useridentity"}, + "source": {"table": "oauthclient_useridentity"}, "after": {}, }, ] @@ -937,7 +937,7 @@ def test_matches_with_invalid_data(self): }, { "op": OperationType.DELETE, - "source": {"table": "accounts_useridentity"}, + "source": {"table": "oauthclient_useridentity"}, "after": {}, }, ] @@ -968,7 +968,7 @@ def test_matches_with_invalid_data(self): }, { "op": OperationType.DELETE, - "source": {"table": "accounts_useridentity"}, + "source": {"table": "oauthclient_useridentity"}, "after": {}, }, ] @@ -992,7 +992,7 @@ def test_matches_with_invalid_data(self): }, { "op": OperationType.DELETE, - "source": {"table": "accounts_useridentity"}, + "source": {"table": "oauthclient_useridentity"}, "after": {}, }, ], diff --git a/migrator/tests/actions/oauth/test_oauth_actions_stream.py b/migrator/tests/actions/oauth/test_oauth_actions_stream.py index f8b6342a..286e2b64 100644 --- a/migrator/tests/actions/oauth/test_oauth_actions_stream.py +++ b/migrator/tests/actions/oauth/test_oauth_actions_stream.py @@ -10,7 +10,13 @@ import pytest import sqlalchemy as sa from invenio_rdm_migrator.streams import Stream -from invenio_rdm_migrator.streams.models.oauth import ServerClient, ServerToken +from invenio_rdm_migrator.streams.models.oauth import ( + RemoteAccount, + RemoteToken, + ServerClient, + ServerToken, +) +from invenio_rdm_migrator.streams.models.users import UserIdentity from zenodo_rdm_migrator.transform.transactions import ZenodoTxTransform @@ -164,3 +170,71 @@ def test_oauth_application_delete_action_stream( stream.run() assert not db_client_server.scalars(sa.select(ServerClient)).one_or_none() + + +def test_oauth_linked_app_connect_action_stream( + db_client_server, pg_tx_load, test_extract_cls, tx_files_linked_accounts +): + stream = Stream( + name="action", + extract=test_extract_cls(tx_files_linked_accounts["connect_orcid"]), + transform=ZenodoTxTransform(), + load=pg_tx_load, + ) + stream.run() + + assert db_client_server.scalars(sa.select(RemoteAccount)).one() + assert db_client_server.scalars(sa.select(RemoteToken)).one() + assert db_client_server.scalars(sa.select(UserIdentity)).one() + + +@pytest.fixture(scope="function") +def db_linked_account(database, session): + remote_account = RemoteAccount( + id=8546, + user_id=22858, + client_id="APP-MAX7XCD8Q98X4VT6", + extra_data='{"orcid": "0000-0002-5676-5956", "full_name": "Alex Ioannidis"}', + created="2023-06-29T13:00:00", + updated="2023-06-29T14:00:00", + ) + + remote_token = RemoteToken( + id_remote_account=8546, + token_type="", + access_token="R3RVeGc3K0RrM25rbXc4ZWxGM3oxYVA4LzcwVWpCNkM4aG8vRy9CNWxkZFFCMk9OR1d2d29lN3dKdWk2eEVTQQ==", + secret="", + created="2023-06-29T13:00:00", + updated="2023-06-29T14:00:00", + ) + + user_identity = UserIdentity( + id="0000-0002-5676-5956", + method="orcid", + id_user=22858, + created="2023-06-29T13:00:00", + updated="2023-06-29T14:00:00", + ) + + session.add(remote_account) + session.add(remote_token) + session.add(user_identity) + session.commit() + + return session + + +def test_oauth_linked_app_disconnect_action_stream( + db_linked_account, pg_tx_load, test_extract_cls, tx_files_linked_accounts +): + stream = Stream( + name="action", + extract=test_extract_cls(tx_files_linked_accounts["disconnect_orcid"]), + transform=ZenodoTxTransform(), + load=pg_tx_load, + ) + stream.run() + + assert not db_linked_account.scalars(sa.select(RemoteAccount)).one_or_none() + assert not db_linked_account.scalars(sa.select(RemoteToken)).one_or_none() + assert not db_linked_account.scalars(sa.select(UserIdentity)).one_or_none() diff --git a/migrator/zenodo_rdm_migrator/actions/transform/oauth.py b/migrator/zenodo_rdm_migrator/actions/transform/oauth.py index c5c76bbd..a6ad5304 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/oauth.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/oauth.py @@ -246,7 +246,7 @@ def matches_action(cls, tx): # so the worst case scenario is not too bad "oauthclient_remoteaccount": [OperationType.INSERT, OperationType.UPDATE], "oauthclient_remotetoken": [OperationType.INSERT], - "accounts_useridentity": [OperationType.INSERT], + "oauthclient_useridentity": [OperationType.INSERT], } for op in tx.operations: @@ -275,7 +275,7 @@ def _transform_data(self): remote_account[key] = value elif op["source"]["table"] == "oauthclient_remotetoken": remote_token = op["after"] - elif op["source"]["table"] == "accounts_useridentity": + elif op["source"]["table"] == "oauthclient_useridentity": user_identity = op["after"] return { @@ -301,7 +301,7 @@ def matches_action(cls, tx): rules = { "oauthclient_remoteaccount": OperationType.DELETE, "oauthclient_remotetoken": OperationType.DELETE, - "accounts_useridentity": OperationType.DELETE, + "oauthclient_useridentity": OperationType.DELETE, } for op in tx.operations: @@ -322,7 +322,7 @@ def _transform_data(self): remote_account = op["before"] elif op["source"]["table"] == "oauthclient_remotetoken": remote_token = op["before"] - elif op["source"]["table"] == "accounts_useridentity": + elif op["source"]["table"] == "oauthclient_useridentity": user_identity = op["before"] return { From 33df2a520f4ff324d71f6be8c50a4c99bad62b36 Mon Sep 17 00:00:00 2001 From: Pablo Panero Date: Wed, 20 Sep 2023 10:04:02 +0200 Subject: [PATCH 10/35] migrator: add gh disconnect actions --- migrator/tests/actions/conftest.py | 47 +++-- .../tests/actions/oauth/test_oauth_actions.py | 188 ++++++++++++++---- .../oauth/test_oauth_actions_stream.py | 112 ++++++++++- .../disconnect_gh_client.jsonl | 2 + .../linked_accounts/disconnect_gh_token.jsonl | 2 + .../linked_accounts/disconnect_github.jsonl | 0 .../actions/transform/__init__.py | 2 + .../actions/transform/oauth.py | 57 +++++- .../transform/transactions.py | 2 + 9 files changed, 346 insertions(+), 66 deletions(-) create mode 100644 migrator/tests/actions/oauth/testdata/linked_accounts/disconnect_gh_client.jsonl create mode 100644 migrator/tests/actions/oauth/testdata/linked_accounts/disconnect_gh_token.jsonl delete mode 100644 migrator/tests/actions/oauth/testdata/linked_accounts/disconnect_github.jsonl diff --git a/migrator/tests/actions/conftest.py b/migrator/tests/actions/conftest.py index 34e5ff1c..f88c565b 100644 --- a/migrator/tests/actions/conftest.py +++ b/migrator/tests/actions/conftest.py @@ -99,8 +99,8 @@ def test_extract_cls(): class TestExtract(Extract): """Test extractor.""" - def __init__(self, tx, filter_unchanged=True): - self.tx = tx + def __init__(self, txs, filter_unchanged=True): + self.txs = txs if isinstance(txs, list) else [txs] self.filter_unchanged = filter_unchanged # NOTE: Copied from KafkaExtract @@ -118,26 +118,29 @@ def _filter_unchanged_values(self, op): def run(self): """Yield one element at a time.""" - if isinstance(self.tx, dict): - tx = self.tx - if isinstance(self.tx, (str, Path)): - tx_path = Path(self.tx) - assert tx_path.exists() - with jsonlines.open(tx_path) as tx_ops: - tx = { - "operations": [ - {"key": op["key"], **op["value"]} for op in tx_ops - ] - } - # convert "op" to OperationType enum - for op in tx["operations"]: - op["op"] = OperationType(op["op"].upper()) - # extract the tx_id - tx["tx_id"] = tx["operations"][0]["source"]["txId"] - yield Tx( - id=tx["tx_id"], - operations=list(map(self._filter_unchanged_values, tx["operations"])), - ) + for tx in self.txs: + if isinstance(tx, dict): + tx = tx + if isinstance(tx, (str, Path)): + tx_path = Path(tx) + assert tx_path.exists() + with jsonlines.open(tx_path) as tx_ops: + tx = { + "operations": [ + {"key": op["key"], **op["value"]} for op in tx_ops + ] + } + # convert "op" to OperationType enum + for op in tx["operations"]: + op["op"] = OperationType(op["op"].upper()) + # extract the tx_id + tx["tx_id"] = tx["operations"][0]["source"]["txId"] + yield Tx( + id=tx["tx_id"], + operations=list( + map(self._filter_unchanged_values, tx["operations"]) + ), + ) return TestExtract diff --git a/migrator/tests/actions/oauth/test_oauth_actions.py b/migrator/tests/actions/oauth/test_oauth_actions.py index bdff414d..5a7b3b90 100644 --- a/migrator/tests/actions/oauth/test_oauth_actions.py +++ b/migrator/tests/actions/oauth/test_oauth_actions.py @@ -19,6 +19,7 @@ OAuthApplicationCreateAction, OAuthApplicationDeleteAction, OAuthApplicationUpdateAction, + OAuthGHDisconnectToken, OAuthLinkedAccountConnectAction, OAuthLinkedAccountDisconnectAction, OAuthServerTokenCreateAction, @@ -887,36 +888,12 @@ class TestOAuthLinkedAccountDisconnectAction: """Disonnect an OAuth account action tests.""" def test_matches_with_valid_data(self): - assert ( - OAuthLinkedAccountDisconnectAction.matches_action( - Tx( - id=1, - operations=[ - { - "op": OperationType.DELETE, - "source": {"table": "oauthclient_remoteaccount"}, - "after": {}, - }, - { - "op": OperationType.DELETE, - "source": {"table": "oauthclient_remotetoken"}, - "after": {}, - }, - { - "op": OperationType.DELETE, - "source": {"table": "oauthclient_useridentity"}, - "after": {}, - }, - ], - ) - ) - is True - ) - - def test_matches_with_invalid_data(self): - empty = [] - - no_account = [ + full = [ + { + "op": OperationType.DELETE, + "source": {"table": "oauthclient_remoteaccount"}, + "after": {}, + }, { "op": OperationType.DELETE, "source": {"table": "oauthclient_remotetoken"}, @@ -929,12 +906,39 @@ def test_matches_with_invalid_data(self): }, ] - no_token = [ + no_user_identity = [ { "op": OperationType.DELETE, "source": {"table": "oauthclient_remoteaccount"}, "after": {}, }, + { + "op": OperationType.DELETE, + "source": {"table": "oauthclient_remotetoken"}, + "after": {}, + }, + ] + + for valid_ops in [ + full, + no_user_identity, + ]: + assert ( + OAuthLinkedAccountDisconnectAction.matches_action( + Tx(id=1, operations=valid_ops) + ) + is True + ) + + def test_matches_with_invalid_data(self): + empty = [] + + no_account = [ + { + "op": OperationType.DELETE, + "source": {"table": "oauthclient_remotetoken"}, + "after": {}, + }, { "op": OperationType.DELETE, "source": {"table": "oauthclient_useridentity"}, @@ -942,7 +946,7 @@ def test_matches_with_invalid_data(self): }, ] - no_user_identity = [ + no_token = [ { "op": OperationType.DELETE, "source": {"table": "oauthclient_remoteaccount"}, @@ -950,7 +954,7 @@ def test_matches_with_invalid_data(self): }, { "op": OperationType.DELETE, - "source": {"table": "oauthclient_remotetoken"}, + "source": {"table": "oauthclient_useridentity"}, "after": {}, }, ] @@ -1002,7 +1006,6 @@ def test_matches_with_invalid_data(self): empty, no_account, no_token, - no_user_identity, wrong_op, extra_op, ]: @@ -1021,3 +1024,120 @@ def test_transform_with_valid_data(self, disconnect_orcid_oauth_application_tx): ) ) assert isinstance(action.transform(), load.OAuthLinkedAccountDisconnectAction) + + +@pytest.fixture() +def disconnect_gh_token_oauth_app_tx(): + """Transaction data for the second phase of GH disconnection. + + As it would be after the extraction step. + """ + datafile = ( + Path(__file__).parent + / "testdata" + / "linked_accounts" + / "disconnect_gh_token.jsonl" + ) + with open(datafile, "rb") as reader: + ops = [orjson.loads(line)["value"] for line in reader] + + return {"tx_id": 1, "operations": ops} + + +class TestOAuthGHDisconnectToken: + """Disonnect an OAuth account action tests.""" + + def test_matches_with_valid_data(self): + assert ( + OAuthGHDisconnectToken.matches_action( + Tx( + id=1, + operations=[ + { + "op": OperationType.DELETE, + "source": {"table": "oauthclient_useridentity"}, + "after": {}, + }, + { + "op": OperationType.DELETE, + "source": {"table": "oauth2server_token"}, + "after": {}, + }, + ], + ) + ) + is True + ) + + def test_matches_with_invalid_data(self): + empty = [] + + no_token = [ + { + "op": OperationType.DELETE, + "source": {"table": "oauthclient_useridentity"}, + "after": {}, + }, + ] + + no_identity = [ + { + "op": OperationType.DELETE, + "source": {"table": "oauth2server_token"}, + "after": {}, + }, + ] + + wrong_op = [ + { + "op": OperationType.UPDATE, + "source": {"table": "oauthclient_useridentity"}, + "after": {}, + }, + { + "op": OperationType.DELETE, + "source": {"table": "oauth2server_token"}, + "after": {}, + }, + ] + + extra_op = ( + [ + { + "op": OperationType.DELETE, + "source": {"table": "another"}, + "after": {}, + }, + { + "op": OperationType.DELETE, + "source": {"table": "oauthclient_useridentity"}, + "after": {}, + }, + { + "op": OperationType.DELETE, + "source": {"table": "oauth2server_token"}, + "after": {}, + }, + ], + ) + + for invalid_ops in [ + empty, + no_identity, + no_token, + wrong_op, + extra_op, + ]: + assert ( + OAuthGHDisconnectToken.matches_action(Tx(id=1, operations=invalid_ops)) + is False + ) + + def test_transform_with_valid_data(self, disconnect_gh_token_oauth_app_tx): + action = OAuthGHDisconnectToken( + Tx( + id=disconnect_gh_token_oauth_app_tx["tx_id"], + operations=disconnect_gh_token_oauth_app_tx["operations"], + ) + ) + assert isinstance(action.transform(), load.OAuthGHDisconnectToken) diff --git a/migrator/tests/actions/oauth/test_oauth_actions_stream.py b/migrator/tests/actions/oauth/test_oauth_actions_stream.py index 286e2b64..980de80c 100644 --- a/migrator/tests/actions/oauth/test_oauth_actions_stream.py +++ b/migrator/tests/actions/oauth/test_oauth_actions_stream.py @@ -172,7 +172,12 @@ def test_oauth_application_delete_action_stream( assert not db_client_server.scalars(sa.select(ServerClient)).one_or_none() -def test_oauth_linked_app_connect_action_stream( +# +# ORCID +# + + +def test_oauth_linked_app_connect_orcid_action_stream( db_client_server, pg_tx_load, test_extract_cls, tx_files_linked_accounts ): stream = Stream( @@ -189,7 +194,7 @@ def test_oauth_linked_app_connect_action_stream( @pytest.fixture(scope="function") -def db_linked_account(database, session): +def db_linked_orcid_account(database, session): remote_account = RemoteAccount( id=8546, user_id=22858, @@ -224,8 +229,8 @@ def db_linked_account(database, session): return session -def test_oauth_linked_app_disconnect_action_stream( - db_linked_account, pg_tx_load, test_extract_cls, tx_files_linked_accounts +def test_oauth_linked_app_disconnect_orcid_action_stream( + db_linked_orcid_account, pg_tx_load, test_extract_cls, tx_files_linked_accounts ): stream = Stream( name="action", @@ -235,6 +240,99 @@ def test_oauth_linked_app_disconnect_action_stream( ) stream.run() - assert not db_linked_account.scalars(sa.select(RemoteAccount)).one_or_none() - assert not db_linked_account.scalars(sa.select(RemoteToken)).one_or_none() - assert not db_linked_account.scalars(sa.select(UserIdentity)).one_or_none() + assert not db_linked_orcid_account.scalars(sa.select(RemoteAccount)).one_or_none() + assert not db_linked_orcid_account.scalars(sa.select(RemoteToken)).one_or_none() + assert not db_linked_orcid_account.scalars(sa.select(UserIdentity)).one_or_none() + + +# +# GH +# + + +# def test_oauth_linked_app_connect_gh_action_stream( +# db_client_server, pg_tx_load, test_extract_cls, tx_files_linked_accounts +# ): +# stream = Stream( +# name="action", +# extract=test_extract_cls(tx_files_linked_accounts["connect_gh"]), +# transform=ZenodoTxTransform(), +# load=pg_tx_load, +# ) +# stream.run() + +# assert db_client_server.scalars(sa.select(RemoteAccount)).one() +# assert db_client_server.scalars(sa.select(RemoteToken)).one() +# assert db_client_server.scalars(sa.select(UserIdentity)).one() + + +@pytest.fixture(scope="function") +def db_linked_gh_account(database, session): + remote_account = RemoteAccount( + id=8546, + user_id=86490, + client_id="APP-MAX7XCD8Q98X4VT6", + extra_data='{"orcid": "0000-0002-5676-5956", "full_name": "Alex Ioannidis"}', + created="2023-06-29T13:00:00", + updated="2023-06-29T14:00:00", + ) + + remote_token = RemoteToken( + id_remote_account=8546, + token_type="", + access_token="R3RVeGc3K0RrM25rbXc4ZWxGM3oxYVA4LzcwVWpCNkM4aG8vRy9CNWxkZFFCMk9OR1d2d29lN3dKdWk2eEVTQQ==", + secret="", + created="2023-06-29T13:00:00", + updated="2023-06-29T14:00:00", + ) + + user_identity = UserIdentity( + id="6756943", + method="github", + id_user=86490, + created="2023-06-29T13:00:00", + updated="2023-06-29T14:00:00", + ) + + server_token = ServerToken( + id=157734, + client_id="rKmVKlRxnQJfyizWeVKRO26cZjLqd2yWhsBFkjv0", + user_id=86490, + token_type="bearer", + access_token="cH4ng3DzbXd4QTcrRjFMcTVMRHl3QlY2Rkdib0VwREY4aDhPcHo2dUt2ZnZ3OVVPa1BvRDl0L1NRZmFrdXNIU2hJR2JWc0NHZDZSVEhVT2JQcmdjS1E9PQ==", + refresh_token=None, + expires=None, + _scopes="webhooks:event", + is_personal=True, + is_internal=False, + ) + + session.add(remote_account) + session.add(remote_token) + session.add(user_identity) + session.add(server_token) + session.commit() + + return session + + +def test_oauth_linked_app_disconnect_gh_action_stream( + db_linked_gh_account, pg_tx_load, test_extract_cls, tx_files_linked_accounts +): + stream = Stream( + name="action", + extract=test_extract_cls( + [ + tx_files_linked_accounts["disconnect_gh_client"], + tx_files_linked_accounts["disconnect_gh_token"], + ] + ), + transform=ZenodoTxTransform(), + load=pg_tx_load, + ) + stream.run() + + assert not db_linked_gh_account.scalars(sa.select(RemoteAccount)).one_or_none() + assert not db_linked_gh_account.scalars(sa.select(RemoteToken)).one_or_none() + assert not db_linked_gh_account.scalars(sa.select(ServerToken)).one_or_none() + assert not db_linked_gh_account.scalars(sa.select(UserIdentity)).one_or_none() diff --git a/migrator/tests/actions/oauth/testdata/linked_accounts/disconnect_gh_client.jsonl b/migrator/tests/actions/oauth/testdata/linked_accounts/disconnect_gh_client.jsonl new file mode 100644 index 00000000..935d70c6 --- /dev/null +++ b/migrator/tests/actions/oauth/testdata/linked_accounts/disconnect_gh_client.jsonl @@ -0,0 +1,2 @@ +{"topic": "zenodo-qa.public", "partition": 0, "offset": 13280, "timestamp": 1695130471182, "timestamp_type": 0, "key": {"id_remote_account": 8546, "token_type": "", "__dbz__physicalTableIdentifier": "zenodo-qa.public.oauthclient_remotetoken"}, "value": {"before": {"id_remote_account": 8546, "token_type": "", "access_token": "Z04wTFQvUnpjUzd5alg2V3krMzRVUE9sTEpidnVEZjNNSXFCNmxSamdUcVZXU3IvRm1HVGFwRi9jN0FaSW9ZWA==", "secret": "", "created": 1695130329999576, "updated": 1695130329999584}, "after": null, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695130470684, "snapshot": "false", "db": "zenodo", "sequence": "[\"1472994628368\",\"1472994628408\"]", "schema": "public", "table": "oauthclient_remotetoken", "txId": 563876228, "lsn": 1472994628408, "xmin": null}, "op": "d", "ts_ms": 1695130471083, "transaction": {"id": "563876228:1472994628408", "total_order": 1, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 118, "serialized_value_size": 644, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 13282, "timestamp": 1695130471182, "timestamp_type": 0, "key": {"id": 8546, "__dbz__physicalTableIdentifier": "zenodo-qa.public.oauthclient_remoteaccount"}, "value": {"before": {"id": 8546, "user_id": 86490, "client_id": "64a3663a0ac1183598ce", "extra_data": "{\"tokens\": {\"webhook\": 157734}, \"name\": \"Pablo Panero\", \"login\": \"ppanero\", \"repos\": {\"252459525\": {\"default_branch\": \"master\", \"id\": 252459525, \"full_name\": \"ppanero/invenio-drafts\", \"description\": null}, \"83563583\": {\"default_branch\": \"master\", \"id\": 83563583, \"full_name\": \"ppanero/minidns\", \"description\": \"DNS library for Android and Java SE\"}, \"63950352\": {\"default_branch\": \"2.4\", \"id\": 63950352, \"full_name\": \"ppanero/MISP\", \"description\": \"MISP - Malware Information Sharing Platform & Threat Sharing\"}, \"262068245\": {\"default_branch\": \"master\", \"id\": 262068245, \"full_name\": \"ppanero/base32-lib\", \"description\": \"Library to generate, encode and decode random base32 strings.\"}, \"44679702\": {\"default_branch\": \"master\", \"id\": 44679702, \"full_name\": \"zenodo/zenodo-accessrequests\", \"description\": \"Zenodo access requests module.\"}, \"267876888\": {\"default_branch\": \"master\", \"id\": 267876888, \"full_name\": \"ppanero/invenio-theme\", \"description\": \"Invenio standard theme.\"}, \"317860377\": {\"default_branch\": \"master\", \"id\": 317860377, \"full_name\": \"ppanero/invenio-i18n\", \"description\": \"Invenio internationalization module.\"}, \"386573852\": {\"default_branch\": \"master\", \"id\": 386573852, \"full_name\": \"ppanero/elasticsearch-py\", \"description\": \"Official Python low-level client for Elasticsearch\"}, \"277326366\": {\"default_branch\": \"development\", \"id\": 277326366, \"full_name\": \"ppanero/ppanero.github.io\", \"description\": null}, \"614311967\": {\"default_branch\": \"master\", \"id\": 614311967, \"full_name\": \"ppanero/invenio-stats\", \"description\": \"Statistical data processing and querying for Invenio.\"}, \"219780130\": {\"default_branch\": \"master\", \"id\": 219780130, \"full_name\": \"ppanero/opensource\", \"description\": \"Repository for issues, documentation and other matters related to management of open source at Invenio Software.\"}, \"89223205\": {\"default_branch\": \"master\", \"id\": 89223205, \"full_name\": \"ppanero/puppet-misp\", \"description\": \"This module installs and configures MISP (Malware Information Sharing Platform)\"}, \"33995815\": {\"default_branch\": \"master\", \"id\": 33995815, \"full_name\": \"ppanero/J2EE_Training\", \"description\": \"Basic J2EE applications examples\"}, \"160211508\": {\"default_branch\": \"master\", \"id\": 160211508, \"full_name\": \"ppanero/invenio-indexer\", \"description\": \"Record indexer for Invenio.\"}, \"262090813\": {\"default_branch\": \"master\", \"id\": 262090813, \"full_name\": \"ppanero/pytest-invenio\", \"description\": \"Pytest fixtures for Invenio.\"}, \"259575871\": {\"default_branch\": \"master\", \"id\": 259575871, \"full_name\": \"ppanero/flask-resources\", \"description\": \"REST APIs for Flask\"}, \"161791042\": {\"default_branch\": \"master\", \"id\": 161791042, \"full_name\": \"ppanero/invenio-logging\", \"description\": \"Invenio logging module.\"}, \"490748487\": {\"default_branch\": \"master\", \"id\": 490748487, \"full_name\": \"ppanero/zenodo-docs-user\", \"description\": \"Zenodo User Documentation\"}, \"261774920\": {\"default_branch\": \"master\", \"id\": 261774920, \"full_name\": \"ppanero/invenio-config\", \"description\": \"Invenio configuration loading module.\"}, \"261176076\": {\"default_branch\": \"master\", \"id\": 261176076, \"full_name\": \"ppanero/invenio-oauth2server\", \"description\": \"Invenio modules that implements OAuth 2 server.\"}, \"69670989\": {\"default_branch\": \"master\", \"id\": 69670989, \"full_name\": \"ppanero/sdg_hunter\", \"description\": null}, \"247984718\": {\"default_branch\": \"master\", \"id\": 247984718, \"full_name\": \"zenodo/zenodo-rdm\", \"description\": \"Zenodo Invenio RDM instance\"}, \"79244368\": {\"default_branch\": \"master\", \"id\": 79244368, \"full_name\": \"zenodo/zenodo-docs-user\", \"description\": \"Zenodo User Documentation\"}, \"94411345\": {\"default_branch\": \"master\", \"id\": 94411345, \"full_name\": \"zenodo/zenodo-classifier\", \"description\": null}, \"263002194\": {\"default_branch\": \"master\", \"id\": 263002194, \"full_name\": \"ppanero/pywebpack\", \"description\": \"Webpack integration layer for Python.\"}, \"261972579\": {\"default_branch\": \"master\", \"id\": 261972579, \"full_name\": \"ppanero/dojson\", \"description\": \"Simple pythonic JSON to JSON converter.\"}, \"274697819\": {\"default_branch\": \"master\", \"id\": 274697819, \"full_name\": \"ppanero/invenio-records-agent\", \"description\": null}, \"49374300\": {\"default_branch\": \"master\", \"id\": 49374300, \"full_name\": \"ppanero/diseasemeter\", \"description\": null}, \"206281822\": {\"default_branch\": \"master\", \"id\": 206281822, \"full_name\": \"ppanero/invenio-rdm-records\", \"description\": \"DataCite-based data model for InvenioRDM flavour.\"}, \"263034466\": {\"default_branch\": \"master\", \"id\": 263034466, \"full_name\": \"ppanero/invenio-userprofiles\", \"description\": \"User profiles module for Invenio.\"}, \"246798947\": {\"default_branch\": \"master\", \"id\": 246798947, \"full_name\": \"ppanero/helm-chart-test\", \"description\": null}, \"121617508\": {\"default_branch\": \"master\", \"id\": 121617508, \"full_name\": \"ppanero/invenio-records-rest\", \"description\": \"Invenio records REST API module.\"}, \"444398708\": {\"default_branch\": \"main\", \"id\": 444398708, \"full_name\": \"ppanero/admin-war\", \"description\": \"Administration raffle, with a Pokemon UX\"}, \"220490874\": {\"default_branch\": \"master\", \"id\": 220490874, \"full_name\": \"ppanero/cookiecutter\", \"description\": \"A command-line utility that creates projects from cookiecutters (project templates), e.g. Python package projects, jQuery plugin projects.\"}, \"255355516\": {\"default_branch\": \"master\", \"id\": 255355516, \"full_name\": \"ppanero/fosc\", \"description\": \"Field Of Study Classification (FOSC)\"}, \"138305154\": {\"default_branch\": \"master\", \"id\": 138305154, \"full_name\": \"ppanero/invenio-db\", \"description\": \"Database management for Invenio.\"}, \"234055814\": {\"default_branch\": \"demo\", \"id\": 234055814, \"full_name\": \"ppanero/invenio-rdm-extension-demo\", \"description\": \"Invenio module to showcase how to add an extension to InvenioRDM\"}, \"31359116\": {\"default_branch\": \"master\", \"id\": 31359116, \"full_name\": \"ppanero/reversi_algorithm_AI\", \"description\": \"This is the implementation of an algorithm to play reversi/othello game. \"}, \"261788817\": {\"default_branch\": \"master\", \"id\": 261788817, \"full_name\": \"ppanero/datacite\", \"description\": \"Python API wrapper for the DataCite Metadata Store API.\"}, \"317593235\": {\"default_branch\": \"master\", \"id\": 317593235, \"full_name\": \"ppanero/invenio-records-ui\", \"description\": \"Invenio records user interface module.\"}, \"244578965\": {\"default_branch\": \"master\", \"id\": 244578965, \"full_name\": \"ppanero/training\", \"description\": \"Invenio v3 Training Material\"}, \"261783192\": {\"default_branch\": \"master\", \"id\": 261783192, \"full_name\": \"ppanero/citeproc-py-styles\", \"description\": \"CSL styles for citeproc-py.\"}, \"337696623\": {\"default_branch\": \"master\", \"id\": 337696623, \"full_name\": \"ppanero/accordion-webpack-test\", \"description\": \"Test repository to showcase an issue\"}, \"299837084\": {\"default_branch\": \"master\", \"id\": 299837084, \"full_name\": \"ppanero/marshmallow-utils\", \"description\": \"Extras and utilities for Marshmallow\"}, \"212821857\": {\"default_branch\": \"master\", \"id\": 212821857, \"full_name\": \"ppanero/invenio-cli\", \"description\": \"Cache module for Invenio\"}, \"281715368\": {\"default_branch\": \"master\", \"id\": 281715368, \"full_name\": \"ppanero/invenio-rest\", \"description\": \"REST API support for Invenio.\"}, \"306261165\": {\"default_branch\": \"master\", \"id\": 306261165, \"full_name\": \"ppanero/zenodo-spam-classifier\", \"description\": \"Zenodo Spam Classifier\"}, \"240457390\": {\"default_branch\": \"master\", \"id\": 240457390, \"full_name\": \"ppanero/docs-invenio-rdm\", \"description\": null}, \"85076660\": {\"default_branch\": \"develop\", \"id\": 85076660, \"full_name\": \"ppanero/storehaus\", \"description\": \"Storehaus is a library that makes it easy to work with asynchronous key value stores\"}, \"45678263\": {\"default_branch\": \"master\", \"id\": 45678263, \"full_name\": \"zenodo/zenodo-migrator\", \"description\": \"Zenodo module for migrating data into Invenio 3.\"}, \"317466297\": {\"default_branch\": \"master\", \"id\": 317466297, \"full_name\": \"ppanero/invenio-search-ui\", \"description\": \"UI for Invenio Search.\"}, \"602099393\": {\"default_branch\": \"master\", \"id\": 602099393, \"full_name\": \"ppanero/invenio-banners\", \"description\": \"Create and show banners with useful messages to users.\"}, \"261752517\": {\"default_branch\": \"master\", \"id\": 261752517, \"full_name\": \"ppanero/invenio-previewer\", \"description\": \"Invenio module for previewing files.\"}, \"319332551\": {\"default_branch\": \"master\", \"id\": 319332551, \"full_name\": \"ppanero/invenio-pidrelations\", \"description\": \"**WORK IN PROGRESS** - warning: repo can be squashed and force pushed!\"}, \"248186658\": {\"default_branch\": \"master\", \"id\": 248186658, \"full_name\": \"ppanero/demo-inveniordm\", \"description\": \"Demosite for InvenioRDM.\"}, \"218804088\": {\"default_branch\": \"master\", \"id\": 218804088, \"full_name\": \"ppanero/cookiecutter-invenio-rdm\", \"description\": \"Cookiecutter template for a new Invenio RDM instance.\"}, \"94749400\": {\"default_branch\": \"master\", \"id\": 94749400, \"full_name\": \"ppanero/coursera-big-data-analysis-with-spark\", \"description\": \"Repository with the summaries and programming assigments of the Coursera courses I have completed\"}, \"662918362\": {\"default_branch\": \"main\", \"id\": 662918362, \"full_name\": \"ppanero/django-playground\", \"description\": \"Just playing with Django\"}, \"79111899\": {\"default_branch\": \"master\", \"id\": 79111899, \"full_name\": \"zenodo/about.zenodo.org\", \"description\": \"Zenodo About Site\"}, \"427018972\": {\"default_branch\": \"main\", \"id\": 427018972, \"full_name\": \"ppanero/zenodo-release-test\", \"description\": null}, \"235594976\": {\"default_branch\": \"master\", \"id\": 235594976, \"full_name\": \"ppanero/invenio-s3\", \"description\": \"S3 file storage support for Invenio.\"}, \"244679632\": {\"default_branch\": \"master\", \"id\": 244679632, \"full_name\": \"ppanero/invenio-oaiserver\", \"description\": \"Invenio module that adds more fun to the platform.\"}, \"568834279\": {\"default_branch\": \"master\", \"id\": 568834279, \"full_name\": \"ppanero/invenio-rdm-migrator\", \"description\": \"Migration module for InvenioRDM.\"}, \"262254313\": {\"default_branch\": \"master\", \"id\": 262254313, \"full_name\": \"ppanero/invenio-jsonschemas\", \"description\": \"Invenio Schema Registry\"}, \"160536815\": {\"default_branch\": \"master\", \"id\": 160536815, \"full_name\": \"ppanero/cookiecutter-invenio-datamodel\", \"description\": \"Cookiecutter template for an Invenio data model.\"}, \"156718325\": {\"default_branch\": \"master\", \"id\": 156718325, \"full_name\": \"ppanero/invenio-app\", \"description\": \"WSGI, Celery and CLI applications for Invenio flavours.\"}, \"347623159\": {\"default_branch\": \"main\", \"id\": 347623159, \"full_name\": \"ppanero/coding-challenges\", \"description\": \"Coding challenges, interview style!\"}, \"105632504\": {\"default_branch\": \"master\", \"id\": 105632504, \"full_name\": \"ppanero/Cortex-Analyzers\", \"description\": \"Cortex Analyzers Repository\"}, \"237186300\": {\"default_branch\": \"master\", \"id\": 237186300, \"full_name\": \"ppanero/invenio-files-rest\", \"description\": \"REST API for uploading/downloading files for Invenio.\"}, \"189557504\": {\"default_branch\": \"master\", \"id\": 189557504, \"full_name\": \"ppanero/docker-invenio\", \"description\": \"Docker base images for Invenio.\"}, \"243214088\": {\"default_branch\": \"master\", \"id\": 243214088, \"full_name\": \"ppanero/flask-limiter\", \"description\": \"rate limiting extension for flask applications\"}, \"179234572\": {\"default_branch\": \"master\", \"id\": 179234572, \"full_name\": \"ppanero/invenio-records\", \"description\": \"Invenio-Records is a metadata storage module.\"}, \"79111955\": {\"default_branch\": \"master\", \"id\": 79111955, \"full_name\": \"zenodo/developers.zenodo.org\", \"description\": \"Zenodo Developers Site\"}, \"261704983\": {\"default_branch\": \"master\", \"id\": 261704983, \"full_name\": \"ppanero/flask-kvsession\", \"description\": \"A drop-in replacement for Flask's session handling using server-side sessions.\"}, \"289911577\": {\"default_branch\": \"master\", \"id\": 289911577, \"full_name\": \"ppanero/docker-services-cli\", \"description\": \"Infrastruce services for local and CI tests.\"}, \"261974298\": {\"default_branch\": \"master\", \"id\": 261974298, \"full_name\": \"ppanero/flask-menu\", \"description\": \"Flask-Menu is a Flask extension that adds support for generating menus.\"}, \"246600478\": {\"default_branch\": \"master\", \"id\": 246600478, \"full_name\": \"ppanero/helm-invenio\", \"description\": \"PROTOTYPE\"}, \"309361953\": {\"default_branch\": \"master\", \"id\": 309361953, \"full_name\": \"ppanero/react-invenio-forms\", \"description\": \"React component library for Formik components.\"}, \"37220642\": {\"default_branch\": \"master\", \"id\": 37220642, \"full_name\": \"ppanero/Personal_accounting\", \"description\": \"This is a basic personal accounting (incomes, expenses managemente, etc.) RESTful API with a basic JavaScript client.\"}, \"8135462\": {\"default_branch\": \"master\", \"id\": 8135462, \"full_name\": \"zenodo/zenodo\", \"description\": \"Research. Shared.\"}, \"319348010\": {\"default_branch\": \"master\", \"id\": 319348010, \"full_name\": \"ppanero/invenio-celery\", \"description\": \"Integration layer between Celery and Invenio.\"}, \"132122929\": {\"default_branch\": \"master\", \"id\": 132122929, \"full_name\": \"ppanero/invenio-oauthclient\", \"description\": \"Invenio module that provides OAuth web authorization support.\"}, \"195800883\": {\"default_branch\": \"master\", \"id\": 195800883, \"full_name\": \"ppanero/zenodo\", \"description\": \"Research. Shared.\"}, \"278560566\": {\"default_branch\": \"master\", \"id\": 278560566, \"full_name\": \"ppanero/invenio-files-processor\", \"description\": \"PROTOTYPE!!! Invenio module for file processing tasks.\"}, \"279229752\": {\"default_branch\": \"master\", \"id\": 279229752, \"full_name\": \"ppanero/invenio-drafts-resources\", \"description\": null}, \"259586362\": {\"default_branch\": \"master\", \"id\": 259586362, \"full_name\": \"ppanero/invenio-records-resources\", \"description\": \"REST APIs for Invenio.\"}, \"457732924\": {\"default_branch\": \"master\", \"id\": 457732924, \"full_name\": \"ppanero/invenio-queues\", \"description\": \"PROTOTYPE!!! Do not use this module yet.\"}, \"332469567\": {\"default_branch\": \"master\", \"id\": 332469567, \"full_name\": \"ppanero/invenio\", \"description\": \"Invenio digital library framework\"}, \"32339268\": {\"default_branch\": \"master\", \"id\": 32339268, \"full_name\": \"ppanero/com.aware.plugin.io\", \"description\": \"Plugin for the AWARE framework for distinction between\\nindoor and outdoor placement\"}, \"483147589\": {\"default_branch\": \"master\", \"id\": 483147589, \"full_name\": \"ppanero/invenio-users-resources\", \"description\": \"Invenio module which provides a REST API for managing users and groups.\"}, \"170124615\": {\"default_branch\": \"master\", \"id\": 170124615, \"full_name\": \"ppanero/react-searchkit\", \"description\": \"React components to build your search UI application.\"}, \"161789773\": {\"default_branch\": \"master\", \"id\": 161789773, \"full_name\": \"ppanero/invenio-assets\", \"description\": \"Invenio media assets management module.\"}, \"111808334\": {\"default_branch\": \"trunk\", \"id\": 111808334, \"full_name\": \"ppanero/kafka\", \"description\": \"Mirror of Apache Kafka\"}, \"438953297\": {\"default_branch\": \"master\", \"id\": 438953297, \"full_name\": \"ppanero/invenio-requests\", \"description\": null}, \"127255896\": {\"default_branch\": \"master\", \"id\": 127255896, \"full_name\": \"ppanero/invenio-search\", \"description\": \"Invenio module for information retrieval.\"}, \"161787740\": {\"default_branch\": \"master\", \"id\": 161787740, \"full_name\": \"ppanero/invenio-admin\", \"description\": \"Invenio admin module.\"}, \"337784157\": {\"default_branch\": \"main\", \"id\": 337784157, \"full_name\": \"ppanero/coursera-programming-in-golang\", \"description\": \"Code produced as part of the Coursera Specialization [\\\"Programming with Google Go](https://www.coursera.org/specializations/google-golang).\"}, \"178234206\": {\"default_branch\": \"master\", \"id\": 178234206, \"full_name\": \"ppanero/udemy-modern-react-with-redux\", \"description\": \"Repo with the code of some tutorials I have/am followed/ing\"}, \"263368032\": {\"default_branch\": \"master\", \"id\": 263368032, \"full_name\": \"ppanero/invenio-accounts\", \"description\": \"Invenio module for managing user accounts.\"}, \"161527137\": {\"default_branch\": \"master\", \"id\": 161527137, \"full_name\": \"ppanero/uoc_projects\", \"description\": \"Repository for the projects carried out for my Masters in Data Science at UOC\"}, \"250563388\": {\"default_branch\": \"master\", \"id\": 250563388, \"full_name\": \"ppanero/mkdocs-versioning\", \"description\": \"A tool that allows for versioning sites built with mkdocs\"}, \"26401131\": {\"default_branch\": \"master\", \"id\": 26401131, \"full_name\": \"zenodo/zenodo-backup\", \"description\": \"Static backup site for Zenodo in case of maintenance and/or unexpected downtime.\"}, \"88861550\": {\"default_branch\": \"master\", \"id\": 88861550, \"full_name\": \"ppanero/randscripts\", \"description\": \"Random scripts/small programs needed for some task\"}, \"268475759\": {\"default_branch\": \"master\", \"id\": 268475759, \"full_name\": \"ppanero/invenio-communities\", \"description\": \"Invenio communities module.\"}, \"372728689\": {\"default_branch\": \"master\", \"id\": 372728689, \"full_name\": \"ppanero/flask-iiif\", \"description\": \"Flask-IIIF is permitting easy integration with the International Image Interoperability Framework (IIIF) API standards.\"}, \"262082451\": {\"default_branch\": \"master\", \"id\": 262082451, \"full_name\": \"ppanero/idutils\", \"description\": \"Small Python library to validate persistent identifiers used in scholarly communication.\"}, \"97226101\": {\"default_branch\": \"master\", \"id\": 97226101, \"full_name\": \"ppanero/tinyproxy\", \"description\": \"tinyproxy - a light-weight HTTP/HTTPS proxy daemon for POSIX operating systems\"}, \"28222840\": {\"default_branch\": \"lanesdev\", \"id\": 28222840, \"full_name\": \"ppanero/RhoPollard_Factorize\", \"description\": \"Java multithreading Rho Pollard factoring algorithm\"}, \"107264379\": {\"default_branch\": \"master\", \"id\": 107264379, \"full_name\": \"ppanero/TheHive\", \"description\": \"TheHive: a Scalable, Open Source and Free Security Incident Response Platform\"}, \"161607554\": {\"default_branch\": \"master\", \"id\": 161607554, \"full_name\": \"ppanero/cookiecutter-invenio-instance\", \"description\": \"Cookiecutter template for an Invenio instance.\"}, \"290481539\": {\"default_branch\": \"master\", \"id\": 290481539, \"full_name\": \"ppanero/cookiecutter-invenio-module\", \"description\": \"Cookiecutter template for an Invenio module.\"}, \"161790344\": {\"default_branch\": \"master\", \"id\": 161790344, \"full_name\": \"ppanero/invenio-base\", \"description\": \"Base package for building the Invenio application.\"}, \"200075657\": {\"default_branch\": \"master\", \"id\": 200075657, \"full_name\": \"ppanero/invenio-app-rdm\", \"description\": \"RDM flavour of Invenio\"}, \"34480522\": {\"default_branch\": \"master\", \"id\": 34480522, \"full_name\": \"ppanero/Distributed_4_Player_Chess\", \"description\": \"4 player chess (all-to-all mode)\"}, \"160364696\": {\"default_branch\": \"master\", \"id\": 160364696, \"full_name\": \"ppanero/inveniosoftware.org\", \"description\": \"Sources of the http://inveniosoftware.org web site.\"}, \"321325459\": {\"default_branch\": \"master\", \"id\": 321325459, \"full_name\": \"ppanero/invenio-vocabularies\", \"description\": \"Invenio module for managing vocabularies.\"}, \"200073967\": {\"default_branch\": \"master\", \"id\": 200073967, \"full_name\": \"ppanero/invenio-records-permissions\", \"description\": \"Permissions for Invenio's records REST API.\"}, \"244411291\": {\"default_branch\": \"master\", \"id\": 244411291, \"full_name\": \"ppanero/invenio-iiif\", \"description\": \"IIIF API for Invenio.\"}, \"88095652\": {\"default_branch\": \"master\", \"id\": 88095652, \"full_name\": \"ppanero/UNED_CSE_Scripts\", \"description\": \"Simple scripts used for assigments on my Computer Science Engineering degree at UNED\"}, \"244406257\": {\"default_branch\": \"master\", \"id\": 244406257, \"full_name\": \"ppanero/invenio-cache\", \"description\": \"Cache module for Invenio\"}, \"520954282\": {\"default_branch\": \"master\", \"id\": 520954282, \"full_name\": \"ppanero/invenio-webhooks\", \"description\": \"Invenio module for processing webhook events.\"}, \"261756743\": {\"default_branch\": \"master\", \"id\": 261756743, \"full_name\": \"ppanero/jsonresolver\", \"description\": \"JSON data resolver with support for plugins.\"}, \"570582445\": {\"default_branch\": \"main\", \"id\": 570582445, \"full_name\": \"ppanero/invenio-administration\", \"description\": \"Invenio administration module.\"}, \"8135480\": {\"default_branch\": \"zenodo-master\", \"id\": 8135480, \"full_name\": \"zenodo/invenio\", \"description\": \"Deprecated repository.\"}, \"435429296\": {\"default_branch\": \"master\", \"id\": 435429296, \"full_name\": \"ppanero/zenodo-rdm\", \"description\": \"Zenodo Invenio RDM instance\"}, \"261780041\": {\"default_branch\": \"master\", \"id\": 261780041, \"full_name\": \"ppanero/invenio-formatter\", \"description\": \"Invenio module for formatting the bibliographic records.\"}, \"35002810\": {\"default_branch\": \"master\", \"id\": 35002810, \"full_name\": \"ppanero/java-design-patterns\", \"description\": \"Design pattern samples implemented in Java\"}, \"248187845\": {\"default_branch\": \"master\", \"id\": 248187845, \"full_name\": \"ppanero/dotfiles\", \"description\": \"Humble dotfiles\"}, \"79334344\": {\"default_branch\": \"master\", \"id\": 79334344, \"full_name\": \"zenodo/blog.zenodo.org\", \"description\": \"Zenodo News Site\"}, \"485388745\": {\"default_branch\": \"master\", \"id\": 485388745, \"full_name\": \"ppanero/flask-security-invenio\", \"description\": \"Private fork of Flask-Security\"}, \"335100363\": {\"default_branch\": \"master\", \"id\": 335100363, \"full_name\": \"ppanero/react-webpack-boilerplate\", \"description\": \"This repository is a boiler plate to start React projects, using Webpack for assets management and SCSS for styling.\"}, \"212273102\": {\"default_branch\": \"master\", \"id\": 212273102, \"full_name\": \"ppanero/rfcs\", \"description\": \"RFCs for Invenio\"}, \"308595151\": {\"default_branch\": \"master\", \"id\": 308595151, \"full_name\": \"ppanero/.github\", \"description\": \"Default community health files for the inveniosoftware GitHub organization repositories.\"}, \"301656016\": {\"default_branch\": \"master\", \"id\": 301656016, \"full_name\": \"ppanero/react-invenio-deposit\", \"description\": \"React application for Invenio deposit forms.\"}, \"80704473\": {\"default_branch\": \"master\", \"id\": 80704473, \"full_name\": \"ppanero/dnspython\", \"description\": \"a powerful DNS toolkit for python\"}, \"281133536\": {\"default_branch\": \"master\", \"id\": 281133536, \"full_name\": \"ppanero/invenio-pidstore\", \"description\": \"Invenio module that stores and registers persistent identifiers.\"}, \"479993832\": {\"default_branch\": \"main\", \"id\": 479993832, \"full_name\": \"ppanero/PokeWars-Backend\", \"description\": \"PokeWars backend application\"}, \"79111932\": {\"default_branch\": \"master\", \"id\": 79111932, \"full_name\": \"zenodo/help.zenodo.org\", \"description\": \"Zenodo Help Site\"}, \"161791656\": {\"default_branch\": \"master\", \"id\": 161791656, \"full_name\": \"ppanero/invenio-access\", \"description\": \"Invenio module for common role based access control.\"}, \"160569336\": {\"default_branch\": \"master\", \"id\": 160569336, \"full_name\": \"ppanero/uoc_data_mining\", \"description\": \"Data Mining Master course project\"}}, \"id\": 6756943, \"last_sync\": \"2023-09-19T13:32:16.294105+00:00\"}", "created": 1695130329996553, "updated": 1695130336304024}, "after": null, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695130470684, "snapshot": "false", "db": "zenodo", "sequence": "[\"1472994628368\",\"1472994628560\"]", "schema": "public", "table": "oauthclient_remoteaccount", "txId": 563876228, "lsn": 1472994628560, "xmin": null}, "op": "d", "ts_ms": 1695130471083, "transaction": {"id": "563876228:1472994628560", "total_order": 2, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 89, "serialized_value_size": 25570, "serialized_header_size": -1} diff --git a/migrator/tests/actions/oauth/testdata/linked_accounts/disconnect_gh_token.jsonl b/migrator/tests/actions/oauth/testdata/linked_accounts/disconnect_gh_token.jsonl new file mode 100644 index 00000000..33dcea22 --- /dev/null +++ b/migrator/tests/actions/oauth/testdata/linked_accounts/disconnect_gh_token.jsonl @@ -0,0 +1,2 @@ +{"topic": "zenodo-qa.public", "partition": 0, "offset": 13276, "timestamp": 1695130471182, "timestamp_type": 0, "key": {"id": "6756943", "method": "github", "__dbz__physicalTableIdentifier": "zenodo-qa.public.oauthclient_useridentity"}, "value": {"before": {"id": "6756943", "method": "github", "id_user": 86490, "created": 1695130336318289, "updated": 1695130336318298}, "after": null, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695130470641, "snapshot": "false", "db": "zenodo", "sequence": "[\"1472994611464\",\"1472994627904\"]", "schema": "public", "table": "oauthclient_useridentity", "txId": 563876226, "lsn": 1472994627904, "xmin": null}, "op": "d", "ts_ms": 1695130471083, "transaction": {"id": "563876226:1472994627904", "total_order": 1, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 111, "serialized_value_size": 535, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 13278, "timestamp": 1695130471182, "timestamp_type": 0, "key": {"id": 157734, "__dbz__physicalTableIdentifier": "zenodo-qa.public.oauth2server_token"}, "value": {"before": {"id": 157734, "client_id": "rKmVKlRxnQJfyizWeVKRO26cZjLqd2yWhsBFkjv0", "user_id": 86490, "token_type": "bearer", "access_token": "dHRMMTN5K3dUTCtBMVcyMFhaRHFJa1prYnJ1YkN0Zlo4cVFmU1d1NXlTUnAveDE3WVFKcFBUeUJHTmFnQTh2emdpcG1idG1ZbTJpeFgzeG5nVGsrTHc9PQ==", "refresh_token": null, "expires": null, "_scopes": "webhooks:event", "is_personal": true, "is_internal": true}, "after": null, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695130470641, "snapshot": "false", "db": "zenodo", "sequence": "[\"1472994611464\",\"1472994628008\"]", "schema": "public", "table": "oauth2server_token", "txId": 563876226, "lsn": 1472994628008, "xmin": null}, "op": "d", "ts_ms": 1695130471083, "transaction": {"id": "563876226:1472994628008", "total_order": 2, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 84, "serialized_value_size": 770, "serialized_header_size": -1} \ No newline at end of file diff --git a/migrator/tests/actions/oauth/testdata/linked_accounts/disconnect_github.jsonl b/migrator/tests/actions/oauth/testdata/linked_accounts/disconnect_github.jsonl deleted file mode 100644 index e69de29b..00000000 diff --git a/migrator/zenodo_rdm_migrator/actions/transform/__init__.py b/migrator/zenodo_rdm_migrator/actions/transform/__init__.py index 30422580..55bdd449 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/__init__.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/__init__.py @@ -25,6 +25,7 @@ OAuthApplicationCreateAction, OAuthApplicationDeleteAction, OAuthApplicationUpdateAction, + OAuthGHDisconnectToken, OAuthLinkedAccountConnectAction, OAuthLinkedAccountDisconnectAction, OAuthServerTokenCreateAction, @@ -47,6 +48,7 @@ "OAuthApplicationCreateAction", "OAuthApplicationDeleteAction", "OAuthApplicationUpdateAction", + "OAuthGHDisconnectToken", "OAuthLinkedAccountConnectAction", "OAuthLinkedAccountDisconnectAction", "OAuthServerTokenCreateAction", diff --git a/migrator/zenodo_rdm_migrator/actions/transform/oauth.py b/migrator/zenodo_rdm_migrator/actions/transform/oauth.py index a6ad5304..dc116a4a 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/oauth.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/oauth.py @@ -295,13 +295,13 @@ class OAuthLinkedAccountDisconnectAction(TransformAction): @classmethod def matches_action(cls, tx): """Checks if the data corresponds with that required by the action.""" - if len(tx.operations) != 3: + if len(tx.operations) > 3 or len(tx.operations) < 2: return False rules = { "oauthclient_remoteaccount": OperationType.DELETE, "oauthclient_remotetoken": OperationType.DELETE, - "oauthclient_useridentity": OperationType.DELETE, + "oauthclient_useridentity": OperationType.DELETE, # optional in gh } for op in tx.operations: @@ -309,6 +309,11 @@ def matches_action(cls, tx): if not rule or rule != op["op"]: return False + # the previous len check means that at this point the rules have 1 or 0 left + # if the len is 1 then only left should be the identity + if len(rules) == 1 and not "oauthclient_useridentity" in rules.keys(): + return False + return True def _transform_data(self): @@ -325,9 +330,55 @@ def _transform_data(self): elif op["source"]["table"] == "oauthclient_useridentity": user_identity = op["before"] - return { + result = { "tx_id": self.tx.id, "remote_account": IdentityTransform()._transform(remote_account), "remote_token": IdentityTransform()._transform(remote_token), + } + + if user_identity: + result["user_identity"] = IdentityTransform()._transform(user_identity) + + return result + + +class OAuthGHDisconnectToken(TransformAction): + """Zenodo to RDM GH linked account disconnect server token and identity.""" + + name = "oauth-gh-application-disconnect" + load_cls = load.OAuthGHDisconnectToken + + @classmethod + def matches_action(cls, tx): + """Checks if the data corresponds with that required by the action.""" + if len(tx.operations) != 2: + return False + + rules = { + "oauthclient_useridentity": OperationType.DELETE, + "oauth2server_token": OperationType.DELETE, + } + + for op in tx.operations: + rule = rules.pop(op["source"]["table"], None) + if not rule or rule != op["op"]: + return False + + return True + + def _transform_data(self): + """Transforms the data and returns dictionary.""" + token = None + user_identity = None + + for op in self.tx.operations: + if op["source"]["table"] == "oauth2server_token": + token = op["before"] + elif op["source"]["table"] == "oauthclient_useridentity": + user_identity = op["before"] + + return { + "tx_id": self.tx.id, + "token": OAuthServerTokenTransform()._transform(token), "user_identity": IdentityTransform()._transform(user_identity), } diff --git a/migrator/zenodo_rdm_migrator/transform/transactions.py b/migrator/zenodo_rdm_migrator/transform/transactions.py index b3ee84a0..1dac77af 100644 --- a/migrator/zenodo_rdm_migrator/transform/transactions.py +++ b/migrator/zenodo_rdm_migrator/transform/transactions.py @@ -23,6 +23,7 @@ OAuthApplicationCreateAction, OAuthApplicationDeleteAction, OAuthApplicationUpdateAction, + OAuthGHDisconnectToken, OAuthLinkedAccountConnectAction, OAuthLinkedAccountDisconnectAction, OAuthServerTokenCreateAction, @@ -52,6 +53,7 @@ class ZenodoTxTransform(BaseTxTransform): OAuthApplicationCreateAction, OAuthApplicationDeleteAction, OAuthApplicationUpdateAction, + OAuthGHDisconnectToken, OAuthLinkedAccountConnectAction, OAuthLinkedAccountDisconnectAction, OAuthServerTokenCreateAction, From 47fda006e9b6fd367cd467a732efafbb5c4af645 Mon Sep 17 00:00:00 2001 From: Pablo Panero Date: Wed, 20 Sep 2023 10:49:30 +0200 Subject: [PATCH 11/35] migrator: add support for gh connect action --- .../tests/actions/oauth/test_oauth_actions.py | 211 ++++++++++++++---- .../oauth/test_oauth_actions_stream.py | 30 +-- .../testdata/linked_accounts/connect_gh.jsonl | 7 + .../linked_accounts/connect_github.jsonl | 0 .../actions/transform/oauth.py | 57 +++-- 5 files changed, 228 insertions(+), 77 deletions(-) create mode 100644 migrator/tests/actions/oauth/testdata/linked_accounts/connect_gh.jsonl delete mode 100644 migrator/tests/actions/oauth/testdata/linked_accounts/connect_github.jsonl diff --git a/migrator/tests/actions/oauth/test_oauth_actions.py b/migrator/tests/actions/oauth/test_oauth_actions.py index 5a7b3b90..286f9296 100644 --- a/migrator/tests/actions/oauth/test_oauth_actions.py +++ b/migrator/tests/actions/oauth/test_oauth_actions.py @@ -660,40 +660,87 @@ def connect_orcid_oauth_application_tx(): return {"tx_id": 1, "operations": ops} +@pytest.fixture() +def connect_gh_oauth_application_tx(): + """Transaction data to connect an OAuth ORCID account. + + As it would be after the extraction step. + """ + datafile = ( + Path(__file__).parent / "testdata" / "linked_accounts" / "connect_gh.jsonl" + ) + with open(datafile, "rb") as reader: + ops = [orjson.loads(line)["value"] for line in reader] + + return {"tx_id": 1, "operations": ops} + + class TestOAuthLinkedAccountConnectAction: """Connect an OAuth account action tests.""" def test_matches_with_valid_data(self): - assert ( - OAuthLinkedAccountConnectAction.matches_action( - Tx( - id=1, - operations=[ - { - "op": OperationType.INSERT, - "source": {"table": "oauthclient_remoteaccount"}, - "after": {}, - }, - { - "op": OperationType.INSERT, - "source": {"table": "oauthclient_remotetoken"}, - "after": {}, - }, - { - "op": OperationType.INSERT, - "source": {"table": "oauthclient_useridentity"}, - "after": {}, - }, - { - "op": OperationType.UPDATE, - "source": {"table": "oauthclient_remoteaccount"}, - "after": {}, - }, - ], + full = [ + { + "op": OperationType.INSERT, + "source": {"table": "oauthclient_remoteaccount"}, + "after": {}, + }, + { + "op": OperationType.INSERT, + "source": {"table": "oauthclient_remotetoken"}, + "after": {}, + }, + { + "op": OperationType.INSERT, + "source": {"table": "oauthclient_useridentity"}, + "after": {}, + }, + { + "op": OperationType.UPDATE, + "source": {"table": "oauthclient_remoteaccount"}, + "after": {}, + }, + { + "op": OperationType.INSERT, + "source": {"table": "oauth2server_client"}, + "after": {}, + }, + { + "op": OperationType.INSERT, + "source": {"table": "oauth2server_token"}, + "after": {}, + }, + { + "op": OperationType.UPDATE, + "source": {"table": "oauthclient_remoteaccount"}, + "after": {}, + }, + ] + minimal = [ + { + "op": OperationType.INSERT, + "source": {"table": "oauthclient_remoteaccount"}, + "after": {}, + }, + { + "op": OperationType.INSERT, + "source": {"table": "oauthclient_remotetoken"}, + "after": {}, + }, + { + "op": OperationType.INSERT, + "source": {"table": "oauthclient_useridentity"}, + "after": {}, + }, + ] + + for valid_ops in [minimal]: + assert ( + OAuthLinkedAccountConnectAction.matches_action( + Tx(id=1, operations=valid_ops) ) + is True ) - is True - ) def test_matches_with_invalid_data(self): empty = [] @@ -709,14 +756,35 @@ def test_matches_with_invalid_data(self): "source": {"table": "oauthclient_useridentity"}, "after": {}, }, + ] + + no_token = [ { - "op": OperationType.UPDATE, + "op": OperationType.INSERT, "source": {"table": "oauthclient_remoteaccount"}, "after": {}, }, + { + "op": OperationType.INSERT, + "source": {"table": "oauthclient_useridentity"}, + "after": {}, + }, ] - no_account_update = [ + no_user_identity = [ + { + "op": OperationType.INSERT, + "source": {"table": "oauthclient_remoteaccount"}, + "after": {}, + }, + { + "op": OperationType.INSERT, + "source": {"table": "oauthclient_remotetoken"}, + "after": {}, + }, + ] + + no_server_token = [ { "op": OperationType.INSERT, "source": {"table": "oauthclient_remoteaccount"}, @@ -732,9 +800,14 @@ def test_matches_with_invalid_data(self): "source": {"table": "oauthclient_useridentity"}, "after": {}, }, + { + "op": OperationType.INSERT, + "source": {"table": "oauth2server_client"}, + "after": {}, + }, ] - double_insert = [ + no_server_client = [ { "op": OperationType.INSERT, "source": {"table": "oauthclient_remoteaccount"}, @@ -752,14 +825,14 @@ def test_matches_with_invalid_data(self): }, { "op": OperationType.INSERT, - "source": {"table": "oauthclient_remoteaccount"}, + "source": {"table": "oauth2server_token"}, "after": {}, }, ] - double_update = [ + wrong_update_op = [ { - "op": OperationType.UPDATE, + "op": OperationType.INSERT, "source": {"table": "oauthclient_remoteaccount"}, "after": {}, }, @@ -778,14 +851,34 @@ def test_matches_with_invalid_data(self): "source": {"table": "oauthclient_remoteaccount"}, "after": {}, }, + { + "op": OperationType.INSERT, + "source": {"table": "oauth2server_client"}, + "after": {}, + }, + { + "op": OperationType.INSERT, + "source": {"table": "oauth2server_token"}, + "after": {}, + }, + { + "op": OperationType.UPDATE, # wrong + "source": {"table": "oauth2server_token"}, + "after": {}, + }, ] - no_token = [ + extra_update = [ { "op": OperationType.INSERT, "source": {"table": "oauthclient_remoteaccount"}, "after": {}, }, + { + "op": OperationType.INSERT, + "source": {"table": "oauthclient_remotetoken"}, + "after": {}, + }, { "op": OperationType.INSERT, "source": {"table": "oauthclient_useridentity"}, @@ -796,27 +889,24 @@ def test_matches_with_invalid_data(self): "source": {"table": "oauthclient_remoteaccount"}, "after": {}, }, - ] - - no_user_identity = [ { "op": OperationType.INSERT, - "source": {"table": "oauthclient_remoteaccount"}, + "source": {"table": "oauth2server_client"}, "after": {}, }, { "op": OperationType.INSERT, - "source": {"table": "oauthclient_remotetoken"}, + "source": {"table": "oauth2server_token"}, "after": {}, }, { "op": OperationType.UPDATE, - "source": {"table": "oauthclient_remoteaccount"}, + "source": {"table": "another"}, "after": {}, }, ] - wrong_op = [ + extra_insert = [ { "op": OperationType.INSERT, "source": {"table": "oauthclient_remoteaccount"}, @@ -828,7 +918,7 @@ def test_matches_with_invalid_data(self): "after": {}, }, { - "op": OperationType.UPDATE, + "op": OperationType.INSERT, "source": {"table": "oauthclient_useridentity"}, "after": {}, }, @@ -837,17 +927,33 @@ def test_matches_with_invalid_data(self): "source": {"table": "oauthclient_remoteaccount"}, "after": {}, }, + { + "op": OperationType.INSERT, + "source": {"table": "oauth2server_client"}, + "after": {}, + }, + { + "op": OperationType.INSERT, + "source": {"table": "oauth2server_token"}, + "after": {}, + }, + { + "op": OperationType.INSERT, + "source": {"table": "another"}, + "after": {}, + }, ] for invalid_ops in [ empty, no_account, - no_account_update, - double_insert, - double_update, no_token, no_user_identity, - wrong_op, + no_server_token, + no_server_client, + wrong_update_op, + extra_update, + extra_insert, ]: assert ( OAuthLinkedAccountConnectAction.matches_action( @@ -856,7 +962,7 @@ def test_matches_with_invalid_data(self): is False ) - def test_transform_with_valid_data(self, connect_orcid_oauth_application_tx): + def test_transform_with_valid_orcid_data(self, connect_orcid_oauth_application_tx): action = OAuthLinkedAccountConnectAction( Tx( id=connect_orcid_oauth_application_tx["tx_id"], @@ -865,6 +971,15 @@ def test_transform_with_valid_data(self, connect_orcid_oauth_application_tx): ) assert isinstance(action.transform(), load.OAuthLinkedAccountConnectAction) + def test_transform_with_valid_gh_data(self, connect_gh_oauth_application_tx): + action = OAuthLinkedAccountConnectAction( + Tx( + id=connect_gh_oauth_application_tx["tx_id"], + operations=connect_gh_oauth_application_tx["operations"], + ) + ) + assert isinstance(action.transform(), load.OAuthLinkedAccountConnectAction) + @pytest.fixture() def disconnect_orcid_oauth_application_tx(): diff --git a/migrator/tests/actions/oauth/test_oauth_actions_stream.py b/migrator/tests/actions/oauth/test_oauth_actions_stream.py index 980de80c..d6a0c455 100644 --- a/migrator/tests/actions/oauth/test_oauth_actions_stream.py +++ b/migrator/tests/actions/oauth/test_oauth_actions_stream.py @@ -250,20 +250,22 @@ def test_oauth_linked_app_disconnect_orcid_action_stream( # -# def test_oauth_linked_app_connect_gh_action_stream( -# db_client_server, pg_tx_load, test_extract_cls, tx_files_linked_accounts -# ): -# stream = Stream( -# name="action", -# extract=test_extract_cls(tx_files_linked_accounts["connect_gh"]), -# transform=ZenodoTxTransform(), -# load=pg_tx_load, -# ) -# stream.run() - -# assert db_client_server.scalars(sa.select(RemoteAccount)).one() -# assert db_client_server.scalars(sa.select(RemoteToken)).one() -# assert db_client_server.scalars(sa.select(UserIdentity)).one() +def test_oauth_linked_app_connect_gh_action_stream( + db_client_server, pg_tx_load, test_extract_cls, tx_files_linked_accounts +): + stream = Stream( + name="action", + extract=test_extract_cls(tx_files_linked_accounts["connect_gh"]), + transform=ZenodoTxTransform(), + load=pg_tx_load, + ) + stream.run() + + assert db_client_server.scalars(sa.select(RemoteAccount)).one() + assert db_client_server.scalars(sa.select(RemoteToken)).one() + assert db_client_server.scalars(sa.select(UserIdentity)).one() + assert db_client_server.scalars(sa.select(ServerClient)).one() + assert db_client_server.scalars(sa.select(ServerToken)).one() @pytest.fixture(scope="function") diff --git a/migrator/tests/actions/oauth/testdata/linked_accounts/connect_gh.jsonl b/migrator/tests/actions/oauth/testdata/linked_accounts/connect_gh.jsonl new file mode 100644 index 00000000..9b6be3d4 --- /dev/null +++ b/migrator/tests/actions/oauth/testdata/linked_accounts/connect_gh.jsonl @@ -0,0 +1,7 @@ +{"topic": "zenodo-qa.public", "partition": 0, "offset": 13224, "timestamp": 1695130336914, "timestamp_type": 0, "key": {"id": 8553, "__dbz__physicalTableIdentifier": "zenodo-qa.public.oauthclient_remoteaccount"}, "value": {"before": null, "after": {"id": 8553, "user_id": 86490, "client_id": "64a3663a0ac1183598ce", "extra_data": "{}", "created": 1695130329996553, "updated": 1695130329996567}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695130336322, "snapshot": "false", "db": "zenodo", "sequence": "[\"1472993974464\",\"1472993977728\"]", "schema": "public", "table": "oauthclient_remoteaccount", "txId": 563876034, "lsn": 1472993977728, "xmin": null}, "op": "c", "ts_ms": 1695130336533, "transaction": {"id": "563876034:1472993977728", "total_order": 1, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 89, "serialized_value_size": 566, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 13225, "timestamp": 1695130336914, "timestamp_type": 0, "key": {"id_remote_account": 8553, "token_type": "", "__dbz__physicalTableIdentifier": "zenodo-qa.public.oauthclient_remotetoken"}, "value": {"before": null, "after": {"id_remote_account": 8553, "token_type": "", "access_token": "Z04wTFQvUnpjUzd5alg2V3krMzRVUE9sTEpidnVEZjNNSXFCNmxSamdUcVZXU3IvRm1HVGFwRi9jN0FaSW9ZWA==", "secret": "", "created": 1695130329999576, "updated": 1695130329999584}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695130336322, "snapshot": "false", "db": "zenodo", "sequence": "[\"1472993974464\",\"1472994003224\"]", "schema": "public", "table": "oauthclient_remotetoken", "txId": 563876034, "lsn": 1472994003224, "xmin": null}, "op": "c", "ts_ms": 1695130336534, "transaction": {"id": "563876034:1472994003224", "total_order": 2, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 118, "serialized_value_size": 644, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 13226, "timestamp": 1695130336914, "timestamp_type": 0, "key": {"client_id": "rKmVKlRxnQJfyizWeVKRO26cZjLqd2yWhsBFkjv0", "__dbz__physicalTableIdentifier": "zenodo-qa.public.oauth2server_client"}, "value": {"before": null, "after": {"name": "github-webhook", "description": "", "website": "", "user_id": 86490, "client_id": "rKmVKlRxnQJfyizWeVKRO26cZjLqd2yWhsBFkjv0", "client_secret": "HEK4W0bk2Px8xBwVUnY1S4ze8TvuWLy9NeIrcoWAP2H177cJtm3GIvdpSHTi", "is_confidential": false, "is_internal": true, "_redirect_uris": null, "_default_scopes": "webhooks:event"}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695130336322, "snapshot": "false", "db": "zenodo", "sequence": "[\"1472993974464\",\"1472994016016\"]", "schema": "public", "table": "oauth2server_client", "txId": 563876034, "lsn": 1472994016016, "xmin": null}, "op": "c", "ts_ms": 1695130336534, "transaction": {"id": "563876034:1472994016016", "total_order": 3, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 128, "serialized_value_size": 731, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 13227, "timestamp": 1695130336914, "timestamp_type": 0, "key": {"id": 157734, "__dbz__physicalTableIdentifier": "zenodo-qa.public.oauth2server_token"}, "value": {"before": null, "after": {"id": 157734, "client_id": "rKmVKlRxnQJfyizWeVKRO26cZjLqd2yWhsBFkjv0", "user_id": 86490, "token_type": "bearer", "access_token": "dHRMMTN5K3dUTCtBMVcyMFhaRHFJa1prYnJ1YkN0Zlo4cVFmU1d1NXlTUnAveDE3WVFKcFBUeUJHTmFnQTh2emdpcG1idG1ZbTJpeFgzeG5nVGsrTHc9PQ==", "refresh_token": null, "expires": null, "_scopes": "webhooks:event", "is_personal": true, "is_internal": true}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695130336322, "snapshot": "false", "db": "zenodo", "sequence": "[\"1472993974464\",\"1472994034632\"]", "schema": "public", "table": "oauth2server_token", "txId": 563876034, "lsn": 1472994034632, "xmin": null}, "op": "c", "ts_ms": 1695130336534, "transaction": {"id": "563876034:1472994034632", "total_order": 4, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 84, "serialized_value_size": 770, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 13228, "timestamp": 1695130336914, "timestamp_type": 0, "key": {"id": 8553, "__dbz__physicalTableIdentifier": "zenodo-qa.public.oauthclient_remoteaccount"}, "value": {"before": {"id": 8553, "user_id": 86490, "client_id": "64a3663a0ac1183598ce", "extra_data": "{}", "created": 1695130329996553, "updated": 1695130329996567}, "after": {"id": 8553, "user_id": 86490, "client_id": "64a3663a0ac1183598ce", "extra_data": "{\"tokens\": {\"webhook\": 157734}, \"name\": \"Pablo Panero\", \"login\": \"ppanero\", \"repos\": {}, \"id\": 6756943, \"last_sync\": \"2023-09-19T13:32:10.312546+00:00\"}", "created": 1695130329996553, "updated": 1695130336277950}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695130336322, "snapshot": "false", "db": "zenodo", "sequence": "[\"1472993974464\",\"1472994060816\"]", "schema": "public", "table": "oauthclient_remoteaccount", "txId": 563876034, "lsn": 1472994060816, "xmin": null}, "op": "u", "ts_ms": 1695130336534, "transaction": {"id": "563876034:1472994060816", "total_order": 5, "data_collection_order": 2}}, "headers": [], "checksum": null, "serialized_key_size": 89, "serialized_value_size": 866, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 13229, "timestamp": 1695130336914, "timestamp_type": 0, "key": {"id": 8553, "__dbz__physicalTableIdentifier": "zenodo-qa.public.oauthclient_remoteaccount"}, "value": {"before": {"id": 8553, "user_id": 86490, "client_id": "64a3663a0ac1183598ce", "extra_data": "{\"tokens\": {\"webhook\": 157734}, \"name\": \"Pablo Panero\", \"login\": \"ppanero\", \"repos\": {}, \"id\": 6756943, \"last_sync\": \"2023-09-19T13:32:10.312546+00:00\"}", "created": 1695130329996553, "updated": 1695130336277950}, "after": {"id": 8553, "user_id": 86490, "client_id": "64a3663a0ac1183598ce", "extra_data": "{\"tokens\": {\"webhook\": 157734}, \"name\": \"Pablo Panero\", \"login\": \"ppanero\", \"repos\": {\"252459525\": {\"default_branch\": \"master\", \"id\": 252459525, \"full_name\": \"ppanero/invenio-drafts\", \"description\": null}, \"83563583\": {\"default_branch\": \"master\", \"id\": 83563583, \"full_name\": \"ppanero/minidns\", \"description\": \"DNS library for Android and Java SE\"}, \"63950352\": {\"default_branch\": \"2.4\", \"id\": 63950352, \"full_name\": \"ppanero/MISP\", \"description\": \"MISP - Malware Information Sharing Platform & Threat Sharing\"}, \"262068245\": {\"default_branch\": \"master\", \"id\": 262068245, \"full_name\": \"ppanero/base32-lib\", \"description\": \"Library to generate, encode and decode random base32 strings.\"}, \"44679702\": {\"default_branch\": \"master\", \"id\": 44679702, \"full_name\": \"zenodo/zenodo-accessrequests\", \"description\": \"Zenodo access requests module.\"}, \"267876888\": {\"default_branch\": \"master\", \"id\": 267876888, \"full_name\": \"ppanero/invenio-theme\", \"description\": \"Invenio standard theme.\"}, \"317860377\": {\"default_branch\": \"master\", \"id\": 317860377, \"full_name\": \"ppanero/invenio-i18n\", \"description\": \"Invenio internationalization module.\"}, \"386573852\": {\"default_branch\": \"master\", \"id\": 386573852, \"full_name\": \"ppanero/elasticsearch-py\", \"description\": \"Official Python low-level client for Elasticsearch\"}, \"277326366\": {\"default_branch\": \"development\", \"id\": 277326366, \"full_name\": \"ppanero/ppanero.github.io\", \"description\": null}, \"614311967\": {\"default_branch\": \"master\", \"id\": 614311967, \"full_name\": \"ppanero/invenio-stats\", \"description\": \"Statistical data processing and querying for Invenio.\"}, \"219780130\": {\"default_branch\": \"master\", \"id\": 219780130, \"full_name\": \"ppanero/opensource\", \"description\": \"Repository for issues, documentation and other matters related to management of open source at Invenio Software.\"}, \"89223205\": {\"default_branch\": \"master\", \"id\": 89223205, \"full_name\": \"ppanero/puppet-misp\", \"description\": \"This module installs and configures MISP (Malware Information Sharing Platform)\"}, \"33995815\": {\"default_branch\": \"master\", \"id\": 33995815, \"full_name\": \"ppanero/J2EE_Training\", \"description\": \"Basic J2EE applications examples\"}, \"160211508\": {\"default_branch\": \"master\", \"id\": 160211508, \"full_name\": \"ppanero/invenio-indexer\", \"description\": \"Record indexer for Invenio.\"}, \"262090813\": {\"default_branch\": \"master\", \"id\": 262090813, \"full_name\": \"ppanero/pytest-invenio\", \"description\": \"Pytest fixtures for Invenio.\"}, \"259575871\": {\"default_branch\": \"master\", \"id\": 259575871, \"full_name\": \"ppanero/flask-resources\", \"description\": \"REST APIs for Flask\"}, \"161791042\": {\"default_branch\": \"master\", \"id\": 161791042, \"full_name\": \"ppanero/invenio-logging\", \"description\": \"Invenio logging module.\"}, \"490748487\": {\"default_branch\": \"master\", \"id\": 490748487, \"full_name\": \"ppanero/zenodo-docs-user\", \"description\": \"Zenodo User Documentation\"}, \"261774920\": {\"default_branch\": \"master\", \"id\": 261774920, \"full_name\": \"ppanero/invenio-config\", \"description\": \"Invenio configuration loading module.\"}, \"261176076\": {\"default_branch\": \"master\", \"id\": 261176076, \"full_name\": \"ppanero/invenio-oauth2server\", \"description\": \"Invenio modules that implements OAuth 2 server.\"}, \"69670989\": {\"default_branch\": \"master\", \"id\": 69670989, \"full_name\": \"ppanero/sdg_hunter\", \"description\": null}, \"247984718\": {\"default_branch\": \"master\", \"id\": 247984718, \"full_name\": \"zenodo/zenodo-rdm\", \"description\": \"Zenodo Invenio RDM instance\"}, \"79244368\": {\"default_branch\": \"master\", \"id\": 79244368, \"full_name\": \"zenodo/zenodo-docs-user\", \"description\": \"Zenodo User Documentation\"}, \"94411345\": {\"default_branch\": \"master\", \"id\": 94411345, \"full_name\": \"zenodo/zenodo-classifier\", \"description\": null}, \"263002194\": {\"default_branch\": \"master\", \"id\": 263002194, \"full_name\": \"ppanero/pywebpack\", \"description\": \"Webpack integration layer for Python.\"}, \"261972579\": {\"default_branch\": \"master\", \"id\": 261972579, \"full_name\": \"ppanero/dojson\", \"description\": \"Simple pythonic JSON to JSON converter.\"}, \"274697819\": {\"default_branch\": \"master\", \"id\": 274697819, \"full_name\": \"ppanero/invenio-records-agent\", \"description\": null}, \"49374300\": {\"default_branch\": \"master\", \"id\": 49374300, \"full_name\": \"ppanero/diseasemeter\", \"description\": null}, \"206281822\": {\"default_branch\": \"master\", \"id\": 206281822, \"full_name\": \"ppanero/invenio-rdm-records\", \"description\": \"DataCite-based data model for InvenioRDM flavour.\"}, \"263034466\": {\"default_branch\": \"master\", \"id\": 263034466, \"full_name\": \"ppanero/invenio-userprofiles\", \"description\": \"User profiles module for Invenio.\"}, \"246798947\": {\"default_branch\": \"master\", \"id\": 246798947, \"full_name\": \"ppanero/helm-chart-test\", \"description\": null}, \"121617508\": {\"default_branch\": \"master\", \"id\": 121617508, \"full_name\": \"ppanero/invenio-records-rest\", \"description\": \"Invenio records REST API module.\"}, \"444398708\": {\"default_branch\": \"main\", \"id\": 444398708, \"full_name\": \"ppanero/admin-war\", \"description\": \"Administration raffle, with a Pokemon UX\"}, \"220490874\": {\"default_branch\": \"master\", \"id\": 220490874, \"full_name\": \"ppanero/cookiecutter\", \"description\": \"A command-line utility that creates projects from cookiecutters (project templates), e.g. Python package projects, jQuery plugin projects.\"}, \"255355516\": {\"default_branch\": \"master\", \"id\": 255355516, \"full_name\": \"ppanero/fosc\", \"description\": \"Field Of Study Classification (FOSC)\"}, \"138305154\": {\"default_branch\": \"master\", \"id\": 138305154, \"full_name\": \"ppanero/invenio-db\", \"description\": \"Database management for Invenio.\"}, \"234055814\": {\"default_branch\": \"demo\", \"id\": 234055814, \"full_name\": \"ppanero/invenio-rdm-extension-demo\", \"description\": \"Invenio module to showcase how to add an extension to InvenioRDM\"}, \"31359116\": {\"default_branch\": \"master\", \"id\": 31359116, \"full_name\": \"ppanero/reversi_algorithm_AI\", \"description\": \"This is the implementation of an algorithm to play reversi/othello game. \"}, \"261788817\": {\"default_branch\": \"master\", \"id\": 261788817, \"full_name\": \"ppanero/datacite\", \"description\": \"Python API wrapper for the DataCite Metadata Store API.\"}, \"317593235\": {\"default_branch\": \"master\", \"id\": 317593235, \"full_name\": \"ppanero/invenio-records-ui\", \"description\": \"Invenio records user interface module.\"}, \"244578965\": {\"default_branch\": \"master\", \"id\": 244578965, \"full_name\": \"ppanero/training\", \"description\": \"Invenio v3 Training Material\"}, \"261783192\": {\"default_branch\": \"master\", \"id\": 261783192, \"full_name\": \"ppanero/citeproc-py-styles\", \"description\": \"CSL styles for citeproc-py.\"}, \"337696623\": {\"default_branch\": \"master\", \"id\": 337696623, \"full_name\": \"ppanero/accordion-webpack-test\", \"description\": \"Test repository to showcase an issue\"}, \"299837084\": {\"default_branch\": \"master\", \"id\": 299837084, \"full_name\": \"ppanero/marshmallow-utils\", \"description\": \"Extras and utilities for Marshmallow\"}, \"212821857\": {\"default_branch\": \"master\", \"id\": 212821857, \"full_name\": \"ppanero/invenio-cli\", \"description\": \"Cache module for Invenio\"}, \"281715368\": {\"default_branch\": \"master\", \"id\": 281715368, \"full_name\": \"ppanero/invenio-rest\", \"description\": \"REST API support for Invenio.\"}, \"306261165\": {\"default_branch\": \"master\", \"id\": 306261165, \"full_name\": \"ppanero/zenodo-spam-classifier\", \"description\": \"Zenodo Spam Classifier\"}, \"240457390\": {\"default_branch\": \"master\", \"id\": 240457390, \"full_name\": \"ppanero/docs-invenio-rdm\", \"description\": null}, \"85076660\": {\"default_branch\": \"develop\", \"id\": 85076660, \"full_name\": \"ppanero/storehaus\", \"description\": \"Storehaus is a library that makes it easy to work with asynchronous key value stores\"}, \"45678263\": {\"default_branch\": \"master\", \"id\": 45678263, \"full_name\": \"zenodo/zenodo-migrator\", \"description\": \"Zenodo module for migrating data into Invenio 3.\"}, \"317466297\": {\"default_branch\": \"master\", \"id\": 317466297, \"full_name\": \"ppanero/invenio-search-ui\", \"description\": \"UI for Invenio Search.\"}, \"602099393\": {\"default_branch\": \"master\", \"id\": 602099393, \"full_name\": \"ppanero/invenio-banners\", \"description\": \"Create and show banners with useful messages to users.\"}, \"261752517\": {\"default_branch\": \"master\", \"id\": 261752517, \"full_name\": \"ppanero/invenio-previewer\", \"description\": \"Invenio module for previewing files.\"}, \"319332551\": {\"default_branch\": \"master\", \"id\": 319332551, \"full_name\": \"ppanero/invenio-pidrelations\", \"description\": \"**WORK IN PROGRESS** - warning: repo can be squashed and force pushed!\"}, \"248186658\": {\"default_branch\": \"master\", \"id\": 248186658, \"full_name\": \"ppanero/demo-inveniordm\", \"description\": \"Demosite for InvenioRDM.\"}, \"218804088\": {\"default_branch\": \"master\", \"id\": 218804088, \"full_name\": \"ppanero/cookiecutter-invenio-rdm\", \"description\": \"Cookiecutter template for a new Invenio RDM instance.\"}, \"94749400\": {\"default_branch\": \"master\", \"id\": 94749400, \"full_name\": \"ppanero/coursera-big-data-analysis-with-spark\", \"description\": \"Repository with the summaries and programming assigments of the Coursera courses I have completed\"}, \"662918362\": {\"default_branch\": \"main\", \"id\": 662918362, \"full_name\": \"ppanero/django-playground\", \"description\": \"Just playing with Django\"}, \"79111899\": {\"default_branch\": \"master\", \"id\": 79111899, \"full_name\": \"zenodo/about.zenodo.org\", \"description\": \"Zenodo About Site\"}, \"427018972\": {\"default_branch\": \"main\", \"id\": 427018972, \"full_name\": \"ppanero/zenodo-release-test\", \"description\": null}, \"235594976\": {\"default_branch\": \"master\", \"id\": 235594976, \"full_name\": \"ppanero/invenio-s3\", \"description\": \"S3 file storage support for Invenio.\"}, \"244679632\": {\"default_branch\": \"master\", \"id\": 244679632, \"full_name\": \"ppanero/invenio-oaiserver\", \"description\": \"Invenio module that adds more fun to the platform.\"}, \"568834279\": {\"default_branch\": \"master\", \"id\": 568834279, \"full_name\": \"ppanero/invenio-rdm-migrator\", \"description\": \"Migration module for InvenioRDM.\"}, \"262254313\": {\"default_branch\": \"master\", \"id\": 262254313, \"full_name\": \"ppanero/invenio-jsonschemas\", \"description\": \"Invenio Schema Registry\"}, \"160536815\": {\"default_branch\": \"master\", \"id\": 160536815, \"full_name\": \"ppanero/cookiecutter-invenio-datamodel\", \"description\": \"Cookiecutter template for an Invenio data model.\"}, \"156718325\": {\"default_branch\": \"master\", \"id\": 156718325, \"full_name\": \"ppanero/invenio-app\", \"description\": \"WSGI, Celery and CLI applications for Invenio flavours.\"}, \"347623159\": {\"default_branch\": \"main\", \"id\": 347623159, \"full_name\": \"ppanero/coding-challenges\", \"description\": \"Coding challenges, interview style!\"}, \"105632504\": {\"default_branch\": \"master\", \"id\": 105632504, \"full_name\": \"ppanero/Cortex-Analyzers\", \"description\": \"Cortex Analyzers Repository\"}, \"237186300\": {\"default_branch\": \"master\", \"id\": 237186300, \"full_name\": \"ppanero/invenio-files-rest\", \"description\": \"REST API for uploading/downloading files for Invenio.\"}, \"189557504\": {\"default_branch\": \"master\", \"id\": 189557504, \"full_name\": \"ppanero/docker-invenio\", \"description\": \"Docker base images for Invenio.\"}, \"243214088\": {\"default_branch\": \"master\", \"id\": 243214088, \"full_name\": \"ppanero/flask-limiter\", \"description\": \"rate limiting extension for flask applications\"}, \"179234572\": {\"default_branch\": \"master\", \"id\": 179234572, \"full_name\": \"ppanero/invenio-records\", \"description\": \"Invenio-Records is a metadata storage module.\"}, \"79111955\": {\"default_branch\": \"master\", \"id\": 79111955, \"full_name\": \"zenodo/developers.zenodo.org\", \"description\": \"Zenodo Developers Site\"}, \"261704983\": {\"default_branch\": \"master\", \"id\": 261704983, \"full_name\": \"ppanero/flask-kvsession\", \"description\": \"A drop-in replacement for Flask's session handling using server-side sessions.\"}, \"289911577\": {\"default_branch\": \"master\", \"id\": 289911577, \"full_name\": \"ppanero/docker-services-cli\", \"description\": \"Infrastruce services for local and CI tests.\"}, \"261974298\": {\"default_branch\": \"master\", \"id\": 261974298, \"full_name\": \"ppanero/flask-menu\", \"description\": \"Flask-Menu is a Flask extension that adds support for generating menus.\"}, \"246600478\": {\"default_branch\": \"master\", \"id\": 246600478, \"full_name\": \"ppanero/helm-invenio\", \"description\": \"PROTOTYPE\"}, \"309361953\": {\"default_branch\": \"master\", \"id\": 309361953, \"full_name\": \"ppanero/react-invenio-forms\", \"description\": \"React component library for Formik components.\"}, \"37220642\": {\"default_branch\": \"master\", \"id\": 37220642, \"full_name\": \"ppanero/Personal_accounting\", \"description\": \"This is a basic personal accounting (incomes, expenses managemente, etc.) RESTful API with a basic JavaScript client.\"}, \"8135462\": {\"default_branch\": \"master\", \"id\": 8135462, \"full_name\": \"zenodo/zenodo\", \"description\": \"Research. Shared.\"}, \"319348010\": {\"default_branch\": \"master\", \"id\": 319348010, \"full_name\": \"ppanero/invenio-celery\", \"description\": \"Integration layer between Celery and Invenio.\"}, \"132122929\": {\"default_branch\": \"master\", \"id\": 132122929, \"full_name\": \"ppanero/invenio-oauthclient\", \"description\": \"Invenio module that provides OAuth web authorization support.\"}, \"195800883\": {\"default_branch\": \"master\", \"id\": 195800883, \"full_name\": \"ppanero/zenodo\", \"description\": \"Research. Shared.\"}, \"278560566\": {\"default_branch\": \"master\", \"id\": 278560566, \"full_name\": \"ppanero/invenio-files-processor\", \"description\": \"PROTOTYPE!!! Invenio module for file processing tasks.\"}, \"279229752\": {\"default_branch\": \"master\", \"id\": 279229752, \"full_name\": \"ppanero/invenio-drafts-resources\", \"description\": null}, \"259586362\": {\"default_branch\": \"master\", \"id\": 259586362, \"full_name\": \"ppanero/invenio-records-resources\", \"description\": \"REST APIs for Invenio.\"}, \"457732924\": {\"default_branch\": \"master\", \"id\": 457732924, \"full_name\": \"ppanero/invenio-queues\", \"description\": \"PROTOTYPE!!! Do not use this module yet.\"}, \"332469567\": {\"default_branch\": \"master\", \"id\": 332469567, \"full_name\": \"ppanero/invenio\", \"description\": \"Invenio digital library framework\"}, \"32339268\": {\"default_branch\": \"master\", \"id\": 32339268, \"full_name\": \"ppanero/com.aware.plugin.io\", \"description\": \"Plugin for the AWARE framework for distinction between\\nindoor and outdoor placement\"}, \"483147589\": {\"default_branch\": \"master\", \"id\": 483147589, \"full_name\": \"ppanero/invenio-users-resources\", \"description\": \"Invenio module which provides a REST API for managing users and groups.\"}, \"170124615\": {\"default_branch\": \"master\", \"id\": 170124615, \"full_name\": \"ppanero/react-searchkit\", \"description\": \"React components to build your search UI application.\"}, \"161789773\": {\"default_branch\": \"master\", \"id\": 161789773, \"full_name\": \"ppanero/invenio-assets\", \"description\": \"Invenio media assets management module.\"}, \"111808334\": {\"default_branch\": \"trunk\", \"id\": 111808334, \"full_name\": \"ppanero/kafka\", \"description\": \"Mirror of Apache Kafka\"}, \"438953297\": {\"default_branch\": \"master\", \"id\": 438953297, \"full_name\": \"ppanero/invenio-requests\", \"description\": null}, \"127255896\": {\"default_branch\": \"master\", \"id\": 127255896, \"full_name\": \"ppanero/invenio-search\", \"description\": \"Invenio module for information retrieval.\"}, \"161787740\": {\"default_branch\": \"master\", \"id\": 161787740, \"full_name\": \"ppanero/invenio-admin\", \"description\": \"Invenio admin module.\"}, \"337784157\": {\"default_branch\": \"main\", \"id\": 337784157, \"full_name\": \"ppanero/coursera-programming-in-golang\", \"description\": \"Code produced as part of the Coursera Specialization [\\\"Programming with Google Go](https://www.coursera.org/specializations/google-golang).\"}, \"178234206\": {\"default_branch\": \"master\", \"id\": 178234206, \"full_name\": \"ppanero/udemy-modern-react-with-redux\", \"description\": \"Repo with the code of some tutorials I have/am followed/ing\"}, \"263368032\": {\"default_branch\": \"master\", \"id\": 263368032, \"full_name\": \"ppanero/invenio-accounts\", \"description\": \"Invenio module for managing user accounts.\"}, \"161527137\": {\"default_branch\": \"master\", \"id\": 161527137, \"full_name\": \"ppanero/uoc_projects\", \"description\": \"Repository for the projects carried out for my Masters in Data Science at UOC\"}, \"250563388\": {\"default_branch\": \"master\", \"id\": 250563388, \"full_name\": \"ppanero/mkdocs-versioning\", \"description\": \"A tool that allows for versioning sites built with mkdocs\"}, \"26401131\": {\"default_branch\": \"master\", \"id\": 26401131, \"full_name\": \"zenodo/zenodo-backup\", \"description\": \"Static backup site for Zenodo in case of maintenance and/or unexpected downtime.\"}, \"88861550\": {\"default_branch\": \"master\", \"id\": 88861550, \"full_name\": \"ppanero/randscripts\", \"description\": \"Random scripts/small programs needed for some task\"}, \"268475759\": {\"default_branch\": \"master\", \"id\": 268475759, \"full_name\": \"ppanero/invenio-communities\", \"description\": \"Invenio communities module.\"}, \"372728689\": {\"default_branch\": \"master\", \"id\": 372728689, \"full_name\": \"ppanero/flask-iiif\", \"description\": \"Flask-IIIF is permitting easy integration with the International Image Interoperability Framework (IIIF) API standards.\"}, \"262082451\": {\"default_branch\": \"master\", \"id\": 262082451, \"full_name\": \"ppanero/idutils\", \"description\": \"Small Python library to validate persistent identifiers used in scholarly communication.\"}, \"97226101\": {\"default_branch\": \"master\", \"id\": 97226101, \"full_name\": \"ppanero/tinyproxy\", \"description\": \"tinyproxy - a light-weight HTTP/HTTPS proxy daemon for POSIX operating systems\"}, \"28222840\": {\"default_branch\": \"lanesdev\", \"id\": 28222840, \"full_name\": \"ppanero/RhoPollard_Factorize\", \"description\": \"Java multithreading Rho Pollard factoring algorithm\"}, \"107264379\": {\"default_branch\": \"master\", \"id\": 107264379, \"full_name\": \"ppanero/TheHive\", \"description\": \"TheHive: a Scalable, Open Source and Free Security Incident Response Platform\"}, \"161607554\": {\"default_branch\": \"master\", \"id\": 161607554, \"full_name\": \"ppanero/cookiecutter-invenio-instance\", \"description\": \"Cookiecutter template for an Invenio instance.\"}, \"290481539\": {\"default_branch\": \"master\", \"id\": 290481539, \"full_name\": \"ppanero/cookiecutter-invenio-module\", \"description\": \"Cookiecutter template for an Invenio module.\"}, \"161790344\": {\"default_branch\": \"master\", \"id\": 161790344, \"full_name\": \"ppanero/invenio-base\", \"description\": \"Base package for building the Invenio application.\"}, \"200075657\": {\"default_branch\": \"master\", \"id\": 200075657, \"full_name\": \"ppanero/invenio-app-rdm\", \"description\": \"RDM flavour of Invenio\"}, \"34480522\": {\"default_branch\": \"master\", \"id\": 34480522, \"full_name\": \"ppanero/Distributed_4_Player_Chess\", \"description\": \"4 player chess (all-to-all mode)\"}, \"160364696\": {\"default_branch\": \"master\", \"id\": 160364696, \"full_name\": \"ppanero/inveniosoftware.org\", \"description\": \"Sources of the http://inveniosoftware.org web site.\"}, \"321325459\": {\"default_branch\": \"master\", \"id\": 321325459, \"full_name\": \"ppanero/invenio-vocabularies\", \"description\": \"Invenio module for managing vocabularies.\"}, \"200073967\": {\"default_branch\": \"master\", \"id\": 200073967, \"full_name\": \"ppanero/invenio-records-permissions\", \"description\": \"Permissions for Invenio's records REST API.\"}, \"244411291\": {\"default_branch\": \"master\", \"id\": 244411291, \"full_name\": \"ppanero/invenio-iiif\", \"description\": \"IIIF API for Invenio.\"}, \"88095652\": {\"default_branch\": \"master\", \"id\": 88095652, \"full_name\": \"ppanero/UNED_CSE_Scripts\", \"description\": \"Simple scripts used for assigments on my Computer Science Engineering degree at UNED\"}, \"244406257\": {\"default_branch\": \"master\", \"id\": 244406257, \"full_name\": \"ppanero/invenio-cache\", \"description\": \"Cache module for Invenio\"}, \"520954282\": {\"default_branch\": \"master\", \"id\": 520954282, \"full_name\": \"ppanero/invenio-webhooks\", \"description\": \"Invenio module for processing webhook events.\"}, \"261756743\": {\"default_branch\": \"master\", \"id\": 261756743, \"full_name\": \"ppanero/jsonresolver\", \"description\": \"JSON data resolver with support for plugins.\"}, \"570582445\": {\"default_branch\": \"main\", \"id\": 570582445, \"full_name\": \"ppanero/invenio-administration\", \"description\": \"Invenio administration module.\"}, \"8135480\": {\"default_branch\": \"zenodo-master\", \"id\": 8135480, \"full_name\": \"zenodo/invenio\", \"description\": \"Deprecated repository.\"}, \"435429296\": {\"default_branch\": \"master\", \"id\": 435429296, \"full_name\": \"ppanero/zenodo-rdm\", \"description\": \"Zenodo Invenio RDM instance\"}, \"261780041\": {\"default_branch\": \"master\", \"id\": 261780041, \"full_name\": \"ppanero/invenio-formatter\", \"description\": \"Invenio module for formatting the bibliographic records.\"}, \"35002810\": {\"default_branch\": \"master\", \"id\": 35002810, \"full_name\": \"ppanero/java-design-patterns\", \"description\": \"Design pattern samples implemented in Java\"}, \"248187845\": {\"default_branch\": \"master\", \"id\": 248187845, \"full_name\": \"ppanero/dotfiles\", \"description\": \"Humble dotfiles\"}, \"79334344\": {\"default_branch\": \"master\", \"id\": 79334344, \"full_name\": \"zenodo/blog.zenodo.org\", \"description\": \"Zenodo News Site\"}, \"485388745\": {\"default_branch\": \"master\", \"id\": 485388745, \"full_name\": \"ppanero/flask-security-invenio\", \"description\": \"Private fork of Flask-Security\"}, \"335100363\": {\"default_branch\": \"master\", \"id\": 335100363, \"full_name\": \"ppanero/react-webpack-boilerplate\", \"description\": \"This repository is a boiler plate to start React projects, using Webpack for assets management and SCSS for styling.\"}, \"212273102\": {\"default_branch\": \"master\", \"id\": 212273102, \"full_name\": \"ppanero/rfcs\", \"description\": \"RFCs for Invenio\"}, \"308595151\": {\"default_branch\": \"master\", \"id\": 308595151, \"full_name\": \"ppanero/.github\", \"description\": \"Default community health files for the inveniosoftware GitHub organization repositories.\"}, \"301656016\": {\"default_branch\": \"master\", \"id\": 301656016, \"full_name\": \"ppanero/react-invenio-deposit\", \"description\": \"React application for Invenio deposit forms.\"}, \"80704473\": {\"default_branch\": \"master\", \"id\": 80704473, \"full_name\": \"ppanero/dnspython\", \"description\": \"a powerful DNS toolkit for python\"}, \"281133536\": {\"default_branch\": \"master\", \"id\": 281133536, \"full_name\": \"ppanero/invenio-pidstore\", \"description\": \"Invenio module that stores and registers persistent identifiers.\"}, \"479993832\": {\"default_branch\": \"main\", \"id\": 479993832, \"full_name\": \"ppanero/PokeWars-Backend\", \"description\": \"PokeWars backend application\"}, \"79111932\": {\"default_branch\": \"master\", \"id\": 79111932, \"full_name\": \"zenodo/help.zenodo.org\", \"description\": \"Zenodo Help Site\"}, \"161791656\": {\"default_branch\": \"master\", \"id\": 161791656, \"full_name\": \"ppanero/invenio-access\", \"description\": \"Invenio module for common role based access control.\"}, \"160569336\": {\"default_branch\": \"master\", \"id\": 160569336, \"full_name\": \"ppanero/uoc_data_mining\", \"description\": \"Data Mining Master course project\"}}, \"id\": 6756943, \"last_sync\": \"2023-09-19T13:32:16.294105+00:00\"}", "created": 1695130329996553, "updated": 1695130336304024}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695130336322, "snapshot": "false", "db": "zenodo", "sequence": "[\"1472993974464\",\"1472994103272\"]", "schema": "public", "table": "oauthclient_remoteaccount", "txId": 563876034, "lsn": 1472994103272, "xmin": null}, "op": "u", "ts_ms": 1695130336534, "transaction": {"id": "563876034:1472994103272", "total_order": 6, "data_collection_order": 3}}, "headers": [], "checksum": null, "serialized_key_size": 89, "serialized_value_size": 25870, "serialized_header_size": -1} +{"topic": "zenodo-qa.public", "partition": 0, "offset": 13230, "timestamp": 1695130336914, "timestamp_type": 0, "key": {"id": "6756943", "method": "github", "__dbz__physicalTableIdentifier": "zenodo-qa.public.oauthclient_useridentity"}, "value": {"before": null, "after": {"id": "6756943", "method": "github", "id_user": 86490, "created": 1695130336318289, "updated": 1695130336318298}, "source": {"version": "2.3.0.Final", "connector": "postgresql", "name": "zenodo-qa", "ts_ms": 1695130336322, "snapshot": "false", "db": "zenodo", "sequence": "[\"1472993974464\",\"1472994103616\"]", "schema": "public", "table": "oauthclient_useridentity", "txId": 563876034, "lsn": 1472994103616, "xmin": null}, "op": "c", "ts_ms": 1695130336534, "transaction": {"id": "563876034:1472994103616", "total_order": 7, "data_collection_order": 1}}, "headers": [], "checksum": null, "serialized_key_size": 111, "serialized_value_size": 535, "serialized_header_size": -1} \ No newline at end of file diff --git a/migrator/tests/actions/oauth/testdata/linked_accounts/connect_github.jsonl b/migrator/tests/actions/oauth/testdata/linked_accounts/connect_github.jsonl deleted file mode 100644 index e69de29b..00000000 diff --git a/migrator/zenodo_rdm_migrator/actions/transform/oauth.py b/migrator/zenodo_rdm_migrator/actions/transform/oauth.py index dc116a4a..f8e01250 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/oauth.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/oauth.py @@ -238,32 +238,47 @@ class OAuthLinkedAccountConnectAction(TransformAction): @classmethod def matches_action(cls, tx): """Checks if the data corresponds with that required by the action.""" - if len(tx.operations) != 4: + if 3 > len(tx.operations) or len(tx.operations) > 7: return False - rules = { - # OpType is not hashable (to use a dict), the list won't be long in any case - # so the worst case scenario is not too bad - "oauthclient_remoteaccount": [OperationType.INSERT, OperationType.UPDATE], - "oauthclient_remotetoken": [OperationType.INSERT], - "oauthclient_useridentity": [OperationType.INSERT], + mandatory = { + "oauthclient_remoteaccount", + "oauthclient_remotetoken", + "oauthclient_useridentity", + } + + optional = { + "oauth2server_client", + "oauth2server_token", } + allow_updates = {"oauthclient_remoteaccount"} + for op in tx.operations: - rule = rules.get(op["source"]["table"]) - if not rule: - return False - try: - rule.remove(op["op"]) # prevents double update/insert in sets - except ValueError: + if op["op"] == OperationType.UPDATE: + # nested to be able to have the final else clause by op type + if op["source"]["table"] not in allow_updates: + return False + elif op["op"] == OperationType.INSERT: + try: + mandatory.remove(op["source"]["table"]) + except KeyError: + try: + optional.remove(op["source"]["table"]) + except KeyError: + return False + else: return False - return True + # all mandatory were found and optionals are all or none + return len(mandatory) == 0 and (len(optional) == 2 or len(optional) == 0) def _transform_data(self): """Transforms the data and returns dictionary.""" remote_account = None remote_token = None + server_client = None + server_token = None user_identity = None for op in self.tx.operations: @@ -277,13 +292,25 @@ def _transform_data(self): remote_token = op["after"] elif op["source"]["table"] == "oauthclient_useridentity": user_identity = op["after"] + elif op["source"]["table"] == "oauth2server_client": + server_client = op["after"] + elif op["source"]["table"] == "oauth2server_token": + server_token = op["after"] - return { + result = { "tx_id": self.tx.id, "remote_account": IdentityTransform()._transform(remote_account), "remote_token": IdentityTransform()._transform(remote_token), "user_identity": IdentityTransform()._transform(user_identity), } + if server_client: + result["server_client"]: OAuthServerClientTransform()._transform( + server_client + ) + if server_token: + result["server_token"]: OAuthServerTokenTransform()._transform(server_token) + + return result class OAuthLinkedAccountDisconnectAction(TransformAction): From 638095d2ad2a3f6224fa51ce90ed3e885bd566d1 Mon Sep 17 00:00:00 2001 From: Pablo Panero Date: Thu, 21 Sep 2023 12:03:55 +0200 Subject: [PATCH 12/35] migrator: transform dates in oauth actions --- .../actions/transform/oauth.py | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/migrator/zenodo_rdm_migrator/actions/transform/oauth.py b/migrator/zenodo_rdm_migrator/actions/transform/oauth.py index f8e01250..049e4a20 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/oauth.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/oauth.py @@ -53,6 +53,8 @@ def _transform_data(self): client_src = self.tx.operations[1]["after"] token_src = self.tx.operations[0]["after"] + self._microseconds_to_isodate(data=token_src, fields=["expires"]) + result = { "tx_id": self.tx.id, "client": OAuthServerClientTransform()._transform(client_src), @@ -105,6 +107,7 @@ def _transform_data(self): result["client"] = OAuthServerClientTransform()._transform(op["after"]) elif op["source"]["table"] == "oauth2server_token": + self._microseconds_to_isodate(data=op["after"], fields=["expires"]) result["token"] = OAuthServerTokenTransform()._transform(op["after"]) return result @@ -133,6 +136,7 @@ def _transform_data(self): """Transforms the data and returns dictionary.""" op = self.tx.operations[0] + self._microseconds_to_isodate(data=op["before"], fields=["expires"]) return { "tx_id": self.tx.id, "token": OAuthServerTokenTransform()._transform(op["before"]), @@ -297,6 +301,11 @@ def _transform_data(self): elif op["source"]["table"] == "oauth2server_token": server_token = op["after"] + self._microseconds_to_isodate( + data=remote_account, fields=["created", "updated"] + ) + self._microseconds_to_isodate(data=remote_token, fields=["created", "updated"]) + self._microseconds_to_isodate(data=user_identity, fields=["created", "updated"]) result = { "tx_id": self.tx.id, "remote_account": IdentityTransform()._transform(remote_account), @@ -308,6 +317,7 @@ def _transform_data(self): server_client ) if server_token: + self._microseconds_to_isodate(data=server_token, fields=["expires"]) result["server_token"]: OAuthServerTokenTransform()._transform(server_token) return result @@ -357,6 +367,10 @@ def _transform_data(self): elif op["source"]["table"] == "oauthclient_useridentity": user_identity = op["before"] + self._microseconds_to_isodate( + data=remote_account, fields=["created", "updated"] + ) + self._microseconds_to_isodate(data=remote_token, fields=["created", "updated"]) result = { "tx_id": self.tx.id, "remote_account": IdentityTransform()._transform(remote_account), @@ -364,6 +378,9 @@ def _transform_data(self): } if user_identity: + self._microseconds_to_isodate( + data=user_identity, fields=["created", "updated"] + ) result["user_identity"] = IdentityTransform()._transform(user_identity) return result @@ -400,8 +417,12 @@ def _transform_data(self): for op in self.tx.operations: if op["source"]["table"] == "oauth2server_token": + self._microseconds_to_isodate(data=op["before"], fields=["expires"]) token = op["before"] elif op["source"]["table"] == "oauthclient_useridentity": + self._microseconds_to_isodate( + data=op["before"], fields=["created", "updated"] + ) user_identity = op["before"] return { From aa2cdc7784e9515df42255c1e7024931f4dd4a03 Mon Sep 17 00:00:00 2001 From: Pablo Panero Date: Fri, 22 Sep 2023 09:54:28 +0200 Subject: [PATCH 13/35] migrator: load json fields in oauth actions --- migrator/zenodo_rdm_migrator/actions/transform/oauth.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/migrator/zenodo_rdm_migrator/actions/transform/oauth.py b/migrator/zenodo_rdm_migrator/actions/transform/oauth.py index 049e4a20..36eb7bca 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/oauth.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/oauth.py @@ -14,7 +14,7 @@ OAuthServerClientTransform, OAuthServerTokenTransform, ) -from invenio_rdm_migrator.transform import IdentityTransform +from invenio_rdm_migrator.transform import IdentityTransform, JSONTransformMixin class OAuthServerTokenCreateAction(TransformAction): @@ -233,7 +233,7 @@ def _transform_data(self): } -class OAuthLinkedAccountConnectAction(TransformAction): +class OAuthLinkedAccountConnectAction(TransformAction, JSONTransformMixin): """Zenodo to RDM OAuth client linked account connect action.""" name = "oauth-application-connect" @@ -301,6 +301,7 @@ def _transform_data(self): elif op["source"]["table"] == "oauth2server_token": server_token = op["after"] + self._load_json_fields(data=remote_account, fields=["extra_data"]) self._microseconds_to_isodate( data=remote_account, fields=["created", "updated"] ) @@ -323,7 +324,7 @@ def _transform_data(self): return result -class OAuthLinkedAccountDisconnectAction(TransformAction): +class OAuthLinkedAccountDisconnectAction(TransformAction, JSONTransformMixin): """Zenodo to RDM OAuth client linked account disconnect action.""" name = "oauth-application-disconnect" @@ -367,6 +368,7 @@ def _transform_data(self): elif op["source"]["table"] == "oauthclient_useridentity": user_identity = op["before"] + self._load_json_fields(data=remote_account, fields=["extra_data"]) self._microseconds_to_isodate( data=remote_account, fields=["created", "updated"] ) From e3ad9af9c711f08bafe2038eea241c68c5700fdc Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Wed, 20 Sep 2023 03:50:59 +0200 Subject: [PATCH 14/35] migrator: fix permission_flags for parent --- migrator/zenodo_rdm_migrator/transform/entries/parents.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/migrator/zenodo_rdm_migrator/transform/entries/parents.py b/migrator/zenodo_rdm_migrator/transform/entries/parents.py index 6c4cf41f..f774e44c 100644 --- a/migrator/zenodo_rdm_migrator/transform/entries/parents.py +++ b/migrator/zenodo_rdm_migrator/transform/entries/parents.py @@ -99,6 +99,8 @@ def transform(self, entry): has_only_managed_communities = comm_slugs < owner_comm_slugs if not has_only_managed_communities: permission_flags["can_community_manage_record"] = False + if permission_flags: + transformed["json"]["permission_flags"] = permission_flags elif not self.partial: raise KeyError("json") # else, pass From f82a5cb978dc1cbc16ee630ef97fdc7d48e832d2 Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Thu, 21 Sep 2023 03:45:01 +0200 Subject: [PATCH 15/35] migrator: use orsjon in vocabulary dump scripts --- migrator/scripts/dump_affiliations_db.py | 6 +++--- migrator/scripts/dump_awards_db.py | 6 +++--- migrator/scripts/dump_funders_db.py | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/migrator/scripts/dump_affiliations_db.py b/migrator/scripts/dump_affiliations_db.py index 223458ec..f8520a1e 100644 --- a/migrator/scripts/dump_affiliations_db.py +++ b/migrator/scripts/dump_affiliations_db.py @@ -6,7 +6,7 @@ import csv import uuid -import orjson as json +import orjson from idutils import normalize_ror from invenio_rdm_migrator.utils import ts @@ -59,7 +59,7 @@ def load_file(datafile, outpath): with open(outpath, "w") as fout, open(datafile, "rb") as fp: print(f"[{ts()}] loading {datafile}") writer = csv.writer(fout) - entries = json.loads(fp.read()) + entries = orjson.loads(fp.read()) for idx, data in enumerate(entries): if idx % 1000 == 0: print(f"[{ts()}] {idx}") @@ -74,7 +74,7 @@ def load_file(datafile, outpath): ( str(uuid.uuid4()), # id affiliation_id, # pid - json.dumps(affiliation), # json + orjson.dumps(affiliation).decode("utf-8"), # json creation_ts, # created creation_ts, # updated (same as created) 1, # version_id diff --git a/migrator/scripts/dump_awards_db.py b/migrator/scripts/dump_awards_db.py index 1d616eb8..e463110d 100644 --- a/migrator/scripts/dump_awards_db.py +++ b/migrator/scripts/dump_awards_db.py @@ -7,9 +7,9 @@ """ import csv import gzip -import json import uuid +import orjson from invenio_rdm_migrator.utils import ts DATA_PATHS = [ @@ -111,7 +111,7 @@ def load_files(file_paths, outpath): if idx % 1000 == 0: print(f"[{ts()}] {idx}") try: - data = json.loads(line) + data = orjson.loads(line) award = transform_openaire_grant(data) if not award: print(f"[{ts()}] Failed to transform line {idx}:\n{data}\n") @@ -127,7 +127,7 @@ def load_files(file_paths, outpath): ( str(uuid.uuid4()), # id award_id, # pid - json.dumps(award), # json + orjson.dumps(award).decode("utf-8"), # json creation_ts, # created creation_ts, # updated (same as created) 1, # version_id diff --git a/migrator/scripts/dump_funders_db.py b/migrator/scripts/dump_funders_db.py index 9fdf1cde..1183a578 100644 --- a/migrator/scripts/dump_funders_db.py +++ b/migrator/scripts/dump_funders_db.py @@ -9,7 +9,7 @@ import csv import uuid -import orjson as json +import orjson from idutils import normalize_ror from invenio_rdm_migrator.utils import ts @@ -73,7 +73,7 @@ def load_file(datafile, outpath): with open(outpath, "w") as fout, open(datafile, "rb") as fp: print(f"[{ts()}] loading {datafile}") writer = csv.writer(fout) - entries = json.loads(fp.read()) + entries = orjson.loads(fp.read()) for idx, data in enumerate(entries): if idx % 1000 == 0: print(f"[{ts()}] {idx}") @@ -88,7 +88,7 @@ def load_file(datafile, outpath): ( str(uuid.uuid4()), # id funder_id, # pid - json.dumps(funder), # json + orjson.dumps(funder).decode("utf-8"), # json creation_ts, # created creation_ts, # updated (same as created) 1, # version_id From 14cea53aa9de59723e9b2397cd207cc43468f74e Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Thu, 21 Sep 2023 03:46:53 +0200 Subject: [PATCH 16/35] migrator: use binary dumps for OAuthClient --- migrator/migrate.sh | 10 ++++++++-- migrator/scripts/oauthclient_remoteaccount_dump.sql | 2 +- migrator/scripts/oauthclient_remotetoken_dump.sql | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/migrator/migrate.sh b/migrator/migrate.sh index 0620a159..f5a34e21 100755 --- a/migrator/migrate.sh +++ b/migrator/migrate.sh @@ -32,9 +32,10 @@ invenio communities custom-fields init # Script base path script_path=$( cd -- "$(dirname "$0")" && pwd ) -# Backup FK/PK/unique constraints -DB_URI="postgresql://zenodo:zenodo@localhost/zenodo" +LEGACY_DB_URI="service=zenodo-legacy" +DB_URI="service=zenodo-dev" +# Backup FK/PK/unique constraints psql $DB_URI --tuples-only --quiet -f scripts/gen_create_constraints.sql > scripts/create_constraints.sql psql $DB_URI --tuples-only --quiet -f scripts/gen_delete_constraints.sql > scripts/delete_constraints.sql @@ -51,6 +52,11 @@ python -m zenodo_rdm_migrator "streams.yaml" # TODO: These should be fixed in the legacy/source # Apply various consistency fixes +# Import OAuthclient models +psql $DB_URI -f scripts/oauthclient_remoteaccount_dump.sql > "dumps/oauthclient_remoteaccount.bin" +psql $DB_URI -f scripts/oauthclient_remotetoken_dump.sql > "dumps/oauthclient_remotetoken.bin" +pv dumps/oauthclient_remoteaccount.bin | psql $DB_URI -c "COPY oauthclient_remoteaccount (id, user_id, client_id, extra_data, created, updated) FROM STDIN (FORMAT binary);" +pv dumps/oauthclient_remotetoken.bin | psql $DB_URI -c "COPY oauthclient_remotetoken (id_remote_account, token_type, access_token, secret, created, updated) FROM STDIN (FORMAT binary);" # Restore FK/PK/unique constraints and indices psql $DB_URI -f scripts/create_constraints.sql diff --git a/migrator/scripts/oauthclient_remoteaccount_dump.sql b/migrator/scripts/oauthclient_remoteaccount_dump.sql index 72d7179b..9b61cea1 100644 --- a/migrator/scripts/oauthclient_remoteaccount_dump.sql +++ b/migrator/scripts/oauthclient_remoteaccount_dump.sql @@ -7,4 +7,4 @@ COPY ( created, updated FROM oauthclient_remoteaccount -) TO STDOUT WITH (FORMAT csv); +) TO STDOUT WITH (FORMAT binary); diff --git a/migrator/scripts/oauthclient_remotetoken_dump.sql b/migrator/scripts/oauthclient_remotetoken_dump.sql index 3434ba99..da5908d1 100644 --- a/migrator/scripts/oauthclient_remotetoken_dump.sql +++ b/migrator/scripts/oauthclient_remotetoken_dump.sql @@ -8,4 +8,4 @@ COPY ( updated FROM oauthclient_remotetoken -) TO STDOUT WITH (FORMAT csv); +) TO STDOUT WITH (FORMAT binary); From 1fa9cfa6fb3c32a38117cba3425dc263712ad790 Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Thu, 21 Sep 2023 03:47:23 +0200 Subject: [PATCH 17/35] migrator: fix deleted records transformation --- migrator/scripts/deleted_records_dump.sql | 5 +- migrator/zenodo_rdm_migrator/errors.py | 9 ++ migrator/zenodo_rdm_migrator/stream.py | 10 +- .../zenodo_rdm_migrator/transform/__init__.py | 3 +- .../transform/entries/records/records.py | 21 +-- .../zenodo_rdm_migrator/transform/records.py | 120 +++++++++++++++++- 6 files changed, 141 insertions(+), 27 deletions(-) diff --git a/migrator/scripts/deleted_records_dump.sql b/migrator/scripts/deleted_records_dump.sql index bb5673ff..aa8864ec 100644 --- a/migrator/scripts/deleted_records_dump.sql +++ b/migrator/scripts/deleted_records_dump.sql @@ -8,6 +8,7 @@ COPY ( removal_json, removal_date, version_id, + recid, transaction_id ) AS ( SELECT @@ -15,6 +16,7 @@ COPY ( r.json as removal_json, r.updated as removal_date, r.version_id, + p.pid_value, r.transaction_id FROM records_metadata_version as r @@ -30,8 +32,9 @@ COPY ( r.id as id, r.json as json, r.created as created, - dr.removal_date as updated, dr.version_id as version_id, + dr.recid as recid, + dr.removal_date as updated, dr.removal_json as removal_json, dr.removal_date as removal_date FROM diff --git a/migrator/zenodo_rdm_migrator/errors.py b/migrator/zenodo_rdm_migrator/errors.py index 0a283487..70df7e6a 100644 --- a/migrator/zenodo_rdm_migrator/errors.py +++ b/migrator/zenodo_rdm_migrator/errors.py @@ -21,6 +21,15 @@ def description(self): return f"No conceptrecid for draft: {self.draft}" +class InvalidTombstoneRecord(Exception): + """Invalid tombstone record error.""" + + @property + def description(self): + """Exception's description.""" + return "Not possible to generate tombstone record from entry." + + class InvalidIdentifier(Exception): """Invalid identifiers, for example a missing scheme.""" diff --git a/migrator/zenodo_rdm_migrator/stream.py b/migrator/zenodo_rdm_migrator/stream.py index 92bf4126..76be7a3b 100644 --- a/migrator/zenodo_rdm_migrator/stream.py +++ b/migrator/zenodo_rdm_migrator/stream.py @@ -29,6 +29,7 @@ OAuthServerTokenTransform, ) from invenio_rdm_migrator.streams.records import ( + RDMDeletedRecordCopyLoad, RDMDraftCopyLoad, RDMRecordCopyLoad, RDMVersionStateCopyLoad, @@ -39,6 +40,7 @@ from .extract import KafkaExtract from .transform import ( ZenodoCommunityTransform, + ZenodoDeletedRecordTransform, ZenodoRecordTransform, ZenodoRequestTransform, ZenodoUserTransform, @@ -72,15 +74,15 @@ DeletedRecordStreamDefinition = StreamDefinition( name="deleted_records", extract_cls=JSONLExtract, - transform_cls=ZenodoRecordTransform, - load_cls=RDMRecordCopyLoad, + transform_cls=ZenodoDeletedRecordTransform, + load_cls=RDMDeletedRecordCopyLoad, ) """ETL stream for Zenodo deleted records.""" VersionStateStreamDefinition = StreamDefinition( name="version_state", - extract_cls=JSONLExtract, - transform_cls=ZenodoRecordTransform, + extract_cls=None, + transform_cls=None, load_cls=RDMVersionStateCopyLoad, ) """ETL stream for version state.""" diff --git a/migrator/zenodo_rdm_migrator/transform/__init__.py b/migrator/zenodo_rdm_migrator/transform/__init__.py index 5a58a981..9a7179d2 100644 --- a/migrator/zenodo_rdm_migrator/transform/__init__.py +++ b/migrator/zenodo_rdm_migrator/transform/__init__.py @@ -9,7 +9,7 @@ from .communities import ZenodoCommunityTransform -from .records import ZenodoRecordTransform +from .records import ZenodoDeletedRecordTransform, ZenodoRecordTransform from .requests import ZenodoRequestTransform from .users import ZenodoUserTransform @@ -18,4 +18,5 @@ ZenodoRecordTransform, ZenodoRequestTransform, ZenodoUserTransform, + ZenodoDeletedRecordTransform, ) diff --git a/migrator/zenodo_rdm_migrator/transform/entries/records/records.py b/migrator/zenodo_rdm_migrator/transform/entries/records/records.py index 1f9bd31d..34741250 100644 --- a/migrator/zenodo_rdm_migrator/transform/entries/records/records.py +++ b/migrator/zenodo_rdm_migrator/transform/entries/records/records.py @@ -72,7 +72,7 @@ def _pids(self, entry): def _bucket_id(self, entry): """Transform the bucket of a record.""" - return entry["json"]["_buckets"]["record"] + return entry["json"]["_buckets"].get("record") def _media_bucket_id(self, entry): """Transform the media bucket of a record.""" @@ -108,25 +108,6 @@ def _custom_fields(self, entry): """Transform custom fields.""" return ZenodoCustomFieldsEntry.transform(entry["json"]) - def _tombstone(self, entry): - """Transform tombstone.""" - removal_json = entry.get("removal_json") - if removal_json: - removed_by = removal_json.get("removed_by") or None - if isinstance(removed_by, int): - removed_by = {"user": str(removed_by)} - note = removal_json.get("removal_reason") or "" - removal_reason = {"id": "spam"} if "spam" in note.lower() else None - removal_date = entry.get("removal_date") - return { - "note": note, - "is_visible": True, - "removed_by": removed_by, - "removal_date": removal_date, - "citation_text": None, - "removal_reason": removal_reason, - } - class ZenodoDraftEntry(ZenodoRecordEntry): """Zenodo draft transform. diff --git a/migrator/zenodo_rdm_migrator/transform/records.py b/migrator/zenodo_rdm_migrator/transform/records.py index 1b279603..0beb8b88 100644 --- a/migrator/zenodo_rdm_migrator/transform/records.py +++ b/migrator/zenodo_rdm_migrator/transform/records.py @@ -9,7 +9,9 @@ from invenio_rdm_migrator.streams.records import RDMRecordTransform -from .entries.parents import ParentRecordEntry +from zenodo_rdm_migrator.errors import InvalidTombstoneRecord + +from .entries.parents import ZENODO_DATACITE_PREFIX, ParentRecordEntry from .entries.records.records import ZenodoDraftEntry, ZenodoRecordEntry @@ -50,3 +52,119 @@ def _transform(self, entry): "record": self._record(entry), "parent": self._parent(entry), } + + +class ZenodoDeletedRecordTransform(RDMRecordTransform): + """Zenodo to RDM Record class for data transformation.""" + + REMOVAL_REASONS_MAPPING = { + "spam": "spam", + "takedown": "take-down-request", + "duplicate": "duplicate", + "fraud": "fraud", + "copyright": "copyright", + "personal data": "personal-data", + "misconduct": "misconduct", + "test": "test-record", + } + + def _parent(self, entry): + """Extract a parent record.""" + transformed = { + "created": entry.get("created"), + "updated": entry.get("updated"), + "version_id": entry.get("version_id"), + } + parent_pid = entry["json"].get("conceptrecid") + transformed["json"] = {"id": parent_pid, "communities": {}} + owner = next(iter(entry["json"].get("owners", [])), None) + if owner is not None: + transformed["json"]["access"] = {"owned_by": {"user": owner}} + + pids = {} + doi = entry["json"].get("doi") + conceptdoi = entry["json"].get("conceptdoi") + if doi and doi.startswith(ZENODO_DATACITE_PREFIX): + if conceptdoi: + pids["doi"] = { + "client": "datacite", + "provider": "datacite", + "identifier": conceptdoi, + } + else: # old Zenodo DOI record without concept DOI + pids["doi"] = {"provider": "legacy", "identifier": ""} + transformed["json"]["pids"] = pids + + return transformed + + def _draft(self, entry): + """Transform the draft.""" + pass + + def _tombstone(self, entry): + """Transform tombstone.""" + removal_json = entry.get("removal_json") + if removal_json: + removed_by = removal_json.get("removed_by") or None + if isinstance(removed_by, int): + removed_by = {"user": str(removed_by)} + removal_date = entry.get("removal_date") + + removal_reason = None + note = removal_json.get("removal_reason") or "" + if note: + if isinstance(note, list): + if len(note) == 2: + removal_reason, note = note + # NOTE: We sometimes used this format for takedowns only + if removal_reason == "takedown": + removal_reason = "take-down-request" + else: + removal_reason = None + note = "" + elif isinstance(note, str): + note_words = note.lower().split() + for reason_match, reason_id in self.REMOVAL_REASONS_MAPPING.items(): + if reason_match in note_words: + removal_reason = {"id": reason_id} + break + + return { + "note": note, + "is_visible": True, + "removed_by": removed_by, + "removal_date": removal_date, + "citation_text": None, + "removal_reason": removal_reason, + } + + def _record(self, entry): + """Extract a record.""" + tombstone = self._tombstone(entry) + try: + # We try our best the usual transformation + res = ZenodoRecordEntry().transform(entry) + res["json"]["tombstone"] = tombstone + return res + except Exception: + # If that fails too, then we only need enough for a PID and tombstone + recid = entry.get("recid", str(entry["json"].get("recid", ""))) + if not recid: + raise InvalidTombstoneRecord() + return { + "created": entry.get("created"), + "updated": entry.get("updated"), + "version_id": entry.get("version_id"), + "json": {"id": recid, "tombstone": tombstone}, + "index": entry.get("index", 0) + 1 + } + + def _transform(self, entry): + """Transform a single entry.""" + record = self._record(entry) + if not record: + raise InvalidTombstoneRecord() + return { + "record": record, + "parent": self._parent(entry), + } From 2f2af3649f3c8a538af2f7f54d5853a044013312 Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Thu, 21 Sep 2023 15:21:55 +0200 Subject: [PATCH 18/35] migrator: fix scripts resilience --- migrator/README.md | 2 +- migrator/migrate.sh | 9 --------- migrator/scripts/create_missing_indices.sql | 20 ++++++++++---------- migrator/scripts/update_sequences.sql | 1 - 4 files changed, 11 insertions(+), 21 deletions(-) diff --git a/migrator/README.md b/migrator/README.md index 3a8d3ed5..2f22ebef 100644 --- a/migrator/README.md +++ b/migrator/README.md @@ -228,5 +228,5 @@ psql $DB_URI -f scripts/webhook_events_dump.sql > "dumps/webhook_events-$(date - # GitHub repositories psql $DB_URI -f scripts/github_repositories_dump.sql > "dumps/github_repositories-$(date -I).csv" # GitHub releases -psql $DB_URI -f scripts/github_releases_dump.sql > "dumps/github_releases-$(date -I).jsonl" +psql $DB_URI -f scripts/github_releases_dump.sql | sed 's/\\\\/\\/g' > "dumps/github_releases-$(date -I).jsonl" ``` diff --git a/migrator/migrate.sh b/migrator/migrate.sh index f5a34e21..81355b37 100755 --- a/migrator/migrate.sh +++ b/migrator/migrate.sh @@ -70,15 +70,6 @@ psql $DB_URI -f scripts/create_missing_indices.sql # Fixtures invenio rdm-records fixtures -invenio vocabularies import -v names -f $script_path/app_data/vocabularies-future.yaml # zenodo specific names - -# TODO: Load these via regular migration streams -# Load awards via COPY -pv awards.csv | psql $DB_URI -c "COPY award_metadata (id, pid, json, created, updated, version_id) FROM STDIN (FORMAT csv);" - -# Truncate any previous funders (e.g. from fixtures), and load funders via copy -psql $DB_URI -c "truncate funder_metadata" -pv funders.csv | psql $DB_URI -c "COPY funder_metadata (id, pid, json, created, updated, version_id) FROM STDIN (FORMAT csv);" # Reindex records and communities invenio rdm-records rebuild-index diff --git a/migrator/scripts/create_missing_indices.sql b/migrator/scripts/create_missing_indices.sql index a58abd44..79f16db1 100644 --- a/migrator/scripts/create_missing_indices.sql +++ b/migrator/scripts/create_missing_indices.sql @@ -1,10 +1,10 @@ -CREATE INDEX idx_accounts_user_displayname ON accounts_user USING btree (displayname); -CREATE INDEX idx_communities_members_group_id ON communities_members USING btree (group_id); -CREATE INDEX idx_communities_members_user_id ON communities_members USING btree (user_id); -CREATE INDEX idx_communities_metadata_bucket_id ON communities_metadata USING btree (bucket_id); -CREATE INDEX idx_files_files_last_check ON files_files USING btree (last_check); -CREATE INDEX idx_pidstore_pid_pid_value ON pidstore_pid USING btree (pid_value); -CREATE INDEX idx_rdm_parents_community_request_id ON rdm_parents_community USING btree (request_id); -CREATE INDEX idx_rdm_records_files_record_id ON rdm_records_files USING btree (record_id); -CREATE INDEX idx_rdm_records_metadata_bucket_id ON rdm_records_metadata USING btree (bucket_id); -CREATE INDEX idx_rdm_versions_state_next_draft_id ON rdm_versions_state USING btree (next_draft_id); +CREATE INDEX IF NOT EXISTS idx_accounts_user_displayname ON accounts_user USING btree (displayname); +CREATE INDEX IF NOT EXISTS idx_communities_members_group_id ON communities_members USING btree (group_id); +CREATE INDEX IF NOT EXISTS idx_communities_members_user_id ON communities_members USING btree (user_id); +CREATE INDEX IF NOT EXISTS idx_communities_metadata_bucket_id ON communities_metadata USING btree (bucket_id); +CREATE INDEX IF NOT EXISTS idx_files_files_last_check ON files_files USING btree (last_check); +CREATE INDEX IF NOT EXISTS idx_pidstore_pid_pid_value ON pidstore_pid USING btree (pid_value); +CREATE INDEX IF NOT EXISTS idx_rdm_parents_community_request_id ON rdm_parents_community USING btree (request_id); +CREATE INDEX IF NOT EXISTS idx_rdm_records_files_record_id ON rdm_records_files USING btree (record_id); +CREATE INDEX IF NOT EXISTS idx_rdm_records_metadata_bucket_id ON rdm_records_metadata USING btree (bucket_id); +CREATE INDEX IF NOT EXISTS idx_rdm_versions_state_next_draft_id ON rdm_versions_state USING btree (next_draft_id); diff --git a/migrator/scripts/update_sequences.sql b/migrator/scripts/update_sequences.sql index dcf636ee..1084d760 100644 --- a/migrator/scripts/update_sequences.sql +++ b/migrator/scripts/update_sequences.sql @@ -1,5 +1,4 @@ SELECT setval(pg_get_serial_sequence('access_actionssystemroles', 'id'), COALESCE(max(id) + 1, 1), false) FROM access_actionssystemroles; -SELECT setval(pg_get_serial_sequence('accounts_role', 'id'), COALESCE(max(id) + 1, 1), false) FROM accounts_role; SELECT setval(pg_get_serial_sequence('accounts_user', 'id'), COALESCE(max(id) + 1, 1), false) FROM accounts_user; SELECT setval(pg_get_serial_sequence('banners', 'id'), COALESCE(max(id) + 1, 1), false) FROM banners; SELECT setval(pg_get_serial_sequence('files_location', 'id'), COALESCE(max(id) + 1, 1), false) FROM files_location; From e53ab9be3bd59ebe7831e5403595ce9e8894f500 Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Fri, 22 Sep 2023 14:49:33 +0200 Subject: [PATCH 19/35] extract: yield Kafka transactions in commit LSN order --- migrator/tests/extract/test_kafka_extract.py | 14 ++--- migrator/zenodo_rdm_migrator/extract/kafka.py | 51 ++++++++++++------- .../zenodo_rdm_migrator/transform/records.py | 2 +- 3 files changed, 42 insertions(+), 25 deletions(-) diff --git a/migrator/tests/extract/test_kafka_extract.py b/migrator/tests/extract/test_kafka_extract.py index 06f7a7e4..75be4adb 100644 --- a/migrator/tests/extract/test_kafka_extract.py +++ b/migrator/tests/extract/test_kafka_extract.py @@ -109,12 +109,14 @@ def _assert_result( all(isinstance(o["op"], OperationType) for o in t.operations) for t in result ) tx_dict = {t.id: t for t in result} - tx_ids = list(tx_dict.keys()) - assert tx_ids == sorted(tx_ids) - assert tx_ids[0] == first_tx_id - assert tx_ids[-1] == last_tx_id - assert set(extra_tx_ids or []) <= set(tx_ids) - assert len(set(excluded_tx_ids or []).intersection(set(tx_ids))) == 0 + tx_lsn_list = [(t.id, t.commit_lsn) for t in result] + assert tx_lsn_list == sorted(tx_lsn_list, key=lambda x: x[1]) + assert tx_lsn_list[0][0] == first_tx_id + assert tx_lsn_list[-1][0] == last_tx_id + assert set(extra_tx_ids or []) <= set(tx_lsn_list) + assert ( + len(set(excluded_tx_ids or []).intersection({t for t, _ in tx_lsn_list})) == 0 + ) for tx_id, op_counts in (tx_op_counts or {}).items(): _assert_op_counts(tx_dict[tx_id], op_counts) diff --git a/migrator/zenodo_rdm_migrator/extract/kafka.py b/migrator/zenodo_rdm_migrator/extract/kafka.py index afb7c044..1c0cb38b 100644 --- a/migrator/zenodo_rdm_migrator/extract/kafka.py +++ b/migrator/zenodo_rdm_migrator/extract/kafka.py @@ -18,15 +18,16 @@ from invenio_rdm_migrator.load.postgresql.transactions.operations import OperationType from invenio_rdm_migrator.logging import Logger from kafka import KafkaConsumer, TopicPartition -from sortedcontainers import SortedDict, SortedList +from sortedcontainers import SortedList class _TxState: """Transaction state, internally used in the Kafka extract only.""" - def __init__(self, id, info=None): + def __init__(self, id, commit_lsn=None, info=None): """Constructor.""" self.id = id + self.commit_lsn = commit_lsn self.info = info # We order operations based on the Postgres LSN self.ops = SortedList(key=lambda o: o["source"]["lsn"]) @@ -91,6 +92,7 @@ class KafkaExtract(Extract): ops_topic="zenodo-migration.public", tx_topic="zenodo-migration.postgres_transaction", last_tx=563385187, + last_lsn=563385187, offset=datetime.utcnow() - timedelta(minutes=5), config={ "bootstrap_servers": [ @@ -155,7 +157,7 @@ def __init__( assert last_tx is not None, "`last_tx` is required." self.last_tx = last_tx self.config = config or {} - self.tx_registry = SortedDict({}) + self.tx_registry = {} self.tx_buffer = tx_buffer self.max_tx_info_fetch = max_tx_info_fetch self.max_ops_fetch = max_ops_fetch @@ -241,7 +243,7 @@ def iter_tx_info(self): consumer.commit() continue - tx_id, lsn = map(int, tx_msg.value["id"].split(":")) + tx_id, tx_lsn = map(int, tx_msg.value["id"].split(":")) # We drop anything before the configured last transaction ID if tx_id <= self.last_tx: consumer.commit() @@ -252,7 +254,7 @@ def iter_tx_info(self): continue elif tx_msg.value["status"] == "END": consumer.commit() - yield (tx_id, tx_msg.value) + yield ((tx_id, tx_lsn), tx_msg.value) @staticmethod def _filter_unchanged_values(msg): @@ -291,17 +293,23 @@ def iter_ops(self): def _yield_completed_tx(self, min_batch=None): """Yields completed transactions. - Important: we only yield the "earliest" transactions that have been completed in - order. For example: + Important: we only yield the "earliest" commited transactions for which we have + complete data/ops. The commit order of transactions is defined by the COMMIT + operation's LSN. For example: - 1. We have Tx1, Tx2, and Tx3 in ``self.tx_registry``. - 2. Tx2 and Tx3 are complete, but Tx1 is still has missing operations (that will - come in a later iteration of the operations consumer). - 3. In this case we don't yield Tx2 and Tx3, since we're still waiting for Tx1 - to complete, so that we can return all the completed transactions in order. + 1. We have Tx1 (LSN:50), Tx2 (LSN:30), and Tx3 (LSN:10) in ``self.tx_registry``. + 2. We have complete data for Tx1 and Tx2, but not for Tx3. It has missing + operations that will come in a later iteration of the operations consumer. + 3. In this case we don't yield Tx1 and Tx2, since we're still waiting for Tx3 + to have complete data, so that we can return all the completed transactions + by their LSN order. """ + lsn_sorted_tx = sorted( + self.tx_registry.values(), + key=lambda t: (t.commit_lsn is None, t.commit_lsn), + ) completed_tx_batch = [] - for tx_state in self.tx_registry.values(): + for tx_state in lsn_sorted_tx: if not tx_state.complete: # We stop at the first non-completed transaction break @@ -318,8 +326,8 @@ def _yield_completed_tx(self, min_batch=None): for tx in completed_tx_batch: del self.tx_registry[tx.id] # Keep track of the last yielded transaction ID - self._last_yielded_tx = tx.id - yield Tx(id=tx.id, operations=list(tx.ops)) + self._last_yielded_tx = (tx.id, tx.commit_lsn) + yield Tx(id=tx.id, commit_lsn=tx.commit_lsn, operations=list(tx.ops)) def run(self): """Return a blocking generator yielding completed transactions.""" @@ -334,11 +342,16 @@ def run(self): tx_info_stream = itertools.islice( tx_info_stream, self.max_tx_info_fetch ) - for tx_id, tx_info in tx_info_stream: + for (tx_id, tx_lsn), tx_info in tx_info_stream: if tx_id in self.tx_registry: self.tx_registry[tx_id].info = tx_info + self.tx_registry[tx_id].commit_lsn = tx_lsn else: - self.tx_registry[tx_id] = _TxState(tx_id, tx_info) + self.tx_registry[tx_id] = _TxState( + tx_id, + commit_lsn=tx_lsn, + info=tx_info, + ) # We then consume operations and build up the (pending) transactions in # the registry. @@ -349,7 +362,9 @@ def run(self): tx_state = self.tx_registry.setdefault(tx_id, _TxState(tx_id)) tx_state.append(op) if tx_state.complete: - self.logger.info(f"Completed transaction {tx_state.id}") + self.logger.info( + f"Completed transaction {tx_state.id}:{tx_state.commit_lsn}" + ) yield from self._yield_completed_tx(min_batch=self.tx_buffer) diff --git a/migrator/zenodo_rdm_migrator/transform/records.py b/migrator/zenodo_rdm_migrator/transform/records.py index 0beb8b88..81a8e905 100644 --- a/migrator/zenodo_rdm_migrator/transform/records.py +++ b/migrator/zenodo_rdm_migrator/transform/records.py @@ -156,7 +156,7 @@ def _record(self, entry): "updated": entry.get("updated"), "version_id": entry.get("version_id"), "json": {"id": recid, "tombstone": tombstone}, - "index": entry.get("index", 0) + 1 + "index": entry.get("index", 0) + 1, } def _transform(self, entry): From b8874409466b208e00db6f9fd62eb829f8efc361 Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Fri, 22 Sep 2023 15:30:54 +0200 Subject: [PATCH 20/35] migrator: add logging to Kafka extract --- migrator/zenodo_rdm_migrator/extract/kafka.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/migrator/zenodo_rdm_migrator/extract/kafka.py b/migrator/zenodo_rdm_migrator/extract/kafka.py index 1c0cb38b..94aa2eb2 100644 --- a/migrator/zenodo_rdm_migrator/extract/kafka.py +++ b/migrator/zenodo_rdm_migrator/extract/kafka.py @@ -92,7 +92,6 @@ class KafkaExtract(Extract): ops_topic="zenodo-migration.public", tx_topic="zenodo-migration.postgres_transaction", last_tx=563385187, - last_lsn=563385187, offset=datetime.utcnow() - timedelta(minutes=5), config={ "bootstrap_servers": [ @@ -246,6 +245,7 @@ def iter_tx_info(self): tx_id, tx_lsn = map(int, tx_msg.value["id"].split(":")) # We drop anything before the configured last transaction ID if tx_id <= self.last_tx: + self.logger.info(f"Skipped {tx_id} at offset: {tx_msg.offset}") consumer.commit() continue if tx_msg.value["status"] == "BEGIN": @@ -342,6 +342,7 @@ def run(self): tx_info_stream = itertools.islice( tx_info_stream, self.max_tx_info_fetch ) + self.logger.info("Started streaming tx info") for (tx_id, tx_lsn), tx_info in tx_info_stream: if tx_id in self.tx_registry: self.tx_registry[tx_id].info = tx_info @@ -352,12 +353,14 @@ def run(self): commit_lsn=tx_lsn, info=tx_info, ) + self.logger.info("Stopped streaming tx info") # We then consume operations and build up the (pending) transactions in # the registry. ops_stream = self.iter_ops() if self.max_ops_fetch: ops_stream = itertools.islice(ops_stream, self.max_ops_fetch) + self.logger.info("Started streaming ops") for tx_id, op in ops_stream: tx_state = self.tx_registry.setdefault(tx_id, _TxState(tx_id)) tx_state.append(op) @@ -365,6 +368,7 @@ def run(self): self.logger.info( f"Completed transaction {tx_state.id}:{tx_state.commit_lsn}" ) + self.logger.info("Stopped streaming ops") yield from self._yield_completed_tx(min_batch=self.tx_buffer) From c68057754d7505ee3b5bcfa819434765584d0afc Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Fri, 22 Sep 2023 17:09:46 +0200 Subject: [PATCH 21/35] extract: add better offsets support --- migrator/zenodo_rdm_migrator/extract/kafka.py | 35 ++++++++++++------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/migrator/zenodo_rdm_migrator/extract/kafka.py b/migrator/zenodo_rdm_migrator/extract/kafka.py index 94aa2eb2..8faf8d38 100644 --- a/migrator/zenodo_rdm_migrator/extract/kafka.py +++ b/migrator/zenodo_rdm_migrator/extract/kafka.py @@ -141,18 +141,17 @@ def __init__( max_tx_info_fetch=200, max_ops_fetch=2000, config=None, - offset="earliest", + tx_offset="earliest", + ops_offset="earliest", remove_unchanged_fields=True, _dump_dir=None, ): """Constructor.""" - self.ops_topic = ops_topic self.tx_topic = tx_topic - if isinstance(offset, datetime): - offset = int(offset.timestamp() * 1000) - if isinstance(offset, str): - assert offset in ("earliest", "latest") - self.offset = offset + self.tx_offset = tx_offset + self.ops_topic = ops_topic + self.ops_offset = ops_offset + assert last_tx is not None, "`last_tx` is required." self.last_tx = last_tx self.config = config or {} @@ -181,8 +180,12 @@ def _seek_offsets(self, consumer, topic, target_offset="earliest"): TopicPartition(topic, p): None for p in consumer.partitions_for_topic(topic) } consumer.assign(partitions) - # If we have a target timestamp to start from, use it... if isinstance(target_offset, int): + partitions.update({p: target_offset for p in partitions}) + if isinstance(target_offset, dict): + partitions.update({p: target_offset[p.partition] for p in partitions}) + if isinstance(target_offset, datetime): + target_offset = int(target_offset.timestamp() * 1000) offsets = consumer.offsets_for_times({p: target_offset for p in partitions}) partitions.update({p: o for p, (o, _) in offsets.items()}) elif isinstance(target_offset, str): @@ -204,7 +207,7 @@ def _seek_committed_offsets(self, consumer, topic): for partition, offset in zip(partitions, offsets): consumer.seek(partition, offset) - def _get_consumer(self, topic, group_id): + def _get_consumer(self, topic, group_id, offset): consumer = KafkaConsumer( group_id=group_id, **self.DEFAULT_CONSUMER_CFG, @@ -214,7 +217,7 @@ def _get_consumer(self, topic, group_id): self._topic_states[topic] = self._seek_offsets( consumer, topic, - target_offset=self.offset, + target_offset=offset, ) else: self._seek_committed_offsets(consumer, topic) @@ -223,11 +226,19 @@ def _get_consumer(self, topic, group_id): # NOTE: These two properties are useful for tests/mocking @property def _tx_consumer(self): - return self._get_consumer(self.tx_topic, "zenodo_migration_tx") + return self._get_consumer( + self.tx_topic, + "zenodo_migration_tx", + self.tx_offset, + ) @property def _ops_consumer(self): - return self._get_consumer(self.ops_topic, "zenodo_migration_ops") + return self._get_consumer( + self.ops_topic, + "zenodo_migration_ops", + self.ops_offset, + ) def iter_tx_info(self): """Yield commited transactions info.""" From d3905a39998ddec7b23e75b002860e16a748961d Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Fri, 22 Sep 2023 23:48:01 +0200 Subject: [PATCH 22/35] migrator: add ignored actions --- .../actions/transform/__init__.py | 8 ++-- .../actions/transform/ignored.py | 38 +++++++++++++++++++ .../actions/transform/users.py | 7 ++++ 3 files changed, 49 insertions(+), 4 deletions(-) create mode 100644 migrator/zenodo_rdm_migrator/actions/transform/ignored.py diff --git a/migrator/zenodo_rdm_migrator/actions/transform/__init__.py b/migrator/zenodo_rdm_migrator/actions/transform/__init__.py index 55bdd449..4d1e083d 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/__init__.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/__init__.py @@ -32,7 +32,8 @@ OAuthServerTokenDeleteAction, OAuthServerTokenUpdateAction, ) -from .users import UserDeactivationAction, UserEditAction, UserRegistrationAction +from .users import USER_ACTIONS +from .ignored import IGNORED_ACTIONS __all__ = ( "CommunityCreateAction", @@ -56,7 +57,6 @@ "OAuthServerTokenUpdateAction", "ReleaseReceiveAction", "ReleaseUpdateAction", - "UserDeactivationAction", - "UserEditAction", - "UserRegistrationAction", + "IGNORED_ACTIONS", + "USER_ACTIONS", ) diff --git a/migrator/zenodo_rdm_migrator/actions/transform/ignored.py b/migrator/zenodo_rdm_migrator/actions/transform/ignored.py new file mode 100644 index 00000000..6bc3bfe5 --- /dev/null +++ b/migrator/zenodo_rdm_migrator/actions/transform/ignored.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2023 CERN. +# +# Invenio-RDM-Migrator is free software; you can redistribute it and/or modify +# it under the terms of the MIT License; see LICENSE file for more details. + +"""ZenodoRDM migration ignored actions module.""" + + +from invenio_rdm_migrator.actions import TransformAction +from invenio_rdm_migrator.load.postgresql.transactions.operations import OperationType +from invenio_rdm_migrator.streams.actions import load + + +class FileChecksumAction(TransformAction): + """Zenodo to RDM for file checksum.""" + + name = "file-checksum" + load_cls = load.IgnoredAction + + @classmethod + def matches_action(cls, tx): + """Checks for a single .""" + table_ops = [(o["source"]["table"], o["op"]) for o in tx.operations] + if table_ops == [("files_files", OperationType.UPDATE)]: + changed_keys = tx.operations[0].get("after", {}).keys() + return {"last_check", "last_check_at"} < changed_keys + return False + + def _transform_data(self): + """Transforms the data and returns an instance of the mapped_cls.""" + return {} + + +IGNORED_ACTIONS = [ + FileChecksumAction, +] diff --git a/migrator/zenodo_rdm_migrator/actions/transform/users.py b/migrator/zenodo_rdm_migrator/actions/transform/users.py index 9733523f..e0c54d76 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/users.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/users.py @@ -165,3 +165,10 @@ def _transform_data(self): return dict( tx_id=self.tx.id, user=user, login_information=login_info, sessions=sessions ) + + +USER_ACTIONS = [ + UserRegistrationAction, + UserEditAction, + UserDeactivationAction, +] From 951426897e5bb7e0710f13f29f820acf135d3d1a Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Sat, 23 Sep 2023 01:53:14 +0200 Subject: [PATCH 23/35] migrator: reorganize action imports --- .../communities/test_community_actions.py | 2 +- .../actions/drafts/test_drafts_actions.py | 2 +- .../tests/actions/files/test_files_actions.py | 2 +- .../actions/github/test_github_actions.py | 2 +- .../tests/actions/oauth/test_oauth_actions.py | 2 +- .../tests/actions/users/test_user_actions.py | 2 +- .../actions/transform/__init__.py | 58 ++++-------------- .../actions/transform/communities.py | 7 +++ .../actions/transform/drafts.py | 7 +++ .../actions/transform/files.py | 5 ++ .../actions/transform/github.py | 9 +++ .../actions/transform/oauth.py | 13 ++++ .../transform/transactions.py | 60 +++++-------------- 13 files changed, 72 insertions(+), 99 deletions(-) diff --git a/migrator/tests/actions/communities/test_community_actions.py b/migrator/tests/actions/communities/test_community_actions.py index 1cc6ee68..f9391b99 100644 --- a/migrator/tests/actions/communities/test_community_actions.py +++ b/migrator/tests/actions/communities/test_community_actions.py @@ -12,7 +12,7 @@ from invenio_rdm_migrator.load.postgresql.transactions.operations import OperationType from invenio_rdm_migrator.streams.actions import load -from zenodo_rdm_migrator.actions.transform import ( +from zenodo_rdm_migrator.actions.transform.communities import ( CommunityCreateAction, CommunityDeleteAction, CommunityUpdateAction, diff --git a/migrator/tests/actions/drafts/test_drafts_actions.py b/migrator/tests/actions/drafts/test_drafts_actions.py index a1d4eda4..d7fd5112 100644 --- a/migrator/tests/actions/drafts/test_drafts_actions.py +++ b/migrator/tests/actions/drafts/test_drafts_actions.py @@ -12,7 +12,7 @@ from invenio_rdm_migrator.load.postgresql.transactions.operations import OperationType from invenio_rdm_migrator.streams.actions import load -from zenodo_rdm_migrator.actions.transform import ( +from zenodo_rdm_migrator.actions.transform.drafts import ( DraftCreateAction, DraftEditAction, DraftPublishAction, diff --git a/migrator/tests/actions/files/test_files_actions.py b/migrator/tests/actions/files/test_files_actions.py index 2d8d129b..6de4527c 100644 --- a/migrator/tests/actions/files/test_files_actions.py +++ b/migrator/tests/actions/files/test_files_actions.py @@ -12,7 +12,7 @@ from invenio_rdm_migrator.load.postgresql.transactions.operations import OperationType from invenio_rdm_migrator.streams.actions import load -from zenodo_rdm_migrator.actions.transform import DraftFileUploadAction +from zenodo_rdm_migrator.actions.transform.files import DraftFileUploadAction def test_matches_with_valid_data(file_upload_tx): diff --git a/migrator/tests/actions/github/test_github_actions.py b/migrator/tests/actions/github/test_github_actions.py index 5bd49a70..d67ed1a1 100644 --- a/migrator/tests/actions/github/test_github_actions.py +++ b/migrator/tests/actions/github/test_github_actions.py @@ -15,7 +15,7 @@ from invenio_rdm_migrator.load.postgresql.transactions.operations import OperationType from invenio_rdm_migrator.streams.actions import load -from zenodo_rdm_migrator.actions.transform import ( +from zenodo_rdm_migrator.actions.transform.github import ( HookEventCreateAction, HookEventUpdateAction, HookRepoUpdateAction, diff --git a/migrator/tests/actions/oauth/test_oauth_actions.py b/migrator/tests/actions/oauth/test_oauth_actions.py index 286f9296..efdc97a9 100644 --- a/migrator/tests/actions/oauth/test_oauth_actions.py +++ b/migrator/tests/actions/oauth/test_oauth_actions.py @@ -15,7 +15,7 @@ from invenio_rdm_migrator.load.postgresql.transactions.operations import OperationType from invenio_rdm_migrator.streams.actions import load -from zenodo_rdm_migrator.actions.transform import ( +from zenodo_rdm_migrator.actions.transform.oauth import ( OAuthApplicationCreateAction, OAuthApplicationDeleteAction, OAuthApplicationUpdateAction, diff --git a/migrator/tests/actions/users/test_user_actions.py b/migrator/tests/actions/users/test_user_actions.py index 5b3e6c2e..063ecfde 100644 --- a/migrator/tests/actions/users/test_user_actions.py +++ b/migrator/tests/actions/users/test_user_actions.py @@ -12,7 +12,7 @@ from invenio_rdm_migrator.load.postgresql.transactions.operations import OperationType from invenio_rdm_migrator.streams.actions import load -from zenodo_rdm_migrator.actions.transform import ( +from zenodo_rdm_migrator.actions.transform.users import ( UserDeactivationAction, UserEditAction, UserRegistrationAction, diff --git a/migrator/zenodo_rdm_migrator/actions/transform/__init__.py b/migrator/zenodo_rdm_migrator/actions/transform/__init__.py index 4d1e083d..ac2b470e 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/__init__.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/__init__.py @@ -7,56 +7,20 @@ """Transform actions module.""" -from .communities import ( - CommunityCreateAction, - CommunityDeleteAction, - CommunityUpdateAction, -) -from .drafts import DraftCreateAction, DraftEditAction, DraftPublishAction -from .files import DraftFileUploadAction -from .github import ( - HookEventCreateAction, - HookEventUpdateAction, - HookRepoUpdateAction, - ReleaseReceiveAction, - ReleaseUpdateAction, -) -from .oauth import ( - OAuthApplicationCreateAction, - OAuthApplicationDeleteAction, - OAuthApplicationUpdateAction, - OAuthGHDisconnectToken, - OAuthLinkedAccountConnectAction, - OAuthLinkedAccountDisconnectAction, - OAuthServerTokenCreateAction, - OAuthServerTokenDeleteAction, - OAuthServerTokenUpdateAction, -) -from .users import USER_ACTIONS +from .communities import COMMUNITY_ACTIONS +from .drafts import DRAFT_ACTIONS +from .files import FILES_ACTIONS +from .github import GITHUB_ACTIONS from .ignored import IGNORED_ACTIONS +from .oauth import OAUTH_ACTIONS +from .users import USER_ACTIONS __all__ = ( - "CommunityCreateAction", - "CommunityDeleteAction", - "CommunityUpdateAction", - "DraftCreateAction", - "DraftEditAction", - "DraftFileUploadAction", - "DraftPublishAction", - "HookEventCreateAction", - "HookEventUpdateAction", - "HookRepoUpdateAction", - "OAuthApplicationCreateAction", - "OAuthApplicationDeleteAction", - "OAuthApplicationUpdateAction", - "OAuthGHDisconnectToken", - "OAuthLinkedAccountConnectAction", - "OAuthLinkedAccountDisconnectAction", - "OAuthServerTokenCreateAction", - "OAuthServerTokenDeleteAction", - "OAuthServerTokenUpdateAction", - "ReleaseReceiveAction", - "ReleaseUpdateAction", + "DRAFT_ACTIONS", + "FILES_ACTIONS", + "GITHUB_ACTIONS", + "OAUTH_ACTIONS", + "COMMUNITY_ACTIONS", "IGNORED_ACTIONS", "USER_ACTIONS", ) diff --git a/migrator/zenodo_rdm_migrator/actions/transform/communities.py b/migrator/zenodo_rdm_migrator/actions/transform/communities.py index ee3a72f6..cb5e997e 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/communities.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/communities.py @@ -188,3 +188,10 @@ def _transform_data(self): "tx_id": self.tx.id, "community": {"slug": self.tx.operations[0]["after"]["id"]}, } + + +COMMUNITY_ACTIONS = [ + CommunityCreateAction, + CommunityDeleteAction, + CommunityUpdateAction, +] diff --git a/migrator/zenodo_rdm_migrator/actions/transform/drafts.py b/migrator/zenodo_rdm_migrator/actions/transform/drafts.py index d72f2f19..5859038d 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/drafts.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/drafts.py @@ -286,3 +286,10 @@ def _transform_data(self): # pragma: no cover parent=parent, draft=draft, ) + + +DRAFT_ACTIONS = [ + DraftCreateAction, + DraftEditAction, + DraftPublishAction, +] diff --git a/migrator/zenodo_rdm_migrator/actions/transform/files.py b/migrator/zenodo_rdm_migrator/actions/transform/files.py index 9c0b243f..92b7f471 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/files.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/files.py @@ -83,3 +83,8 @@ def _transform_data(self): file_instance=fi, file_record=fr, ) + + +FILES_ACTIONS = [ + DraftFileUploadAction, +] diff --git a/migrator/zenodo_rdm_migrator/actions/transform/github.py b/migrator/zenodo_rdm_migrator/actions/transform/github.py index 7d069677..5bf40db5 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/github.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/github.py @@ -456,3 +456,12 @@ def _transform_data(self): result["parent_doi"] = parent_doi return result + + +GITHUB_ACTIONS = [ + HookEventCreateAction, + HookEventUpdateAction, + HookRepoUpdateAction, + ReleaseReceiveAction, + ReleaseUpdateAction, +] diff --git a/migrator/zenodo_rdm_migrator/actions/transform/oauth.py b/migrator/zenodo_rdm_migrator/actions/transform/oauth.py index 36eb7bca..940add5c 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/oauth.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/oauth.py @@ -432,3 +432,16 @@ def _transform_data(self): "token": OAuthServerTokenTransform()._transform(token), "user_identity": IdentityTransform()._transform(user_identity), } + + +OAUTH_ACTIONS = [ + OAuthApplicationCreateAction, + OAuthApplicationDeleteAction, + OAuthApplicationUpdateAction, + OAuthGHDisconnectToken, + OAuthLinkedAccountConnectAction, + OAuthLinkedAccountDisconnectAction, + OAuthServerTokenCreateAction, + OAuthServerTokenDeleteAction, + OAuthServerTokenUpdateAction, +] diff --git a/migrator/zenodo_rdm_migrator/transform/transactions.py b/migrator/zenodo_rdm_migrator/transform/transactions.py index 1dac77af..2cf372f7 100644 --- a/migrator/zenodo_rdm_migrator/transform/transactions.py +++ b/migrator/zenodo_rdm_migrator/transform/transactions.py @@ -11,29 +11,13 @@ from invenio_rdm_migrator.transform import BaseTxTransform from ..actions.transform import ( - CommunityCreateAction, - CommunityDeleteAction, - CommunityUpdateAction, - DraftCreateAction, - DraftEditAction, - DraftFileUploadAction, - HookEventCreateAction, - HookEventUpdateAction, - HookRepoUpdateAction, - OAuthApplicationCreateAction, - OAuthApplicationDeleteAction, - OAuthApplicationUpdateAction, - OAuthGHDisconnectToken, - OAuthLinkedAccountConnectAction, - OAuthLinkedAccountDisconnectAction, - OAuthServerTokenCreateAction, - OAuthServerTokenDeleteAction, - OAuthServerTokenUpdateAction, - ReleaseReceiveAction, - ReleaseUpdateAction, - UserDeactivationAction, - UserEditAction, - UserRegistrationAction, + COMMUNITY_ACTIONS, + DRAFT_ACTIONS, + FILES_ACTIONS, + GITHUB_ACTIONS, + IGNORED_ACTIONS, + OAUTH_ACTIONS, + USER_ACTIONS, ) @@ -41,27 +25,11 @@ class ZenodoTxTransform(BaseTxTransform): """Zenodo transaction transform.""" actions = [ - CommunityCreateAction, - CommunityDeleteAction, - CommunityUpdateAction, - DraftCreateAction, - DraftEditAction, - DraftFileUploadAction, - HookEventCreateAction, - HookEventUpdateAction, - HookRepoUpdateAction, - OAuthApplicationCreateAction, - OAuthApplicationDeleteAction, - OAuthApplicationUpdateAction, - OAuthGHDisconnectToken, - OAuthLinkedAccountConnectAction, - OAuthLinkedAccountDisconnectAction, - OAuthServerTokenCreateAction, - OAuthServerTokenDeleteAction, - OAuthServerTokenUpdateAction, - ReleaseReceiveAction, - ReleaseUpdateAction, - UserDeactivationAction, - UserEditAction, - UserRegistrationAction, + *GITHUB_ACTIONS, + *FILES_ACTIONS, + *DRAFT_ACTIONS, + *COMMUNITY_ACTIONS, + *OAUTH_ACTIONS, + *USER_ACTIONS, + *IGNORED_ACTIONS, ] From 672a809d19c928aa609dd66660418471cfc0fde3 Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Sat, 23 Sep 2023 01:53:35 +0200 Subject: [PATCH 24/35] migrator: set default communities review policy --- migrator/tests/transform/test_community_transform.py | 1 + migrator/zenodo_rdm_migrator/transform/entries/communities.py | 1 + 2 files changed, 2 insertions(+) diff --git a/migrator/tests/transform/test_community_transform.py b/migrator/tests/transform/test_community_transform.py index 5cc12ee1..5bbe2e27 100644 --- a/migrator/tests/transform/test_community_transform.py +++ b/migrator/tests/transform/test_community_transform.py @@ -49,6 +49,7 @@ def expected_rdm_community(): "visibility": "public", "member_policy": "open", "record_policy": "open", + "review_policy": "open", }, "metadata": { "page": "", diff --git a/migrator/zenodo_rdm_migrator/transform/entries/communities.py b/migrator/zenodo_rdm_migrator/transform/entries/communities.py index dc19d346..6e3e3403 100644 --- a/migrator/zenodo_rdm_migrator/transform/entries/communities.py +++ b/migrator/zenodo_rdm_migrator/transform/entries/communities.py @@ -62,6 +62,7 @@ def _access(self, entry): "visibility": "public", "member_policy": "open", "record_policy": "open", + "review_policy": "open", } def _bucket_id(self, entry): From d4a46fb4cce9578c4f7a8e18b3637835e94e4c5d Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Sat, 23 Sep 2023 01:54:23 +0200 Subject: [PATCH 25/35] migrator: fix access conditions for parents --- .../tests/transform/test_record_transform.py | 15 ++++++-- .../transform/entries/parents.py | 14 +++++++ .../zenodo_rdm_migrator/transform/records.py | 38 ++++++++++++++++--- 3 files changed, 58 insertions(+), 9 deletions(-) diff --git a/migrator/tests/transform/test_record_transform.py b/migrator/tests/transform/test_record_transform.py index 5e39590e..13d4f10a 100644 --- a/migrator/tests/transform/test_record_transform.py +++ b/migrator/tests/transform/test_record_transform.py @@ -17,6 +17,7 @@ ZenodoRecordMetadataEntry, ) from zenodo_rdm_migrator.transform.records import ( + ZenodoDeletedRecordTransform, ZenodoDraftEntry, ZenodoRecordEntry, ZenodoRecordTransform, @@ -543,7 +544,6 @@ def expected_rdm_record_entry(): "gbif-dwc:identifiedByID": ["foo", "bar"], "gbif-dwc:recordedByID": ["foo", "bar"], }, - "tombstone": None, }, } @@ -562,6 +562,7 @@ def expected_rdm_record_parent(): "id": "10122", "access": {"owned_by": {"user": 1234}}, "communities": {"ids": ["zenodo", "migration"], "default": None}, + "permission_flags": {"can_community_manage_record": False}, "pids": { "doi": { "provider": "datacite", @@ -606,7 +607,11 @@ def test_record_transform_parent_record(zenodo_record_data, expected_rdm_record_ assert result == expected_rdm_record_parent -def test_record_transform_tombstone(zenodo_record_data, expected_rdm_record_entry): +def test_record_transform_tombstone( + zenodo_record_data, + expected_rdm_record_entry, + expected_rdm_record_parent, +): """Tests record tombstone transformation.""" zenodo_record_data["removal_date"] = "2023-09-15T11:39:52.929322" zenodo_record_data["removal_json"] = { @@ -622,8 +627,9 @@ def test_record_transform_tombstone(zenodo_record_data, expected_rdm_record_entr "citation_text": None, } - result = ZenodoRecordEntry().transform(zenodo_record_data) - assert result == expected_rdm_record_entry + result = ZenodoDeletedRecordTransform()._transform(zenodo_record_data) + assert result["record"] == expected_rdm_record_entry + assert result["parent"] == expected_rdm_record_parent ### @@ -1050,6 +1056,7 @@ def expected_rdm_draft_parent(): "access": {"owned_by": {"user": 1234}}, "communities": {}, "pids": {}, + "permission_flags": {"can_community_manage_record": False}, }, } diff --git a/migrator/zenodo_rdm_migrator/transform/entries/parents.py b/migrator/zenodo_rdm_migrator/transform/entries/parents.py index f774e44c..00eaca1c 100644 --- a/migrator/zenodo_rdm_migrator/transform/entries/parents.py +++ b/migrator/zenodo_rdm_migrator/transform/entries/parents.py @@ -86,8 +86,19 @@ def transform(self, entry): } owner = next(iter(entry["json"].get("owners", [])), None) if owner is not None: + transformed["json"].setdefault("access", {}) transformed["json"]["access"] = {"owned_by": {"user": owner}} + access_conditions = entry["json"].get("access_conditions") + if access_conditions: + transformed["json"].setdefault("access", {}) + transformed["json"]["access"]["settings"] = { + "allow_user_requests": True, + "allow_guest_requests": True, + "accept_conditions_text": access_conditions, + "secret_link_expiration": 30, + } + permission_flags = {} owner_comm_slugs = { comm["slug"] @@ -99,6 +110,9 @@ def transform(self, entry): has_only_managed_communities = comm_slugs < owner_comm_slugs if not has_only_managed_communities: permission_flags["can_community_manage_record"] = False + if entry["json"].get("access_right") != "open": + permission_flags["can_community_read_files"] = False + if permission_flags: transformed["json"]["permission_flags"] = permission_flags elif not self.partial: diff --git a/migrator/zenodo_rdm_migrator/transform/records.py b/migrator/zenodo_rdm_migrator/transform/records.py index 81a8e905..7ac78f2d 100644 --- a/migrator/zenodo_rdm_migrator/transform/records.py +++ b/migrator/zenodo_rdm_migrator/transform/records.py @@ -7,6 +7,7 @@ """Zenodo migrator records transformers.""" +from invenio_rdm_migrator.state import STATE from invenio_rdm_migrator.streams.records import RDMRecordTransform from zenodo_rdm_migrator.errors import InvalidTombstoneRecord @@ -76,11 +77,8 @@ def _parent(self, entry): "version_id": entry.get("version_id"), } parent_pid = entry["json"].get("conceptrecid") - transformed["json"] = {"id": parent_pid, "communities": {}} - owner = next(iter(entry["json"].get("owners", [])), None) - if owner is not None: - transformed["json"]["access"] = {"owned_by": {"user": owner}} - + communities = ParentRecordEntry()._communities(entry) + transformed["json"] = {"id": parent_pid, "communities": communities} pids = {} doi = entry["json"].get("doi") conceptdoi = entry["json"].get("conceptdoi") @@ -95,6 +93,36 @@ def _parent(self, entry): pids["doi"] = {"provider": "legacy", "identifier": ""} transformed["json"]["pids"] = pids + owner = next(iter(entry["json"].get("owners", [])), None) + if owner is not None: + transformed["json"].setdefault("access", {}) + transformed["json"]["access"] = {"owned_by": {"user": owner}} + + access_conditions = entry["json"].get("access_conditions") + if access_conditions: + transformed["json"].setdefault("access", {}) + transformed["json"]["access"]["settings"] = { + "allow_user_requests": True, + "allow_guest_requests": True, + "accept_conditions_text": access_conditions, + "secret_link_expiration": 30, + } + + permission_flags = {} + owner_comm_slugs = { + comm["slug"] + for comm in (STATE.COMMUNITIES.search("owner_id", owner) if owner else []) + } + comm_slugs = set(communities.get("ids", [])) + has_only_managed_communities = comm_slugs < owner_comm_slugs + if not has_only_managed_communities: + permission_flags["can_community_manage_record"] = False + if entry["json"].get("access_right") != "open": + permission_flags["can_community_read_files"] = False + + if permission_flags: + transformed["json"]["permission_flags"] = permission_flags + return transformed def _draft(self, entry): From cabb1f0682e69c9c9d4f982d8c2db830aee26061 Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Sat, 23 Sep 2023 02:34:42 +0200 Subject: [PATCH 26/35] migrator: additional Kafka extract logging --- migrator/zenodo_rdm_migrator/extract/kafka.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/migrator/zenodo_rdm_migrator/extract/kafka.py b/migrator/zenodo_rdm_migrator/extract/kafka.py index 8faf8d38..9b4014a7 100644 --- a/migrator/zenodo_rdm_migrator/extract/kafka.py +++ b/migrator/zenodo_rdm_migrator/extract/kafka.py @@ -323,6 +323,7 @@ def _yield_completed_tx(self, min_batch=None): for tx_state in lsn_sorted_tx: if not tx_state.complete: # We stop at the first non-completed transaction + self.logger.info(f"Earliest incomplete Tx: {tx_state}") break completed_tx_batch.append(tx_state) @@ -332,6 +333,8 @@ def _yield_completed_tx(self, min_batch=None): # If we didn't make a big enough batch we return if min_batch and len(completed_tx_batch) < min_batch: + next_missing_tx = lsn_sorted_tx[len(completed_tx_batch)] + self.logger.info(f"Couldn't gather {min_batch=}: {next_missing_tx=}") return for tx in completed_tx_batch: @@ -383,6 +386,9 @@ def run(self): yield from self._yield_completed_tx(min_batch=self.tx_buffer) + self.logger.info(f"Transactions in registry: {self.tx_registry.keys()}") + self.logger.info(f"{self._last_yielded_tx=}") + # If no new transactions, we don't need to sleep since consumers # have a timeout/sleep already via "consumer_timeout_ms". From f9dc98118f15648e6fa442f4ada5b72a8f86336e Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Sat, 23 Sep 2023 14:41:31 +0200 Subject: [PATCH 27/35] migrator: add missing GitHub action and fix matching --- migrator/tests/actions/conftest.py | 2 +- .../actions/github/test_github_actions.py | 13 ++- .../github/test_github_actions_stream.py | 28 +---- .../actions/transform/github.py | 110 ++++++++++-------- 4 files changed, 70 insertions(+), 83 deletions(-) diff --git a/migrator/tests/actions/conftest.py b/migrator/tests/actions/conftest.py index f88c565b..33a5cf72 100644 --- a/migrator/tests/actions/conftest.py +++ b/migrator/tests/actions/conftest.py @@ -197,4 +197,4 @@ def database(engine): @pytest.fixture(scope="function") def pg_tx_load(db_uri, session): """Load instance configured with the DB session fixture.""" - return PostgreSQLTx(db_uri, _session=session) + return PostgreSQLTx(db_uri, _session=session, dry=False) diff --git a/migrator/tests/actions/github/test_github_actions.py b/migrator/tests/actions/github/test_github_actions.py index d67ed1a1..352dd07a 100644 --- a/migrator/tests/actions/github/test_github_actions.py +++ b/migrator/tests/actions/github/test_github_actions.py @@ -18,9 +18,10 @@ from zenodo_rdm_migrator.actions.transform.github import ( HookEventCreateAction, HookEventUpdateAction, - HookRepoUpdateAction, ReleaseReceiveAction, ReleaseUpdateAction, + RepoCreateAction, + RepoUpdateAction, ) ## @@ -41,12 +42,12 @@ def hook_enable_step1_tx(): return {"tx_id": 1, "operations": ops} -class TestHookRepoUpdateAction: +class TestRepoUpdateAction: """Create OAuth server token action tests.""" def test_matches_with_valid_data(self): assert ( - HookRepoUpdateAction.matches_action( + RepoUpdateAction.matches_action( Tx( id=1, operations=[ @@ -87,18 +88,18 @@ def test_matches_with_invalid_data(self): extra_op, ]: assert ( - HookRepoUpdateAction.matches_action(Tx(id=1, operations=invalid_ops)) + RepoUpdateAction.matches_action(Tx(id=1, operations=invalid_ops)) is False ) def test_transform_with_valid_data(self, hook_enable_step1_tx): - action = HookRepoUpdateAction( + action = RepoUpdateAction( Tx( id=hook_enable_step1_tx["tx_id"], operations=hook_enable_step1_tx["operations"], ) ) - assert isinstance(action.transform(), load.HookRepoUpdateAction) + assert isinstance(action.transform(), load.RepoUpdateAction) @pytest.fixture() diff --git a/migrator/tests/actions/github/test_github_actions_stream.py b/migrator/tests/actions/github/test_github_actions_stream.py index db967a57..ac604bc0 100644 --- a/migrator/tests/actions/github/test_github_actions_stream.py +++ b/migrator/tests/actions/github/test_github_actions_stream.py @@ -55,29 +55,8 @@ def test_github_hook_repo_update( assert repo.user_id == 86490 -@pytest.fixture(scope="function") -def db_token(database, session): - token = ServerToken( - id=156666, - client_id="SZLrR8ApZPeBjqj7uMB1JWXavhxebu6V0mwMtvMr", - user_id=123456, - token_type="bearer", - access_token="cH4ng3DzbXd4QTcrRjFMcTVMRHl3QlY2Rkdib0VwREY4aDhPcHo2dUt2ZnZ3OVVPa1BvRDl0L1NRZmFrdXNIU2hJR2JWc0NHZDZSVEhVT2JQcmdjS1E9PQ==", - refresh_token=None, - expires=None, - _scopes="tokens:generate user:email", - is_personal=True, - is_internal=False, - ) - - session.add(token) - session.commit() - - return session - - def test_github_hook_event_create( - database, db_token, pg_tx_load, test_extract_cls, tx_files + database, session, pg_tx_load, test_extract_cls, tx_files ): stream = Stream( name="action", @@ -86,10 +65,7 @@ def test_github_hook_event_create( load=pg_tx_load, ) stream.run() - - token = db_token.scalars(sa.select(ServerToken)).one() - assert token.expires - assert db_token.scalars(sa.select(WebhookEvent)).one() + assert session.scalars(sa.select(WebhookEvent)).one() def test_github_hook_disable( diff --git a/migrator/zenodo_rdm_migrator/actions/transform/github.py b/migrator/zenodo_rdm_migrator/actions/transform/github.py index 5bf40db5..77982cb7 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/github.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/github.py @@ -19,11 +19,53 @@ from ...transform.entries.records.records import ZenodoRecordEntry # -# Hooks +# Repository # -class HookRepoUpdateAction(TransformAction): +class RepoCreateAction(TransformAction): + """Zenodo to RDM GitHub repository enable/create. + + This will serve for hook enabling first phase and for disabling, as well as for + normal repository updates. + """ + + name = "gh-repo-create" + load_cls = load.RepoCreateAction + + @classmethod + def matches_action(cls, tx): + """Checks if the data corresponds with that required by the action.""" + ops = [(op["source"]["table"], op["op"]) for op in tx.operations] + has_release_ops = any(table == "github_releases" for table, _, in ops) + has_insert_and_update_repo_ops = all( + ("github_repositories", op_type) in ops + for op_type in (OperationType.INSERT, OperationType.UPDATE) + ) + return has_insert_and_update_repo_ops and not has_release_ops + + def _transform_data(self): + """Transforms the data and returns dictionary.""" + repo_insert_op = self.tx.operations[0] + assert repo_insert_op["op"] == OperationType.INSERT + repo = repo_insert_op["after"] + for op in self.tx.operations[1:]: + if ( + op["source"]["table"] == "github_repositories" + and op["op"] == OperationType.UPDATE + ): + self._microseconds_to_isodate( + data=op["after"], fields=["created", "updated"] + ) + repo.update(op["after"]) + + return { + "tx_id": self.tx.id, + "gh_repository": GitHubRepositoryTransform()._transform(repo), + } + + +class RepoUpdateAction(TransformAction): """Zenodo to RDM GitHub repository update of a webhook. This will serve for hook enabling first phase and for disabling, as well as for @@ -31,16 +73,14 @@ class HookRepoUpdateAction(TransformAction): """ name = "gh-hook-repo-update" - load_cls = load.HookRepoUpdateAction + load_cls = load.RepoUpdateAction @classmethod def matches_action(cls, tx): """Checks if the data corresponds with that required by the action.""" if len(tx.operations) != 1: return False - op = tx.operations[0] - return ( op["source"]["table"] == "github_repositories" and op["op"] == OperationType.UPDATE @@ -49,15 +89,16 @@ def matches_action(cls, tx): def _transform_data(self): """Transforms the data and returns dictionary.""" op = self.tx.operations[0] - self._microseconds_to_isodate(data=op["after"], fields=["created", "updated"]) - - result = { + return { "tx_id": self.tx.id, "gh_repository": GitHubRepositoryTransform()._transform(op["after"]), } - return result + +# +# Webhooks +# class HookEventCreateAction(TransformAction, JSONTransformMixin): @@ -73,34 +114,17 @@ class HookEventCreateAction(TransformAction, JSONTransformMixin): @classmethod def matches_action(cls, tx): """Checks if the data corresponds with that required by the action.""" - if len(tx.operations) == 1: - op = tx.operations[0] - return ( - op["source"]["table"] == "webhooks_events" - and op["op"] == OperationType.INSERT - ) - - if len(tx.operations) == 2: - rules = { - "webhooks_events": OperationType.INSERT, - "oauth2server_token": OperationType.UPDATE, - } - - for op in tx.operations: - rule = rules.pop(op["source"]["table"], None) - if not rule or rule != op["op"]: - return False - - return True - - return False + ops = [(op["source"]["table"], op["op"]) for op in tx.operations] + return ("webhooks_events", OperationType.INSERT) in ops def _transform_data(self): """Transforms the data and returns dictionary.""" webhook_event = None - server_token = None for op in self.tx.operations: - if op["source"]["table"] == "webhooks_events": + if ( + op["source"]["table"] == "webhooks_events" + and op["op"] == OperationType.INSERT + ): self._microseconds_to_isodate( data=op["after"], fields=["created", "updated"] ) @@ -114,18 +138,10 @@ def _transform_data(self): ], ) webhook_event = op["after"] - elif op["source"]["table"] == "oauth2server_token": - self._microseconds_to_isodate(data=op["after"], fields=["expires"]) - server_token = op["after"] - - result = { + return { "tx_id": self.tx.id, "webhook_event": IdentityTransform()._transform(webhook_event), } - if server_token: - result["oauth_token"] = OAuthServerTokenTransform()._transform(server_token) - - return result class HookEventUpdateAction(TransformAction, JSONTransformMixin): @@ -150,19 +166,12 @@ def matches_action(cls, tx): def _transform_data(self): """Transforms the data and returns dictionary.""" op = self.tx.operations[0] - self._load_json_fields( data=op["after"], fields=["payload", "payload_headers", "response", "response_headers"], ) self._microseconds_to_isodate(data=op["after"], fields=["created", "updated"]) - - result = { - "tx_id": self.tx.id, - "webhook_event": IdentityTransform()._transform(op["after"]), - } - - return result + return {"tx_id": self.tx.id, "webhook_event": op["after"]} # @@ -461,7 +470,8 @@ def _transform_data(self): GITHUB_ACTIONS = [ HookEventCreateAction, HookEventUpdateAction, - HookRepoUpdateAction, + RepoCreateAction, + RepoUpdateAction, ReleaseReceiveAction, ReleaseUpdateAction, ] From c95362a3bf077bc6fbc55f9ef75646e5aa9ead99 Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Sat, 23 Sep 2023 14:54:10 +0200 Subject: [PATCH 28/35] migrator: update "partial" Entry parsing --- .../actions/transform/communities.py | 2 +- .../transform/entries/parents.py | 19 +---- .../transform/entries/records/records.py | 72 ++++++++----------- 3 files changed, 33 insertions(+), 60 deletions(-) diff --git a/migrator/zenodo_rdm_migrator/actions/transform/communities.py b/migrator/zenodo_rdm_migrator/actions/transform/communities.py index cb5e997e..30456566 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/communities.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/communities.py @@ -140,7 +140,7 @@ def _transform_data(self): community_src, *files_payloads = payloads result = { "tx_id": self.tx.id, - "community": ZenodoCommunityEntry().transform(community_src), + "community": ZenodoCommunityEntry(partial=True).transform(community_src), } # Transform the logo data if there is one diff --git a/migrator/zenodo_rdm_migrator/transform/entries/parents.py b/migrator/zenodo_rdm_migrator/transform/entries/parents.py index 00eaca1c..392f4a33 100644 --- a/migrator/zenodo_rdm_migrator/transform/entries/parents.py +++ b/migrator/zenodo_rdm_migrator/transform/entries/parents.py @@ -19,13 +19,6 @@ class ParentRecordEntry(Entry): """Parent record transform entry class.""" - def __init__(self, partial=False): - """Constructor. - - :param partial: a boolean enabling partial transformations, i.e. missing keys. - """ - self.partial = partial - def _communities(self, entry): result = {} communities = entry["json"].get("communities") @@ -59,16 +52,10 @@ def _pids(self, entry): def transform(self, entry): """Transform a parent.""" transformed = {} - # both created and updated are the same as the record - keys = ["created", "updated", "version_id"] - for key in keys: - try: - transformed[key] = entry[key] - except KeyError as ex: - if not self.partial: - raise KeyError(ex) - pass + self._load_partial(entry, transformed, ["created", "updated", "version_id"]) + + # We "manually" handle partial data for the JSON field if "json" in entry: # check if conceptrecid exists and bail otherwise. should not happen! # this is the case for some deposits and they should be fixed in prod as it diff --git a/migrator/zenodo_rdm_migrator/transform/entries/records/records.py b/migrator/zenodo_rdm_migrator/transform/entries/records/records.py index 34741250..22ae9335 100644 --- a/migrator/zenodo_rdm_migrator/transform/entries/records/records.py +++ b/migrator/zenodo_rdm_migrator/transform/entries/records/records.py @@ -115,13 +115,6 @@ class ZenodoDraftEntry(ZenodoRecordEntry): Many functions are identical to ZenodoRecordEntry but making the fields optional. """ - def __init__(self, partial=False): - """Constructor. - - :param partial: a boolean enabling partial transformations, i.e. missing keys. - """ - self.partial = partial - def _expires_at(self, entry): """Transform the expiry date of the draft.""" next_year = datetime.today() + timedelta(days=365) @@ -191,43 +184,36 @@ def _metadata(self, entry): def transform(self, entry): """Transform a record single entry.""" - keys = [ - "id", - "created", - "updated", - "version_id", - "index", - "bucket_id", - "media_bucket_id", - "expires_at", - "fork_version_id", - ] - transformed = {} - for key in keys: - func = getattr(self, "_" + key) - try: - transformed[key] = func(entry) - # this might mask nested missing keys, it is still a partial transformation - # full one (with more validation) should be checked on a record - except KeyError as ex: - if not self.partial: - raise KeyError(ex) - pass - + self._load_partial( + entry, + transformed, + [ + "id", + "created", + "updated", + "version_id", + "index", + "bucket_id", + "media_bucket_id", + "expires_at", + "fork_version_id", + ], + ) # json might give an inner KeyError that should not be masked - if "json" in entry: - transformed["json"] = { - "id": self._recid(entry), - "pids": self._pids(entry), - "files": self._files(entry), - "media_files": self._media_files(entry), - "metadata": self._metadata(entry), - "access": self._access(entry), - "custom_fields": self._custom_fields(entry), - } - elif not self.partial: - raise KeyError("json") - # else, pass + self._load_partial( + entry, + transformed, + [ + "id", + "pids", + "files", + "media_files", + "metadata", + "access", + "custom_fields", + ], + prefix="json", + ) return transformed From 675f00f291490b1880bebbbb2b471ee07ce6d7e7 Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Sat, 23 Sep 2023 14:55:16 +0200 Subject: [PATCH 29/35] migrator: better transaction yielding for Kafka extract --- migrator/zenodo_rdm_migrator/extract/kafka.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/migrator/zenodo_rdm_migrator/extract/kafka.py b/migrator/zenodo_rdm_migrator/extract/kafka.py index 9b4014a7..d88d634b 100644 --- a/migrator/zenodo_rdm_migrator/extract/kafka.py +++ b/migrator/zenodo_rdm_migrator/extract/kafka.py @@ -137,7 +137,7 @@ def __init__( ops_topic, tx_topic, last_tx, - tx_buffer=10, + tx_buffer=100, max_tx_info_fetch=200, max_ops_fetch=2000, config=None, @@ -301,7 +301,7 @@ def iter_ops(self): yield (tx_id, dict(key=op_msg.key, **op_msg.value)) - def _yield_completed_tx(self, min_batch=None): + def _yield_completed_tx(self, min_batch=None, max_batch=None): """Yields completed transactions. Important: we only yield the "earliest" commited transactions for which we have @@ -327,14 +327,15 @@ def _yield_completed_tx(self, min_batch=None): break completed_tx_batch.append(tx_state) - # if we reached the minimum batch size, we stop - if min_batch and len(completed_tx_batch) == min_batch: + # if we reached the maximum batch size, we stop + if max_batch and len(completed_tx_batch) == max_batch: break # If we didn't make a big enough batch we return if min_batch and len(completed_tx_batch) < min_batch: - next_missing_tx = lsn_sorted_tx[len(completed_tx_batch)] - self.logger.info(f"Couldn't gather {min_batch=}: {next_missing_tx=}") + if len(completed_tx_batch) < len(lsn_sorted_tx): + next_missing_tx = lsn_sorted_tx[len(completed_tx_batch)] + self.logger.info(f"Couldn't gather {min_batch=}: {next_missing_tx=}") return for tx in completed_tx_batch: From 8c485dc5965f142affe6cc71385f129516604392 Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Sat, 23 Sep 2023 15:44:51 +0200 Subject: [PATCH 30/35] migrator: adjust partial entries for parent and record --- .../zenodo_rdm_migrator/transform/entries/parents.py | 12 ++++++++++++ .../transform/entries/records/records.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/migrator/zenodo_rdm_migrator/transform/entries/parents.py b/migrator/zenodo_rdm_migrator/transform/entries/parents.py index 392f4a33..eeeb2221 100644 --- a/migrator/zenodo_rdm_migrator/transform/entries/parents.py +++ b/migrator/zenodo_rdm_migrator/transform/entries/parents.py @@ -19,6 +19,18 @@ class ParentRecordEntry(Entry): """Parent record transform entry class.""" + def _created(self, entry): + """Returns the creation date of the record.""" + return entry["created"] + + def _updated(self, entry): + """Returns the creation date of the record.""" + return entry["updated"] + + def _version_id(self, entry): + """Returns the version id of the record.""" + return entry["version_id"] + def _communities(self, entry): result = {} communities = entry["json"].get("communities") diff --git a/migrator/zenodo_rdm_migrator/transform/entries/records/records.py b/migrator/zenodo_rdm_migrator/transform/entries/records/records.py index 22ae9335..21f115ce 100644 --- a/migrator/zenodo_rdm_migrator/transform/entries/records/records.py +++ b/migrator/zenodo_rdm_migrator/transform/entries/records/records.py @@ -205,7 +205,7 @@ def transform(self, entry): entry, transformed, [ - "id", + ("id", "recid"), "pids", "files", "media_files", From b3454a07d7a2b84a6ba4cb94f4e477053e314b69 Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Sat, 23 Sep 2023 15:45:16 +0200 Subject: [PATCH 31/35] migrator: update ignored actions --- .../actions/transform/ignored.py | 59 +++++++++++++++++-- 1 file changed, 53 insertions(+), 6 deletions(-) diff --git a/migrator/zenodo_rdm_migrator/actions/transform/ignored.py b/migrator/zenodo_rdm_migrator/actions/transform/ignored.py index 6bc3bfe5..d21cde1d 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/ignored.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/ignored.py @@ -13,26 +13,73 @@ from invenio_rdm_migrator.streams.actions import load -class FileChecksumAction(TransformAction): +class IgnoredTransformAction(TransformAction): + """Transform ignored actions.""" + + load_cls = load.IgnoredAction + + def _transform_data(self): + """Return nothing.""" + return {} + + +class FileChecksumAction(IgnoredTransformAction): """Zenodo to RDM for file checksum.""" name = "file-checksum" - load_cls = load.IgnoredAction @classmethod def matches_action(cls, tx): - """Checks for a single .""" + """Checks for a single file instance update.""" table_ops = [(o["source"]["table"], o["op"]) for o in tx.operations] if table_ops == [("files_files", OperationType.UPDATE)]: changed_keys = tx.operations[0].get("after", {}).keys() return {"last_check", "last_check_at"} < changed_keys return False - def _transform_data(self): - """Transforms the data and returns an instance of the mapped_cls.""" - return {} + +class UserSessionAction(IgnoredTransformAction): + """Zenodo to RDM for user session.""" + + name = "user-session" + + @classmethod + def matches_action(cls, tx): + """Checks for a single .""" + table_ops = [(o["source"]["table"], o["op"]) for o in tx.operations] + return all(t == "accounts_user_session_activity" for t, _ in table_ops) + + +class GitHubSyncAction(IgnoredTransformAction): + """Zenodo to RDM for GitHub sync.""" + + name = "gh-sync" + + @classmethod + def matches_action(cls, tx): + """Checks for a single .""" + table_ops = [(o["source"]["table"], o["op"]) for o in tx.operations] + return table_ops == [("oauthclient_remoteaccount", OperationType.UPDATE)] + + +class OAuthReLoginAction(IgnoredTransformAction): + """Zenodo to RDM for OAuth re-login.""" + + name = "oauth-relogin" + + @classmethod + def matches_action(cls, tx): + """Checks for a single .""" + table_ops = [(o["source"]["table"], o["op"]) for o in tx.operations] + return table_ops == [ + ("accounts_user", OperationType.UPDATE), + ("oauthclient_remotetoken", OperationType.UPDATE), + ] IGNORED_ACTIONS = [ FileChecksumAction, + UserSessionAction, + GitHubSyncAction, + OAuthReLoginAction, ] From 339fa8f05698ed677d567113cac67a742716b901 Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Sat, 23 Sep 2023 15:55:56 +0200 Subject: [PATCH 32/35] migrator: less verbose Kafka logging --- migrator/zenodo_rdm_migrator/extract/kafka.py | 1 - 1 file changed, 1 deletion(-) diff --git a/migrator/zenodo_rdm_migrator/extract/kafka.py b/migrator/zenodo_rdm_migrator/extract/kafka.py index d88d634b..4bc45405 100644 --- a/migrator/zenodo_rdm_migrator/extract/kafka.py +++ b/migrator/zenodo_rdm_migrator/extract/kafka.py @@ -387,7 +387,6 @@ def run(self): yield from self._yield_completed_tx(min_batch=self.tx_buffer) - self.logger.info(f"Transactions in registry: {self.tx_registry.keys()}") self.logger.info(f"{self._last_yielded_tx=}") # If no new transactions, we don't need to sleep since consumers From b562fb630dad803c32fbfe74b13ebbd3a3d6203e Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Sat, 23 Sep 2023 15:56:15 +0200 Subject: [PATCH 33/35] migrator: fix optional user "active" key --- migrator/zenodo_rdm_migrator/actions/transform/users.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/migrator/zenodo_rdm_migrator/actions/transform/users.py b/migrator/zenodo_rdm_migrator/actions/transform/users.py index e0c54d76..a27a1876 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/users.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/users.py @@ -117,7 +117,7 @@ def matches_action(cls, tx): for operation in tx.operations: if "accounts_user" == operation["source"]["table"]: update_not_active = ( - operation["after"]["active"] + operation["after"].get("active", True) or not operation["op"] == OperationType.UPDATE ) if update_not_active or account_seen: From 10eec02b3a27ae073cce5d5e52af695effd89aea Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Sat, 23 Sep 2023 16:04:59 +0200 Subject: [PATCH 34/35] migrator: update ignored actions --- .../actions/transform/github.py | 13 +++-- .../actions/transform/ignored.py | 48 +++++++++++++++---- 2 files changed, 44 insertions(+), 17 deletions(-) diff --git a/migrator/zenodo_rdm_migrator/actions/transform/github.py b/migrator/zenodo_rdm_migrator/actions/transform/github.py index 77982cb7..aa290f38 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/github.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/github.py @@ -78,13 +78,12 @@ class RepoUpdateAction(TransformAction): @classmethod def matches_action(cls, tx): """Checks if the data corresponds with that required by the action.""" - if len(tx.operations) != 1: - return False - op = tx.operations[0] - return ( - op["source"]["table"] == "github_repositories" - and op["op"] == OperationType.UPDATE - ) + ops = [(o["source"]["table"], o["op"]) for o in tx.operations] + if ops == [("github_repositories", OperationType.UPDATE)]: + changed_keys = tx.operations[0].get("after", {}).keys() - {"id"} + # i.e. anything more than "ping" webhook + return {"ping", "updated"} != changed_keys + return False def _transform_data(self): """Transforms the data and returns dictionary.""" diff --git a/migrator/zenodo_rdm_migrator/actions/transform/ignored.py b/migrator/zenodo_rdm_migrator/actions/transform/ignored.py index d21cde1d..8a683ac1 100644 --- a/migrator/zenodo_rdm_migrator/actions/transform/ignored.py +++ b/migrator/zenodo_rdm_migrator/actions/transform/ignored.py @@ -31,10 +31,10 @@ class FileChecksumAction(IgnoredTransformAction): @classmethod def matches_action(cls, tx): """Checks for a single file instance update.""" - table_ops = [(o["source"]["table"], o["op"]) for o in tx.operations] - if table_ops == [("files_files", OperationType.UPDATE)]: - changed_keys = tx.operations[0].get("after", {}).keys() - return {"last_check", "last_check_at"} < changed_keys + ops = [(o["source"]["table"], o["op"]) for o in tx.operations] + if ops == [("files_files", OperationType.UPDATE)]: + changed_keys = tx.operations[0].get("after", {}).keys() - {"id"} + return {"last_check", "last_check_at", "updated"} == changed_keys return False @@ -46,8 +46,20 @@ class UserSessionAction(IgnoredTransformAction): @classmethod def matches_action(cls, tx): """Checks for a single .""" - table_ops = [(o["source"]["table"], o["op"]) for o in tx.operations] - return all(t == "accounts_user_session_activity" for t, _ in table_ops) + ops = [(o["source"]["table"], o["op"]) for o in tx.operations] + user_updates = [o for o in ops if o == ("accounts_user", OperationType.UPDATE)] + session_activity_ops = [ + o for o in ops if o[0] == "accounts_user_session_activity" + ] + # Don't accidentally match user deactivations + if len(user_updates) == 1: + if tx.operations[0].get("after", {}).get("active") is False: + return False + # there might be one optional user update + multiple session_activirty ops + return ( + len(ops) == len(user_updates + session_activity_ops) + and len(session_activity_ops) > 1 + ) class GitHubSyncAction(IgnoredTransformAction): @@ -58,8 +70,23 @@ class GitHubSyncAction(IgnoredTransformAction): @classmethod def matches_action(cls, tx): """Checks for a single .""" - table_ops = [(o["source"]["table"], o["op"]) for o in tx.operations] - return table_ops == [("oauthclient_remoteaccount", OperationType.UPDATE)] + ops = [(o["source"]["table"], o["op"]) for o in tx.operations] + return ops == [("oauthclient_remoteaccount", OperationType.UPDATE)] + + +class GitHubPingAction(IgnoredTransformAction): + """Zenodo to RDM for GitHub sync.""" + + name = "gh-ping" + + @classmethod + def matches_action(cls, tx): + """Checks for a single .""" + ops = [(o["source"]["table"], o["op"]) for o in tx.operations] + if ops == [("github_repositories", OperationType.UPDATE)]: + changed_keys = tx.operations[0].get("after", {}).keys() - {"id"} + return {"ping", "updated"} == changed_keys + return False class OAuthReLoginAction(IgnoredTransformAction): @@ -70,8 +97,8 @@ class OAuthReLoginAction(IgnoredTransformAction): @classmethod def matches_action(cls, tx): """Checks for a single .""" - table_ops = [(o["source"]["table"], o["op"]) for o in tx.operations] - return table_ops == [ + ops = [(o["source"]["table"], o["op"]) for o in tx.operations] + return ops == [ ("accounts_user", OperationType.UPDATE), ("oauthclient_remotetoken", OperationType.UPDATE), ] @@ -81,5 +108,6 @@ def matches_action(cls, tx): FileChecksumAction, UserSessionAction, GitHubSyncAction, + GitHubPingAction, OAuthReLoginAction, ] From 7e62fd75f79074f99b4e02898731e2a208cd780f Mon Sep 17 00:00:00 2001 From: Alex Ioannidis Date: Sat, 23 Sep 2023 16:41:43 +0200 Subject: [PATCH 35/35] migrator: include Kafka offset in TxState --- migrator/tests/extract/test_kafka_extract.py | 2 ++ migrator/zenodo_rdm_migrator/extract/kafka.py | 11 +++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/migrator/tests/extract/test_kafka_extract.py b/migrator/tests/extract/test_kafka_extract.py index 75be4adb..29542e61 100644 --- a/migrator/tests/extract/test_kafka_extract.py +++ b/migrator/tests/extract/test_kafka_extract.py @@ -264,6 +264,7 @@ def test_later_last_tx(mocker, kafka_data): def test_unchanged_fields(mocker): """Test the unchanged fields filtering for UPDATEs.""" tx_info = { + "offset": 0, "key": {"id": "563388795:1461026653952"}, "value": { "status": "END", @@ -276,6 +277,7 @@ def test_unchanged_fields(mocker): }, } op = { + "offset": 0, "key": { "id": "2c5e1797-e030-40e2-b32f-99335730a39d", }, diff --git a/migrator/zenodo_rdm_migrator/extract/kafka.py b/migrator/zenodo_rdm_migrator/extract/kafka.py index 4bc45405..49dc92b9 100644 --- a/migrator/zenodo_rdm_migrator/extract/kafka.py +++ b/migrator/zenodo_rdm_migrator/extract/kafka.py @@ -24,10 +24,11 @@ class _TxState: """Transaction state, internally used in the Kafka extract only.""" - def __init__(self, id, commit_lsn=None, info=None): + def __init__(self, id, commit_lsn=None, commit_offset=None, info=None): """Constructor.""" self.id = id self.commit_lsn = commit_lsn + self.commit_offset = commit_offset self.info = info # We order operations based on the Postgres LSN self.ops = SortedList(key=lambda o: o["source"]["lsn"]) @@ -265,7 +266,7 @@ def iter_tx_info(self): continue elif tx_msg.value["status"] == "END": consumer.commit() - yield ((tx_id, tx_lsn), tx_msg.value) + yield ((tx_id, tx_lsn, tx_msg.offset), tx_msg.value) @staticmethod def _filter_unchanged_values(msg): @@ -341,7 +342,7 @@ def _yield_completed_tx(self, min_batch=None, max_batch=None): for tx in completed_tx_batch: del self.tx_registry[tx.id] # Keep track of the last yielded transaction ID - self._last_yielded_tx = (tx.id, tx.commit_lsn) + self._last_yielded_tx = (tx.id, tx.commit_lsn, tx.commit_offset) yield Tx(id=tx.id, commit_lsn=tx.commit_lsn, operations=list(tx.ops)) def run(self): @@ -358,14 +359,16 @@ def run(self): tx_info_stream, self.max_tx_info_fetch ) self.logger.info("Started streaming tx info") - for (tx_id, tx_lsn), tx_info in tx_info_stream: + for (tx_id, tx_lsn, offset), tx_info in tx_info_stream: if tx_id in self.tx_registry: self.tx_registry[tx_id].info = tx_info self.tx_registry[tx_id].commit_lsn = tx_lsn + self.tx_registry[tx_id].commit_offset = offset else: self.tx_registry[tx_id] = _TxState( tx_id, commit_lsn=tx_lsn, + commit_offset=offset, info=tx_info, ) self.logger.info("Stopped streaming tx info")