From cf63f843faeab098ac08bc7d0164bc947f1cd319 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Fri, 27 May 2022 00:10:36 +0530 Subject: [PATCH 01/48] Fix mocking of entity.build_entity_query The build_entity_query method of all entities is mocked in test_index_by_fk_* tests. These mocks are never reset after the test is over. This causes the mock to be visible instead of the actual method in other tests causing those to fail. Fix the issue by using mock.patch and cleaning up after test run is over. --- test/test_amqp_handler.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/test/test_amqp_handler.py b/test/test_amqp_handler.py index d26e9f77..0f581883 100644 --- a/test/test_amqp_handler.py +++ b/test/test_amqp_handler.py @@ -115,6 +115,11 @@ def setUp(self): self.handler.cores[self.entity_type] = mock.Mock() + for entity_type, entity in SCHEMA.items(): + patcher = mock.patch.object(entity, 'build_entity_query') + patcher.start() + self.addCleanup(patcher.stop) + def test_delete_callback(self): entity_gid = u"90d7709d-feba-47e6-a2d1-8770da3c3d9c" self.message = Amqp_Message( @@ -140,7 +145,7 @@ def test_index_by_fk_1(self): self.handler = handler.Handler(SCHEMA.keys()) for entity_type, entity in SCHEMA.items(): self.handler.cores[entity_type] = mock.Mock() - entity.build_entity_query = mock.MagicMock() + self.handler._index_by_fk(parsed_message) calls = self.handler.db_session().execute.call_args_list self.assertEqual(len(calls), 6) @@ -174,7 +179,7 @@ def test_index_by_fk_2(self): self.handler = handler.Handler(SCHEMA.keys()) for entity_type, entity in SCHEMA.items(): self.handler.cores[entity_type] = mock.Mock() - entity.build_entity_query = mock.MagicMock() + self.handler._index_by_fk(parsed_message) calls = self.handler.db_session().execute.call_args_list self.assertEqual(len(calls), 1) @@ -193,7 +198,7 @@ def test_index_by_fk_3(self): self.handler = handler.Handler(SCHEMA.keys()) for entity_type, entity in SCHEMA.items(): 
self.handler.cores[entity_type] = mock.Mock() - entity.build_entity_query = mock.MagicMock() + self.handler._index_by_fk(parsed_message) calls = self.handler.db_session().execute.call_args_list self.assertEqual(len(calls), 1) From 166ba520361e54aa1d9e43a15c286cf298b70db6 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Sat, 7 May 2022 15:00:24 +0530 Subject: [PATCH 02/48] Setup real database for tests The test config should be copied inside the Dockerfile manually. Copying config.test.ini to config.ini in a local or jenkins script before building the docker image will not work because config.ini is included in .dockerignore. Therefore, config.ini will be ignored and not be copied to the image otherwise. Co-authored-by: yvanzo --- config.test.ini | 25 +++++++++++++++++++++++++ docker/Dockerfile.test | 10 ++++++++-- docker/docker-compose.test.yml | 12 +++++++++++- test.sh | 12 ++++++++++++ 4 files changed, 56 insertions(+), 3 deletions(-) create mode 100644 config.test.ini create mode 100755 test.sh diff --git a/config.test.ini b/config.test.ini new file mode 100644 index 00000000..80258a08 --- /dev/null +++ b/config.test.ini @@ -0,0 +1,25 @@ +[database] +dbname = musicbrainz_test +host = musicbrainz_db +password = +port = 5432 +user = musicbrainz + +[solr] +uri = SKIP +batch_size = 60 + +[sir] +import_threads = 2 +query_batch_size = 20000 +wscompat = on + +[rabbitmq] +host = SKIP +user = SKIP +password = SKIP +vhost = SKIP +prefetch_count = 350 + +[sentry] +dsn = SKIP diff --git a/docker/Dockerfile.test b/docker/Dockerfile.test index 93933d6e..46dd80a8 100644 --- a/docker/Dockerfile.test +++ b/docker/Dockerfile.test @@ -3,6 +3,10 @@ FROM metabrainz/python:2.7-20220421 RUN mkdir /code WORKDIR /code +ENV DOCKERIZE_VERSION v0.6.1 +RUN wget https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz \ + && tar -C /usr/local/bin -xzvf dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz + # Python dependencies RUN 
apt-get update && \ apt-get install -y --no-install-recommends \ @@ -23,8 +27,10 @@ RUN pip install -r requirements.txt RUN pip install -r requirements_dev.txt COPY . /code/ +RUN cp config.test.ini config.ini -CMD py.test --junitxml=/data/test_report.xml \ +CMD dockerize -wait tcp://musicbrainz_db:5432 -timeout 600s \ + bash -c "py.test --junitxml=/data/test_report.xml \ --cov=sir \ --cov-report xml:/data/coverage.xml \ - --cov-report html:/data/coverage-html + --cov-report html:/data/coverage-html" diff --git a/docker/docker-compose.test.yml b/docker/docker-compose.test.yml index 4ce507d9..d489b41f 100644 --- a/docker/docker-compose.test.yml +++ b/docker/docker-compose.test.yml @@ -1,8 +1,18 @@ # Docker Compose file for testing -version: "2" +version: "3.8" services: test: build: context: .. dockerfile: ./docker/Dockerfile.test + volumes: + - "..:/code" + depends_on: + - musicbrainz_db + + musicbrainz_db: + image: metabrainz/musicbrainz-test-database:production + environment: + POSTGRES_HOST_AUTH_METHOD: trust + PGDATA: /var/lib/postgresql-musicbrainz/data diff --git a/test.sh b/test.sh new file mode 100755 index 00000000..b8f1e3f8 --- /dev/null +++ b/test.sh @@ -0,0 +1,12 @@ +docker-compose -f docker/docker-compose.test.yml -p sir-test up -d musicbrainz_db +docker-compose -f docker/docker-compose.test.yml -p sir-test build +docker-compose -f docker/docker-compose.test.yml -p sir-test run test \ + dockerize -wait tcp://musicbrainz_db:5432 -timeout 600s \ + bash -c "py.test --junitxml=/data/test_report.xml \ + --cov=sir \ + --cov-report xml:/data/coverage.xml \ + --cov-report html:/data/coverage-html \ + $*" +RET=$? 
+docker-compose -f docker/docker-compose.test.yml -p sir-test down +exit $RET From fc5cf88370c5f3746fefaf61493226da0754101d Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Sat, 7 May 2022 15:39:19 +0530 Subject: [PATCH 03/48] Fix SQLAlchemy warning to use text directly --- sir/querying.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sir/querying.py b/sir/querying.py index 52c4dafd..9821b143 100644 --- a/sir/querying.py +++ b/sir/querying.py @@ -3,7 +3,7 @@ import logging -from sqlalchemy import func +from sqlalchemy import func, text from sqlalchemy.orm.attributes import InstrumentedAttribute from sqlalchemy.orm.interfaces import ONETOMANY, MANYTOONE from sqlalchemy.orm.properties import RelationshipProperty @@ -107,10 +107,10 @@ def iter_bounds(db_session, column, batch_size, importlimit): from_self(column) if batch_size > 1: - q = q.filter("rownum %% %d=1" % batch_size) + q = q.filter(text("rownum % :batch_size=1").bindparams(batch_size=batch_size)) if importlimit: - q = q.filter("rownum <= %d" % (importlimit)) + q = q.filter(text("rownum <= :import_limit").bindparams(import_limit=importlimit)) intervals = [id for id in q] bounds = [] From b73e209236d6162a199bcae37a48a273da9e01c4 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Wed, 18 May 2022 19:52:58 +0530 Subject: [PATCH 04/48] Remove volume mount from docker-compose.test.yml This erases all the code from the image built in CI. So remove the mount, the downside is that we need to build the image for tests each time. An alternative could be to have separate docker compose files for CI and local tests, perhaps even using one as overlay over the other. --- docker/docker-compose.test.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/docker/docker-compose.test.yml b/docker/docker-compose.test.yml index d489b41f..aac266d5 100644 --- a/docker/docker-compose.test.yml +++ b/docker/docker-compose.test.yml @@ -6,8 +6,6 @@ services: build: context: .. 
dockerfile: ./docker/Dockerfile.test - volumes: - "..:/code" depends_on: - musicbrainz_db From 55c5560488e264ae4e817a52583bda6ddad98374 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Tue, 31 May 2022 19:33:58 +0530 Subject: [PATCH 05/48] Optionally accept a session parameter in indexing functions We want to test these indexing functions with real database tests. Thus, accept an optional session parameter in these functions. In tests, we create a session, start a transaction, insert data from sql files, query the data in the same transaction and at the end of the test rollback the transaction. As a result, we are able to get away without resetting the database in between the tests. If we don't pass the same session here, then it may not be possible to use this method because changes in a transaction are not visible outside it. This code earlier used to be inside a with but the queries executed here are only select, so we can do without the `with` here as well. --- sir/indexing.py | 53 ++++++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/sir/indexing.py b/sir/indexing.py index 8c7d64bb..a8d46ec0 100644 --- a/sir/indexing.py +++ b/sir/indexing.py @@ -237,7 +237,7 @@ def live_index_entity(entity_name, ids, data_queue): _query_database(entity_name, condition, data_queue) -def _query_database(entity_name, condition, data_queue): +def _query_database(entity_name, condition, data_queue, session=None): """ Retrieve rows for a single entity type identified by ``entity_name``, convert them to a dict with :func:`sir.indexing.query_result_to_dict` and @@ -254,30 +254,33 @@ def _query_database(entity_name, condition, data_queue): search_entity = SCHEMA[entity_name] model = search_entity.model row_converter = search_entity.query_result_to_dict - with util.db_session_ctx(util.db_session()) as session: - query = search_entity.query.filter(condition).with_session(session) - total_records = 0 - for row in query: - if not 
PROCESS_FLAG.value: - return - try: - data_queue.put(row_converter(row)) - except ValueError: - logger.info("Skipping %s with id %s. " - "The most likely cause of this is an " - "unsupported control character in the " - "data.", - entity_name, - row.id) - except Exception as exc: - logger.error("Failed to import %s with id %s", - entity_name, - row.id) - logger.exception(exc) - raise - else: - total_records += 1 - logger.debug("Retrieved %s records in %s", total_records, model) + + if session is None: + session = util.db_session() + + query = search_entity.query.filter(condition).with_session(session) + total_records = 0 + for row in query: + if not PROCESS_FLAG.value: + return + try: + data_queue.put(row_converter(row)) + except ValueError: + logger.info("Skipping %s with id %s. " + "The most likely cause of this is an " + "unsupported control character in the " + "data.", + entity_name, + row.id) + except Exception as exc: + logger.error("Failed to import %s with id %s", + entity_name, + row.id) + logger.exception(exc) + raise + else: + total_records += 1 + logger.debug("Retrieved %s records in %s", total_records, model) def queue_to_solr(queue, batch_size, solr_connection): From ce8bf447d70770ad7dab64713e6b861608b37888 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Sat, 7 May 2022 15:50:59 +0530 Subject: [PATCH 06/48] Add indexing tests for entities using actual data --- sir/indexing.py | 4 +- sir/util.py | 19 +- test/sql/annotation.sql | 37 ++ test/sql/area.sql | 16 + test/sql/artist.sql | 37 ++ test/sql/cdstub.sql | 10 + test/sql/editor.sql | 47 +++ test/sql/event.sql | 23 ++ test/sql/instrument.sql | 41 ++ test/sql/label.sql | 19 + test/sql/place.sql | 11 + test/sql/recording.sql | 44 ++ test/sql/release-group.sql | 39 ++ test/sql/release.sql | 61 +++ test/sql/series.sql | 51 +++ test/sql/tag.sql | 33 ++ test/sql/url.sql | 12 + test/sql/work.sql | 32 ++ test/test_indexing_real_data.py | 684 ++++++++++++++++++++++++++++++++ 19 files changed, 1212 
insertions(+), 8 deletions(-) create mode 100644 test/sql/annotation.sql create mode 100644 test/sql/area.sql create mode 100644 test/sql/artist.sql create mode 100644 test/sql/cdstub.sql create mode 100644 test/sql/editor.sql create mode 100644 test/sql/event.sql create mode 100644 test/sql/instrument.sql create mode 100644 test/sql/label.sql create mode 100644 test/sql/place.sql create mode 100644 test/sql/recording.sql create mode 100644 test/sql/release-group.sql create mode 100644 test/sql/release.sql create mode 100644 test/sql/series.sql create mode 100644 test/sql/tag.sql create mode 100644 test/sql/url.sql create mode 100644 test/sql/work.sql create mode 100644 test/test_indexing_real_data.py diff --git a/sir/indexing.py b/sir/indexing.py index a8d46ec0..dc1c84db 100644 --- a/sir/indexing.py +++ b/sir/indexing.py @@ -199,7 +199,7 @@ def _index_entity_process_wrapper(args, live=False): raise -def index_entity(entity_name, bounds, data_queue): +def index_entity(entity_name, bounds, data_queue, session=None): """ Retrieve rows for a single entity type identified by ``entity_name``, convert them to a dict with :func:`sir.indexing.query_result_to_dict` and @@ -217,7 +217,7 @@ def index_entity(entity_name, bounds, data_queue): condition = and_(model.id >= lower_bound, model.id < upper_bound) else: condition = model.id >= lower_bound - _query_database(entity_name, condition, data_queue) + _query_database(entity_name, condition, data_queue, session) def live_index_entity(entity_name, ids, data_queue): diff --git a/sir/util.py b/sir/util.py index f2f6b40d..32eb6c47 100644 --- a/sir/util.py +++ b/sir/util.py @@ -36,20 +36,27 @@ def __str__(self): self.actual) -def db_session(): +def engine(): """ - Creates a new :class:`sqla:sqlalchemy.orm.session.sessionmaker`. + Create a new :class:`sqla:sqlalchemy.engine.Engine`. 
- :rtype: :class:`sqla:sqlalchemy.orm.session.sessionmaker` + :rtype: :class:`sqla:sqlalchemy.engine.Engine` """ cget = partial(config.CFG.get, "database") cdict = {"username": cget("user")} for key in ["password", "host", "port"]: cdict[key] = cget(key) cdict["database"] = cget("dbname") - e = create_engine(URL("postgresql", **cdict), server_side_cursors=False) - S = sessionmaker(bind=e) - return S + return create_engine(URL("postgresql", **cdict), server_side_cursors=False) + + +def db_session(): + """ + Creates a new :class:`sqla:sqlalchemy.orm.session.sessionmaker`. + + :rtype: :class:`sqla:sqlalchemy.orm.session.sessionmaker` + """ + return sessionmaker(bind=engine()) @contextmanager diff --git a/test/sql/annotation.sql b/test/sql/annotation.sql new file mode 100644 index 00000000..b7c4f04b --- /dev/null +++ b/test/sql/annotation.sql @@ -0,0 +1,37 @@ +INSERT INTO editor (id, name, password, privs, email, website, bio, member_since, + email_confirm_date, last_login_date, ha1) + VALUES (1, 'new_editor', '{CLEARTEXT}password', 1+8+32+512, 'test@email.com', 'http://test.website', + 'biography', '1989-07-23', '2005-10-20', '2013-04-05', 'aa550c5b01407ef1f3f0d16daf9ec3c8'); + +INSERT INTO area (id, gid, name, type) VALUES + (221, '8a754a16-0027-3a29-b6d7-2b40ea0481ed', 'United Kingdom', 1), + (222, '489ce91b-6658-3307-9877-795b68554c98', 'United States', 1); +INSERT INTO country_area (area) VALUES (221), (222); +INSERT INTO iso_3166_1 (area, code) VALUES (221, 'GB'), (222, 'US'); + +INSERT INTO artist + (id, gid, name, sort_name, type, gender, area, + begin_area, end_area, + begin_date_year, begin_date_month, begin_date_day, + end_date_year, end_date_month, end_date_day, comment, + last_updated) + VALUES + (3, '745c079d-374e-4436-9448-da92dedef3ce', 'Test Artist', 'Artist, Test', 1, 1, 221, 221, 221, + 2008, 01, 02, 2009, 03, 04, 'Yet Another Test Artist', + '2009-07-09'); + +INSERT INTO artist (id, gid, name, sort_name) VALUES + (4, 
'945c079d-374e-4436-9448-da92dedef3cf', 'Minimal Artist', 'Minimal Artist'), + (5, 'dc19b13a-5ca5-44f5-8f0e-0c37a8ab1958', 'Annotated Artist A', 'Annotated Artist A'), + (6, 'ca4c2228-227c-4904-932a-dff442c091ea', 'Annotated Artist B', 'Annotated Artist B'); + +UPDATE artist_meta SET rating=70, rating_count=4 WHERE id=3; + +INSERT INTO annotation (id, editor, text) VALUES (1, 1, 'Test annotation 1'); +INSERT INTO annotation (id, editor, text) VALUES (2, 1, 'Test annotation 2'); +INSERT INTO annotation (id, editor, text) VALUES (3, 1, 'Duplicate annotation'); +INSERT INTO annotation (id, editor, text) VALUES (4, 1, 'Duplicate annotation'); + +INSERT INTO artist_annotation (artist, annotation) VALUES (3, 1), (4, 2), (5, 3), (6, 4); + +INSERT INTO artist_gid_redirect VALUES ('a4ef1d08-962e-4dd6-ae14-e42a6a97fc11', 3); diff --git a/test/sql/area.sql b/test/sql/area.sql new file mode 100644 index 00000000..5419e27e --- /dev/null +++ b/test/sql/area.sql @@ -0,0 +1,16 @@ +INSERT INTO area (id, gid, name, type) VALUES + ( 13, '106e0bec-b638-3b37-b731-f53d507dc00e', 'Australia', 1), + ( 81, '85752fda-13c4-31a3-bee5-0e5cb1f51dad', 'Germany', 1), + (107, '2db42837-c832-3c27-b4a3-08198f75693c', 'Japan', 1), + (221, '8a754a16-0027-3a29-b6d7-2b40ea0481ed', 'United Kingdom', 1), + (222, '489ce91b-6658-3307-9877-795b68554c98', 'United States', 1), + (241, '89a675c2-3e37-3518-b83c-418bad59a85a', 'Europe', 1), + (5126, '3f179da4-83c6-4a28-a627-e46b4a8ff1ed', 'Sydney', 3); +INSERT INTO country_area (area) VALUES ( 13), ( 81), (107), (221), (222), (241); +INSERT INTO iso_3166_1 (area, code) VALUES ( 13, 'AU'), ( 81, 'DE'), (107, 'JP'), (221, 'GB'), (222, 'US'), (241, 'XE'); + +INSERT INTO area_alias (id, name, sort_name, area, edits_pending) + VALUES (1, 'オーストラリア', 'オーストラリア', 13, 0); + +INSERT INTO link VALUES (118734, 356, NULL, NULL, NULL, NULL, NULL, NULL, 0, '2013-05-17 20:05:50.534145+00', FALSE); +INSERT INTO l_area_area VALUES (4892, 118734, 13, 5126, 0, '2013-05-24 
20:32:44.702487+00', 0, '', ''); diff --git a/test/sql/artist.sql b/test/sql/artist.sql new file mode 100644 index 00000000..b7c4f04b --- /dev/null +++ b/test/sql/artist.sql @@ -0,0 +1,37 @@ +INSERT INTO editor (id, name, password, privs, email, website, bio, member_since, + email_confirm_date, last_login_date, ha1) + VALUES (1, 'new_editor', '{CLEARTEXT}password', 1+8+32+512, 'test@email.com', 'http://test.website', + 'biography', '1989-07-23', '2005-10-20', '2013-04-05', 'aa550c5b01407ef1f3f0d16daf9ec3c8'); + +INSERT INTO area (id, gid, name, type) VALUES + (221, '8a754a16-0027-3a29-b6d7-2b40ea0481ed', 'United Kingdom', 1), + (222, '489ce91b-6658-3307-9877-795b68554c98', 'United States', 1); +INSERT INTO country_area (area) VALUES (221), (222); +INSERT INTO iso_3166_1 (area, code) VALUES (221, 'GB'), (222, 'US'); + +INSERT INTO artist + (id, gid, name, sort_name, type, gender, area, + begin_area, end_area, + begin_date_year, begin_date_month, begin_date_day, + end_date_year, end_date_month, end_date_day, comment, + last_updated) + VALUES + (3, '745c079d-374e-4436-9448-da92dedef3ce', 'Test Artist', 'Artist, Test', 1, 1, 221, 221, 221, + 2008, 01, 02, 2009, 03, 04, 'Yet Another Test Artist', + '2009-07-09'); + +INSERT INTO artist (id, gid, name, sort_name) VALUES + (4, '945c079d-374e-4436-9448-da92dedef3cf', 'Minimal Artist', 'Minimal Artist'), + (5, 'dc19b13a-5ca5-44f5-8f0e-0c37a8ab1958', 'Annotated Artist A', 'Annotated Artist A'), + (6, 'ca4c2228-227c-4904-932a-dff442c091ea', 'Annotated Artist B', 'Annotated Artist B'); + +UPDATE artist_meta SET rating=70, rating_count=4 WHERE id=3; + +INSERT INTO annotation (id, editor, text) VALUES (1, 1, 'Test annotation 1'); +INSERT INTO annotation (id, editor, text) VALUES (2, 1, 'Test annotation 2'); +INSERT INTO annotation (id, editor, text) VALUES (3, 1, 'Duplicate annotation'); +INSERT INTO annotation (id, editor, text) VALUES (4, 1, 'Duplicate annotation'); + +INSERT INTO artist_annotation (artist, annotation) VALUES 
(3, 1), (4, 2), (5, 3), (6, 4); + +INSERT INTO artist_gid_redirect VALUES ('a4ef1d08-962e-4dd6-ae14-e42a6a97fc11', 3); diff --git a/test/sql/cdstub.sql b/test/sql/cdstub.sql new file mode 100644 index 00000000..9a3f5029 --- /dev/null +++ b/test/sql/cdstub.sql @@ -0,0 +1,10 @@ +INSERT INTO release_raw (id, title, artist, added, last_modified, lookup_count, modify_count, source, barcode, comment) + VALUES (1, 'Test Stub', 'Test Artist', '2000-01-01 0:00', '2001-01-01 0:00', 10, 1, 0, '837101029192', 'this is a comment'); + +INSERT INTO track_raw (release, title, artist, sequence) + VALUES (1, 'Track title 1', '', 0); +INSERT INTO track_raw (release, title, artist, sequence) + VALUES (1, 'Track title 2', '', 1); + +INSERT INTO cdtoc_raw (release, discid, track_count, leadout_offset, track_offset) + VALUES (1, 'YfSgiOEayqN77Irs.VNV.UNJ0Zs-', 2, 20000, '{150,10000}'); diff --git a/test/sql/editor.sql b/test/sql/editor.sql new file mode 100644 index 00000000..9049b486 --- /dev/null +++ b/test/sql/editor.sql @@ -0,0 +1,47 @@ +INSERT INTO editor (id, name, password, privs, email, website, bio, member_since, + email_confirm_date, last_login_date, ha1) + VALUES (1, 'new_editor', '{CLEARTEXT}password', 1+8+32+512, 'test@email.com', 'http://test.website', + 'biography', '1989-07-23', '2005-10-20', '2013-04-05', 'aa550c5b01407ef1f3f0d16daf9ec3c8'), + (2, 'Alice', '{CLEARTEXT}secret1', 0, 'alice@example.com', 'http://example.com', + 'second biography', '2007-07-23', '2007-10-20', now(), 'e7f46e4f25ae38fcc952ef2b7edf0de9'), + (3, 'kuno', '{CLEARTEXT}byld', 0, 'kuno@example.com', 'http://frob.nl', + 'donation check test user', '2010-03-25', '2010-03-25', now(), '00863261763ed5029ea051f87c4bbec3'); + +INSERT INTO editor_preference (editor, name, value) + VALUES (1, 'datetime_format', '%m/%d/%Y %H:%M:%S'), + (1, 'timezone', 'UTC'), + (2, 'datetime_format', '%m/%d/%Y %H:%M:%S'), + (2, 'timezone', 'UTC'), + (2, 'public_ratings', '0'), + (2, 'public_tags', '0'); + +INSERT INTO artist 
(id, gid, name, sort_name) + VALUES (1, 'a9d99e40-72d7-11de-8a39-0800200c9a66', 'Name', 1); + +INSERT INTO artist_credit (id, name, artist_count, gid) + VALUES (1, 'Name', 1, '949a7fd5-fe73-3e8f-922e-01ff4ca958f7'); +INSERT INTO artist_credit_name (artist_credit, artist, name, position, join_phrase) + VALUES (1, 1, 'Name', 0, ''); + +INSERT INTO release_group (id, gid, name, artist_credit) + VALUES (1, '3b4faa80-72d9-11de-8a39-0800200c9a66', 'Arrival', 1); + +INSERT INTO release (id, gid, name, artist_credit, release_group) + VALUES (1, 'f34c079d-374e-4436-9448-da92dedef3ce', 'Arrival', 1, 1), + (2, 'a34c079d-374e-4436-9448-da92dedef3ce', 'Arrival', 1, 1), + (3, 'b34c079d-374e-4436-9448-da92dedef3ce', 'Arrival', 1, 1), + (4, 'c34c079d-374e-4436-9448-da92dedef3ce', 'Arrival', 1, 1); + +INSERT INTO editor_subscribe_editor (editor, subscribed_editor, last_edit_sent) + VALUES (2, 1, 3); + +INSERT INTO editor_collection (id, gid, editor, name, public, type) + VALUES (1, 'f34c079d-374e-4436-9448-da92dedef3ce', 2, 'kunos collection', FALSE, 1), + (2, 'd34c079d-374e-4436-9448-da92dedef3ce', 1, 'new_collection', TRUE, 1); + +INSERT INTO editor_collection_release (collection, release) + VALUES (1, 1), (1, 2); + +INSERT INTO annotation (editor) VALUES (2); -- so Alice is not fully deleted + +INSERT INTO old_editor_name (name) VALUES ('im_gone'); diff --git a/test/sql/event.sql b/test/sql/event.sql new file mode 100644 index 00000000..49579c96 --- /dev/null +++ b/test/sql/event.sql @@ -0,0 +1,23 @@ +INSERT INTO area (id, gid, name, type) +VALUES (3983, 'b9576171-3434-4d1b-8883-165ed6e65d2f', 'Kensington and Chelsea', 2) + , (221, '8a754a16-0027-3a29-b6d7-2b40ea0481ed', 'United Kingdom', 1) + , (38, '71bbafaa-e825-3e15-8ca9-017dcad1748b', 'Canada', 1); + +INSERT INTO country_area (area) VALUES ( 38), (221); +INSERT INTO iso_3166_1 (area, code) VALUES ( 38, 'CA'), (221, 'GB'); + +INSERT INTO place (id, gid, name, type, address, area, coordinates, begin_date_year) + VALUES (729, 
'4352063b-a833-421b-a420-e7fb295dece0', 'Royal Albert Hall', 2, 'Kensington Gore, London SW7 2AP', 3983, '(51.50105,-0.17748)', 1871); + +INSERT INTO event (id, gid, name, begin_date_year, begin_date_month, begin_date_day, end_date_year, end_date_month, end_date_day, time, type, cancelled, setlist, comment, ended) + VALUES (59357, 'ca1d24c1-1999-46fd-8a95-3d4108df5cb2', 'BBC Open Music Prom', 2022, 9, 1, 2022, 9, 1, '19:30:00', 1, 'f', NULL, '2022, Prom 60', 't'); + +INSERT INTO artist (id, gid, name, sort_name, begin_date_year, begin_date_month, type, area, gender) + VALUES (1294951, 'f72a5b32-449f-4090-9a2a-ebbdd8d3c2e5', 'Kwamé Ryan', 'Ryan, Kwamé', 1970, NULL, 1, 38, 1) + , (831634, 'dfeba5ea-c967-4ad2-9cdd-3cffb4320143', 'BBC Concert Orchestra', 'BBC Concert Orchestr', 1952, 1, 5, 221, NULL); + +INSERT INTO link (id, link_type) VALUES (199471, 794), (199854, 807), (199871, 806); + +INSERT INTO l_event_place (id, link, entity0, entity1) VALUES (51345, 199471, 59357, 729); + +INSERT INTO l_artist_event (id, link, entity0, entity1) VALUES (160762, 199854, 831634, 59357), (160763, 199871, 1294951, 59357); diff --git a/test/sql/instrument.sql b/test/sql/instrument.sql new file mode 100644 index 00000000..4de8b19f --- /dev/null +++ b/test/sql/instrument.sql @@ -0,0 +1,41 @@ +INSERT INTO editor (id, name, password, privs, email, website, bio, member_since, + email_confirm_date, last_login_date, ha1) + VALUES (1, 'new_editor', '{CLEARTEXT}password', 1+8+32+512, 'test@email.com', 'http://test.website', + 'biography', '1989-07-23', '2005-10-20', '2013-04-05', 'aa550c5b01407ef1f3f0d16daf9ec3c8'); + +INSERT INTO instrument + (id, gid, name, type, comment, + description, last_updated) + VALUES + (3, '745c079d-374e-4436-9448-da92dedef3ce', 'Test Instrument', 2, + 'Yet Another Test Instrument', 'This is a description!', '2009-07-09'); + +INSERT INTO instrument (id, gid, name) + VALUES (4, '945c079d-374e-4436-9448-da92dedef3cf', 'Minimal Instrument'), + (5, 
'a56d18ae-485f-5547-a559-eba3efef04d0', 'Minimal Instrument 2'); + +INSERT INTO artist (id, gid, name, sort_name) VALUES + (1, '5441c29d-3602-4898-b1a1-b77fa23b8e50', 'David Bowie', 'David Bowie'); + + +INSERT INTO artist_credit (id, name, artist_count, gid) + VALUES (1, 'David Bowie', 1, '949a7fd5-fe73-3e8f-922e-01ff4ca958f7'); +INSERT INTO artist_credit_name (artist_credit, position, artist, name) VALUES (1, 0, 1, 'David Bowie'); + +INSERT INTO recording (id, gid, name, artist_credit, length) VALUES + (1, '722190f8-f718-482f-a8bc-a8d479426a30', '“Heroes”', 1, 370000); + +INSERT INTO annotation (id, editor, text) VALUES (1, 1, 'Test annotation 1'); +INSERT INTO annotation (id, editor, text) VALUES (2, 1, 'Test annotation 2'); + +INSERT INTO instrument_annotation (instrument, annotation) VALUES (3, 1); +INSERT INTO instrument_annotation (instrument, annotation) VALUES (4, 2); + +INSERT INTO instrument_gid_redirect VALUES ('a4ef1d08-962e-4dd6-ae14-e42a6a97fc11', 3); + +INSERT INTO link (id, link_type, attribute_count) VALUES (1, 148, 2); +INSERT INTO link_attribute (link, attribute_type) VALUES (1, (SELECT id FROM link_attribute_type WHERE gid = '945c079d-374e-4436-9448-da92dedef3cf')); +INSERT INTO link_attribute_credit (link, attribute_type, credited_as) VALUES (1, (SELECT id FROM link_attribute_type WHERE gid = '945c079d-374e-4436-9448-da92dedef3cf'), 'blah instrument'); +INSERT INTO link_attribute (link, attribute_type) VALUES (1, (SELECT id FROM link_attribute_type WHERE gid = 'a56d18ae-485f-5547-a559-eba3efef04d0')); +INSERT INTO link_attribute_credit (link, attribute_type, credited_as) VALUES (1, (SELECT id FROM link_attribute_type WHERE gid = 'a56d18ae-485f-5547-a559-eba3efef04d0'), 'stupid instrument'); +INSERT INTO l_artist_recording (id, link, entity0, entity1) VALUES (4, 1, 1, 1); diff --git a/test/sql/label.sql b/test/sql/label.sql new file mode 100644 index 00000000..e0014b39 --- /dev/null +++ b/test/sql/label.sql @@ -0,0 +1,19 @@ +INSERT INTO area 
(id, gid, name, type) VALUES + (221, '8a754a16-0027-3a29-b6d7-2b40ea0481ed', 'United Kingdom', 1); +INSERT INTO country_area (area) VALUES (221); +INSERT INTO iso_3166_1 (area, code) VALUES (221, 'GB'); + +INSERT INTO label (id, gid, name, type, area, label_code, + begin_date_year, begin_date_month, begin_date_day, + end_date_year, end_date_month, end_date_day, comment) + VALUES (3, '46f0f4cd-8aab-4b33-b698-f459faf64190', 'Warp Records', 3, 221, 2070, + 1989, 02, 03, 2008, 05, 19, 'Sheffield based electronica label'); + +INSERT INTO label (id, gid, name) + VALUES (2, 'f2a9a3c0-72e3-11de-8a39-0800200c9a66', 'To Merge'); + +INSERT INTO editor (id, name, password, ha1) VALUES (1, 'editor', '{CLEARTEXT}pass', '3f3edade87115ce351d63f42d92a1834'); +INSERT INTO annotation (id, editor, text, changelog) VALUES (1, 1, 'Label Annotation', 'Changes'); +INSERT INTO label_annotation (label, annotation) VALUES (3, 1); + +INSERT INTO label_gid_redirect (gid, new_id) VALUES ('efdf3fe9-c293-4acd-b4b2-8d2a7d4f9592', 3); diff --git a/test/sql/place.sql b/test/sql/place.sql new file mode 100644 index 00000000..52a074d3 --- /dev/null +++ b/test/sql/place.sql @@ -0,0 +1,11 @@ +INSERT INTO area (id, gid, name, type) VALUES + (241, '89a675c2-3e37-3518-b83c-418bad59a85a', 'Europe', 1), + (222, '489ce91b-6658-3307-9877-795b68554c98', 'United States', 1); + +INSERT INTO country_area (area) VALUES (222), (241); +INSERT INTO iso_3166_1 (area, code) VALUES (222, 'US'), (241, 'XE'); + +INSERT INTO place (id, gid, name, type, address, area, coordinates, comment, edits_pending, last_updated, begin_date_year, begin_date_month, begin_date_day, end_date_year, end_date_month, end_date_day, ended) VALUES (1, 'df9269dd-0470-4ea2-97e8-c11e46080edd', 'A Test Place', 2, 'An Address', 241, '(0.323,1.234)', 'A PLACE!', 0, '2013-09-07 14:40:22.041309+00', 2013, NULL, NULL, NULL, NULL, NULL, '0'); + +INSERT INTO place_alias (id, name, sort_name, place, edits_pending) + VALUES (1, 'A Test Alias', 'A Test Alias', 
1, 0); diff --git a/test/sql/recording.sql b/test/sql/recording.sql new file mode 100644 index 00000000..588ea78a --- /dev/null +++ b/test/sql/recording.sql @@ -0,0 +1,44 @@ +INSERT INTO artist (id, gid, name, sort_name) + VALUES (1, '945c079d-374e-4436-9448-da92dedef3cf', 'Artist', 'Artist'); + +INSERT INTO artist_credit (id, name, artist_count, gid) + VALUES (1, 'Artist', 1, '949a7fd5-fe73-3e8f-922e-01ff4ca958f7'); +INSERT INTO artist_credit_name (artist_credit, position, artist, name, join_phrase) + VALUES (1, 0, 1, 'Artist', ''); + +INSERT INTO recording (id, gid, name, artist_credit, length) + VALUES (1, '54b9d183-7dab-42ba-94a3-7388a66604b8', 'King of the Mountain', 1, 293720), + (2, '659f405b-b4ee-4033-868a-0daa27784b89', 'π', 1, 369680), + (3, 'ae674299-2824-4500-9516-653ac1bc6f80', 'Bertie', 1, 258839), + (4, 'b1d58a57-a0f3-4db8-aa94-868cdc7bc3bb', 'Mrs. Bartolozzi', 1, 358960), + (5, '44f52946-0c98-47ba-ba60-964774db56f0', 'How to Be Invisible', 1, 332613), + (6, '07614140-8bb8-4db9-9dcc-0917c3a8471b', 'Joanni', 1, 296160); + +INSERT INTO release_group (id, gid, name, artist_credit, type) VALUES (1, '7c3218d7-75e0-4e8c-971f-f097b6c308c5', 'Aerial', 1, 1); + +INSERT INTO release (id, gid, name, artist_credit, release_group) + VALUES (1, 'f205627f-b70a-409d-adbe-66289b614e80', 'Aerial', 1, 1), + (2, '9b3d9383-3d2a-417f-bfbb-56f7c15f075b', 'Aerial', 1, 1), + (3, 'ab3d9383-3d2a-417f-bfbb-56f7c15f075b', 'Aerial', 1, 1); + +INSERT INTO release_unknown_country (release, date_year) +VALUES (1, 2007), (2, 2008); + +INSERT INTO medium_format (id, gid, name, has_discids) VALUES (123465, '52014420-cae8-11de-8a39-0800200c9a26', 'Format', TRUE); +INSERT INTO medium (id, release, position, format, name) VALUES (1, 1, 1, 123465, 'A Sea of Honey'); +INSERT INTO medium (id, release, position, format, name) VALUES (2, 1, 2, 123465, 'A Sky of Honey'); + +INSERT INTO track (id, gid, medium, position, number, recording, name, artist_credit, length) + VALUES (1, 
'66c2ebff-86a8-4e12-a9a2-1650fb97d9d8', 1, 1, 1, 1, 'King of the Mountain', 1, NULL), + (2, 'b0caa7d1-0d1e-483e-b22b-ec6ab7fada06', 1, 2, 2, 2, 'π', 1, 369680), + (3, 'f891acda-39d6-4a7f-a9d1-dd87b7c46a0a', 1, 3, 3, 3, 'Bertie', 1, 258839); + +INSERT INTO track (id, gid, medium, position, number, recording, name, artist_credit, length) + VALUES (4, '6c04d03c-4995-43be-8530-215ca911dcbf', 1, 4, 4, 4, 'Mrs. Bartolozzi', 1, 358960), + (5, '849dc232-c33a-4611-a6a5-5a0969d63422', 1, 5, 5, 5, 'How to Be Invisible', 1, 332613); + +INSERT INTO link (id, link_type, attribute_count, begin_date_year, begin_date_month, begin_date_day, end_date_year, end_date_month, end_date_day, ended) + VALUES (1, 151, 0, 1971, 2, NULL, 1972, 2, NULL, true); + +INSERT INTO l_artist_recording (id, link, entity0, entity1) VALUES (1, 1, 1, 1); +INSERT INTO l_artist_recording (id, link, entity0, entity1) VALUES (2, 1, 1, 2); diff --git a/test/sql/release-group.sql b/test/sql/release-group.sql new file mode 100644 index 00000000..8b33b24a --- /dev/null +++ b/test/sql/release-group.sql @@ -0,0 +1,39 @@ +INSERT INTO artist (id, gid, name, sort_name) + VALUES (1, 'a9d99e40-72d7-11de-8a39-0800200c9a66', 'Name', 1); + +INSERT INTO artist_credit (id, name, artist_count, gid) + VALUES (1, 'Name', 1, '949a7fd5-fe73-3e8f-922e-01ff4ca958f7'); +INSERT INTO artist_credit_name (artist_credit, artist, name, position, join_phrase) + VALUES (1, 1, 'Name', 0, ''); + +INSERT INTO release_group (id, gid, name, artist_credit, type, comment, edits_pending) + VALUES (1, '7b5d22d0-72d7-11de-8a39-0800200c9a66', 'Release Group', 1, 1, 'Comment', 2); + +INSERT INTO release_group (id, gid, name, artist_credit, type, comment, edits_pending) + VALUES (2, '3b4faa80-72d9-11de-8a39-0800200c9a66', 'Release Name', 1, 1, 'Comment', 2); + +INSERT INTO release (id, gid, name, artist_credit, release_group) + VALUES (1, '4c767e70-72d8-11de-8a39-0800200c9a66', 'Release Name', 1, 1); + +INSERT INTO editor (id, name, password, ha1) VALUES 
(1, 'editor', '{CLEARTEXT}pass', '3f3edade87115ce351d63f42d92a1834'); +INSERT INTO annotation (id, editor, text, changelog) VALUES (1, 1, 'Annotation', 'change'); +INSERT INTO release_group_annotation (release_group, annotation) VALUES (1, 1); + +INSERT INTO release_group_gid_redirect (gid, new_id) VALUES ('77637e8c-be66-46ea-87b3-73addc722fc9', 1); + +INSERT INTO artist (id, gid, name, sort_name) + VALUES (2, '7a906020-72db-11de-8a39-0800200c9a66', 'Various Artists', 'Various Artists'); +INSERT INTO artist_credit (id, name, artist_count, gid) + VALUES (2, 'Various Artists', 1, 'c44109ce-57d7-3691-84c8-37926e3d41d2'); +INSERT INTO artist_credit_name (artist_credit, artist, name, position, join_phrase) VALUES (2, 2, 'Various Artists', 1, ''); + +INSERT INTO release_group (id, gid, name, artist_credit) + VALUES (3, '25b6fe30-ff5b-11de-8a39-0800200c9a66', 'Various Release', 2); +INSERT INTO release (id, gid, name, artist_credit, release_group) + VALUES (3, '25b6fe30-ff5b-11de-8a39-0800200c9a66', 'Various Release', 2, 3); + +INSERT INTO medium (id, track_count, release, position) VALUES (1, 0, 3, 1); +INSERT INTO recording (id, artist_credit, name, gid) + VALUES (1, 2, 'Track on recording', 'b43eb990-ff5b-11de-8a39-0800200c9a66'); +INSERT INTO track (id, gid, name, artist_credit, medium, position, number, recording) + VALUES (1, '899aaf2a-a18d-4ed5-9c18-03485df72793', 'Track on recording', 1, 1, 1, 1, 1); diff --git a/test/sql/release.sql b/test/sql/release.sql new file mode 100644 index 00000000..138d2793 --- /dev/null +++ b/test/sql/release.sql @@ -0,0 +1,61 @@ +INSERT INTO artist (id, gid, name, sort_name) + VALUES (1, 'a9d99e40-72d7-11de-8a39-0800200c9a66', 'Name', 'Name'); + +INSERT INTO artist_credit (id, name, artist_count, gid) + VALUES (1, 'Name', 1, '949a7fd5-fe73-3e8f-922e-01ff4ca958f7'); +INSERT INTO artist_credit_name (artist_credit, artist, name, position, join_phrase) + VALUES (1, 1, 'Name', 0, ''); + +INSERT INTO area (id, gid, name, type) VALUES + 
(221, '8a754a16-0027-3a29-b6d7-2b40ea0481ed', 'United Kingdom', 1); +INSERT INTO country_area (area) VALUES (221); +INSERT INTO iso_3166_1 (area, code) VALUES (221, 'GB'); + +INSERT INTO release_group (id, gid, name, artist_credit, type, comment, edits_pending) + VALUES (1, '3b4faa80-72d9-11de-8a39-0800200c9a66', 'Arrival', 1, 1, 'Comment', 2); + +INSERT INTO release (id, gid, name, artist_credit, release_group, status, packaging, language, script, barcode, comment, edits_pending) VALUES (1, 'f34c079d-374e-4436-9448-da92dedef3ce', 'Arrival', 1, 1, 1, 1, 145, 3, '731453398122', 'Comment', 2); +INSERT INTO release_country (release, country, date_year, date_month, date_day) VALUES (1, 221, 2009, 5, 8); +; + +INSERT INTO release (id, gid, name, artist_credit, release_group) VALUES (2, '7a906020-72db-11de-8a39-0800200c9a66', 'Release #2', 1, 1); +; + +INSERT INTO label (id, gid, name) VALUES (1, '00a23bd0-72db-11de-8a39-0800200c9a66', 'Label'); + +INSERT INTO release_label (id, release, label, catalog_number) + VALUES (1, 1, 1, 'ABC-123'), (2, 1, 1, 'ABC-123-X'); + +INSERT INTO editor (id, name, password, privs, email, website, bio, email_confirm_date, member_since, last_login_date, ha1) VALUES (1, 'editor', '{CLEARTEXT}pass', 0, 'test@editor.org', 'http://musicbrainz.org', 'biography', '2005-10-20', '1989-07-23', now(), '3f3edade87115ce351d63f42d92a1834'); +INSERT INTO annotation (id, editor, text, changelog) VALUES (1, 1, 'Annotation', 'change'); +INSERT INTO release_annotation (release, annotation) VALUES (1, 1); + +INSERT INTO release_gid_redirect (gid, new_id) VALUES ('71dc55d8-0fc6-41c1-94e0-85ff2404997d', 1); + +INSERT INTO artist (id, gid, name, sort_name, comment) + VALUES (2, '7a906020-72db-11de-8a39-0800200c9a66', 'Various Artists', 'Various Artists', ''), + (3, '1a906020-72db-11de-8a39-0800200c9a66', 'Various Artists', 'Various Artists', 'Various Artists 2'); +INSERT INTO artist_credit (id, name, artist_count, gid) + VALUES (2, 'Various Artists', 1, 
'c44109ce-57d7-3691-84c8-37926e3d41d2'); +INSERT INTO artist_credit_name (artist_credit, artist, name, position, join_phrase) VALUES (2, 2, 'Various Artists', 1, ''); + +INSERT INTO release_group (id, gid, name, artist_credit) + VALUES (2, '25b6fe30-ff5b-11de-8a39-0800200c9a66', 'Various Release', 2); +INSERT INTO release (id, gid, name, artist_credit, release_group) VALUES (3, '25b6fe30-ff5b-11de-8a39-0800200c9a66', 'Various Release', 2, 2); +; + +INSERT INTO medium (id, track_count, release, position) VALUES (1, 1, 3, 1); +INSERT INTO recording (id, artist_credit, name, gid) + VALUES (1, 2, 'Track on recording', 'b43eb990-ff5b-11de-8a39-0800200c9a66'); +INSERT INTO track (id, gid, name, artist_credit, medium, position, number, recording) + VALUES (1, '30f0fccd-602d-4fab-8d44-06536e596966', 'Track on recording', 1, 1, 1, 1, 1), + (100, 'f9864eea-5455-4a8e-ad29-e0652cfe1452', 'Track on recording', 1, 1, 2, 2, 1); + +INSERT INTO release_group (id, gid, name, artist_credit) + VALUES (4, '329fb554-2a81-3d8a-8e22-ec2c66810019', 'Blonde on Blonde', 2); +INSERT INTO release (id, gid, name, artist_credit, release_group) VALUES (5, '538aff00-a009-4515-a064-11a6d5a502ee', 'Blonde on Blonde', 2, 4); +; + +-- release_meta +UPDATE release_meta SET cover_art_presence = 'present' WHERE id IN (7, 8); +UPDATE release_meta SET cover_art_presence = 'darkened' WHERE id = 9; diff --git a/test/sql/series.sql b/test/sql/series.sql new file mode 100644 index 00000000..01284c36 --- /dev/null +++ b/test/sql/series.sql @@ -0,0 +1,51 @@ +INSERT INTO series (id, gid, name, comment, type, ordering_type) + VALUES (1, 'a8749d0c-4a5a-4403-97c5-f6cd018f8e6d', 'Test Recording Series', 'test comment 1', 3, 1), + (2, '2e8872b9-2745-4807-a84e-094d425ec267', 'Test Work Series', 'test comment 2', 4, 2), + (3, 'dbb23c50-d4e4-11e3-9c1a-0800200c9a66', 'Dumb Recording Series', '', 3, 1); + +INSERT INTO series_alias (id, series, name, type, sort_name) VALUES + (1, 1, 'Test Recording Series Alias', 2, 'Test 
Recording Series Alias'); + +INSERT INTO link (id, link_type, attribute_count) VALUES + (1, 740, 1), (2, 740, 1), (3, 740, 1), (4, 740, 1), + (5, 743, 1), (6, 743, 1), (7, 743, 1), (8, 743, 1); + +INSERT INTO link_attribute (link, attribute_type) VALUES + (1, 788), (2, 788), (3, 788), (4, 788), (5, 788), (6, 788), (7, 788), (8, 788); + +INSERT INTO link_attribute_text_value (link, attribute_type, text_value) + VALUES (1, 788, 'A1'), + (2, 788, 'A11'), + (3, 788, 'A10'), + (4, 788, 'A100'), + (5, 788, 'WTF 87'), + (6, 788, 'WTF 21'), + (7, 788, 'WTF 99'), + (8, 788, 'WTF 12'); + +INSERT INTO artist (id, gid, name, sort_name) VALUES + (77, 'ac3a3195-ba87-4154-a937-bbc06aac4038', 'Some Artist', 'Some Artist'); + +INSERT INTO artist_credit (id, name, artist_count, gid) + VALUES (1, 'Shared Name', 1, '949a7fd5-fe73-3e8f-922e-01ff4ca958f7'); + +INSERT INTO artist_credit_name (artist_credit, position, artist, name) VALUES + (1, 0, 77, 'Shared Name'); + +INSERT INTO recording (id, gid, name, artist_credit, length) VALUES + (1, '123c079d-374e-4436-9448-da92dedef3ce', 'Dancing Queen', 1, 123456), + (2, '54b9d183-7dab-42ba-94a3-7388a66604b8', 'King of the Mountain', 1, 293720), + (3, '659f405b-b4ee-4033-868a-0daa27784b89', 'π', 1, 369680), + (4, 'ae674299-2824-4500-9516-653ac1bc6f80', 'Bertie', 1, 258839); + +INSERT INTO work (id, gid, name, type) VALUES + (1, '7e0e3ea0-d674-11e3-9c1a-0800200c9a66', 'Wōrk1', 1), + (2, 'f89a8de8-f0e3-453c-9516-5bc3edd2fd88', 'Wōrk2', 1), + (3, '8234f641-4231-4b2f-a14f-c469b9b8de11', 'Wōrk3', 1), + (4, 'efe72c7d-652d-4243-b01b-152997bb730e', 'Wōrk4', 1); + +INSERT INTO l_recording_series (id, link, entity0, entity1, link_order) VALUES + (1, 1, 1, 1, 1), (2, 2, 2, 1, 2), (3, 3, 3, 3, 1), (4, 4, 4, 3, 2); + +INSERT INTO l_series_work (id, link, entity0, entity1, link_order) VALUES + (1, 5, 2, 1, 1), (2, 6, 2, 2, 2), (3, 7, 2, 3, 3), (4, 8, 2, 4, 4); diff --git a/test/sql/tag.sql b/test/sql/tag.sql new file mode 100644 index 00000000..6bf7f358 --- 
/dev/null +++ b/test/sql/tag.sql @@ -0,0 +1,33 @@ +INSERT INTO artist (id, gid, name, sort_name) + VALUES (3, 'e2a083a9-9942-4d6e-b4d2-8397320b95f7', 'Artist 1', 'Artist 1'), + (4, '2fed031c-0e89-406e-b9f0-3d192637907a', 'Artist 2', 'Artist 2'); + +INSERT INTO tag (id, name) + VALUES (1, 'musical'), + (2, 'rock'), + (3, 'jazz'), + (4, 'world music'); + +INSERT INTO editor (id, name, password, ha1) + VALUES (11, 'editor1', '{CLEARTEXT}password', '0e5b1cce99adc89b535a3c6523c5410a'), + (12, 'editor2', '{CLEARTEXT}password', '9ab932d00c88daf4a3ccf3a25e00f977'), + (13, 'editor3', '{CLEARTEXT}password', '8226c71cd2dd007dc924910793b8ca83'), + (14, 'editor4', '{CLEARTEXT}password', 'f0ab22e1a22cb1e60fea481f812450cb'), + (15, 'editor5', '{CLEARTEXT}password', '3df132c9df92678048a6b25c5ad751ef'); + +INSERT INTO artist_tag_raw (tag, artist, editor) + VALUES (1, 3, 11), + (2, 3, 12), + (2, 3, 13), + (2, 3, 14), + (1, 4, 11), + (1, 4, 12), + (1, 4, 13), + (1, 4, 14), + (1, 4, 15), + (2, 4, 11), + (2, 4, 12), + (2, 4, 13), + (3, 4, 14), + (3, 4, 15), + (4, 4, 12); diff --git a/test/sql/url.sql b/test/sql/url.sql new file mode 100644 index 00000000..e1954c98 --- /dev/null +++ b/test/sql/url.sql @@ -0,0 +1,12 @@ +INSERT INTO url (id, gid, url, last_updated, edits_pending) + VALUES (1, '9201840b-d810-4e0f-bb75-c791205f5b24', 'http://musicbrainz.org/', '2011-01-18 16:23:38+00', 0), + (2, '9b3c5c67-572a-4822-82a3-bdd3f35cf152', 'http://microsoft.com', NOW(), 0), + (3, '25d6b63a-12dc-41c9-858a-2f42ae610a7d', 'http://zh-yue.wikipedia.org/wiki/%E7%8E%8B%E8%8F%B2', '2011-01-18 16:23:38+00', 0), + (4, '7bd45cc7-6189-4712-35e1-cdf3632cf1a9', 'https://www.allmusic.com/artist/faye-wong-mn0000515659', NOW(), 0), + (5, '9b3c5c67-572a-4822-82a3-bdd3f35cf153', 'http://microsoft.fr', '2011-01-18 16:23:38+00', 2); + +INSERT INTO artist (id, gid, name, sort_name) VALUES (100, 'acd58926-4243-40bb-a2e5-c7464b3ce577', 'Faye Wong', 'Faye Wong'); +INSERT INTO link (id, link_type) VALUES (1, 179); 
+INSERT INTO link (id, link_type) VALUES (2, 283); +INSERT INTO l_artist_url (id, link, entity0, entity1) VALUES (1, 1, 100, 3); +INSERT INTO l_artist_url (id, link, entity0, entity1) VALUES (2, 2, 100, 4); diff --git a/test/sql/work.sql b/test/sql/work.sql new file mode 100644 index 00000000..5755fefd --- /dev/null +++ b/test/sql/work.sql @@ -0,0 +1,32 @@ +INSERT INTO artist (id, gid, name, sort_name, comment) + VALUES (1, '5f9913b0-7219-11de-8a39-0800200c9a66', 'ABBA', 'ABBA', 'ABBA 1'), + (2, '5f9913b0-7219-11de-8a39-0800200c9a67', 'ABBA', 'ABBA', 'ABBA 2'); + +INSERT INTO artist_credit (id, name, artist_count, gid) + VALUES (1, 'ABBA', 1, '949a7fd5-fe73-3e8f-922e-01ff4ca958f7'), + (2, 'ABBA', 1, 'c44109ce-57d7-3691-84c8-37926e3d41d2'); +INSERT INTO artist_credit_name (artist_credit, position, artist, name, join_phrase) + VALUES (1, 0, 1, 'ABBA', ''), (2, 0, 2, 'ABBA', ''); + +INSERT INTO work (id, gid, name, type, edits_pending, comment) + VALUES (1, '745c079d-374e-4436-9448-da92dedef3ce', 'Dancing Queen', + 1, 0, 'Work'); +INSERT INTO iswc (id, work, iswc) VALUES (1, 1, 'T-000.000.001-0'); + +INSERT INTO work (id, gid, name, type, edits_pending, comment) + VALUES (5, '755c079d-374e-4436-9448-da92dedef3ce', 'Test', + 1, 0, 'Work'); +INSERT INTO iswc (id, work, iswc) VALUES (2, 5, 'T-500.000.001-0'), (3, 5, 'T-500.000.002-0'); + +INSERT INTO work (id, gid, name, type, edits_pending, comment) + VALUES (10, '105c079d-374e-4436-9448-da92dedef3ce', 'Test', + 1, 0, 'Work'); + +INSERT INTO work (id, gid, name) VALUES (2, '745c079d-374e-4436-9448-da92dedef3cf', 'Test'); +INSERT INTO iswc (id, work, iswc) VALUES (4, 2, 'T-000.000.002-0'); + +INSERT INTO work_gid_redirect VALUES ('28e73402-5666-4d74-80ab-c3734dc699ea', 1); + +INSERT INTO editor (id, name, password, ha1) VALUES (100, 'annotation_editor', '{CLEARTEXT}password', '41bd7f7951ccec2448f74bed1b7bc6cb'); +INSERT INTO annotation (id, editor, text, changelog) VALUES (1, 100, 'Annotation', 'change'); +INSERT INTO 
work_annotation (work, annotation) VALUES (1, 1); diff --git a/test/test_indexing_real_data.py b/test/test_indexing_real_data.py new file mode 100644 index 00000000..53b3be50 --- /dev/null +++ b/test/test_indexing_real_data.py @@ -0,0 +1,684 @@ +import codecs +import os +import unittest +from Queue import Queue +from datetime import datetime + +import psycopg2 +from sqlalchemy.orm import Session + +from sir import querying, util, config +from sir.indexing import index_entity +from sir.schema import SCHEMA + + +class IndexingTestCase(unittest.TestCase): + TEST_SQL_FILES_DIR = os.path.join( + os.path.dirname(os.path.realpath(__file__)), 'sql') + + @classmethod + def setUpClass(cls): + config.read_config() + + def setUp(self): + self.connection = util.engine().connect() + self.transaction = self.connection.begin() + self.session = Session(bind=self.connection) + + def tearDown(self): + self.session.close() + self.transaction.rollback() + self.connection.close() + + def _test_index_entity(self, entity, expected_messages): + self.session.execute(codecs.open( + os.path.join(self.TEST_SQL_FILES_DIR, "{}.sql".format(entity)), + encoding='utf-8' + ).read()) + + bounds = querying.iter_bounds( + self.session, SCHEMA[entity].model.id, 100, 0 + ) + + queue = Queue() + index_entity(entity, bounds[0], queue, session=self.session) + + received = [] + while not queue.empty(): + received.append(queue.get_nowait()) + self.assertItemsEqual(expected_messages, received) + + def test_index_area(self): + expected = [ + { + '_store': 'EuropeEuropeXEfalse', + 'area': u'Europe', + 'iso1': u'XE', + 'ended': 'false', + 'mbid': '89a675c2-3e37-3518-b83c-418bad59a85a', + 'type': u'Country' + }, + { + '_store': 'United StatesUnited StatesUSfalse', + 'area': u'United States', + 'iso1': u'US', + 'ended': 'false', + 'mbid': '489ce91b-6658-3307-9877-795b68554c98', + 'type': u'Country' + }, + { + '_store': 'United KingdomUnited KingdomGBfalse', + 'area': u'United Kingdom', + 'iso1': u'GB', + 'ended': 
'false', + 'mbid': '8a754a16-0027-3a29-b6d7-2b40ea0481ed', + 'type': u'Country' + }, + { + '_store': 'JapanJapanJPfalse', + 'area': u'Japan', + 'iso1': u'JP', + 'ended': 'false', + 'mbid': '2db42837-c832-3c27-b4a3-08198f75693c', + 'type': u'Country' + }, + { + '_store': 'GermanyGermanyDEfalse', + 'area': u'Germany', + 'iso1': u'DE', + 'ended': 'false', + 'mbid': '85752fda-13c4-31a3-bee5-0e5cb1f51dad', + 'type': u'Country' + }, + { + '_store': 'AustraliaAustraliaAUfalseオーストラリア', + 'sortname': u'\u30aa\u30fc\u30b9\u30c8\u30e9\u30ea\u30a2', + 'ended': 'false', + 'area': u'Australia', + 'iso1': u'AU', + 'alias': u'\u30aa\u30fc\u30b9\u30c8\u30e9\u30ea\u30a2', + 'mbid': '106e0bec-b638-3b37-b731-f53d507dc00e', + 'type': u'Country' + }, + { + '_store': 'SydneySydneyfalse106e0bec-b638-3b37-b731-f53d507dc00ebackwardAustraliaAustraliafalse', + 'ended': 'false', + 'mbid': '3f179da4-83c6-4a28-a627-e46b4a8ff1ed', + 'type': u'City', + 'area': u'Sydney' + } + ] + self._test_index_entity("area", expected) + + def test_index_artist(self): + expected = [ + { + 'comment': u'Yet Another Test Artist', + 'begin': '2008-01-02', + 'endarea': u'United Kingdom', + 'end': '2009-03-04', + 'sortname': u'Artist, Test', + 'artist': u'Test Artist', + 'country': u'GB', + 'area': u'United Kingdom', + 'ended': 'true', + 'mbid': '745c079d-374e-4436-9448-da92dedef3ce', + 'gender': u'Male', + '_store': 'Test ArtistArtist, TestmaleGBUnited KingdomUnited KingdomfalseUnited KingdomUnited KingdomfalseUnited KingdomUnited KingdomfalseYet Another Test Artist2008-01-022009-03-04true', + 'type': u'Person', + 'beginarea': u'United Kingdom' + }, + { + 'ended': 'false', + 'mbid': 'ca4c2228-227c-4904-932a-dff442c091ea', + '_store': 'Annotated Artist BAnnotated Artist Bfalse', + 'sortname': u'Annotated Artist B', + 'artist': u'Annotated Artist B' + }, + { + 'ended': 'false', + 'mbid': 'dc19b13a-5ca5-44f5-8f0e-0c37a8ab1958', + '_store': 'Annotated Artist AAnnotated Artist Afalse', + 'sortname': u'Annotated Artist A', 
+ 'artist': u'Annotated Artist A' + }, + { + 'ended': 'false', + 'mbid': '945c079d-374e-4436-9448-da92dedef3cf', + '_store': 'Minimal ArtistMinimal Artistfalse', + 'sortname': u'Minimal Artist', + 'artist': u'Minimal Artist' + } + ] + self._test_index_entity("artist", expected) + + def test_index_editor(self): + expected = [ + { + 'bio': u'ModBot is a bot used by the MusicBrainz Server to perform a variety of automated functions. \\r+', + '_store': 'ModBotModBot is a bot used by the MusicBrainz Server to perform a variety of automated functions. \\r+', + 'id': 4, 'editor': u'ModBot' + }, + { + 'bio': u'biography', + '_store': 'new_editorbiography', + 'id': 1, + 'editor': u'new_editor' + }, + { + 'bio': u'second biography', + '_store': 'Alicesecond biography', + 'id': 2, + 'editor': u'Alice' + }, + { + 'bio': u'donation check test user', + '_store': 'kunodonation check test user', + 'id': 3, + 'editor': u'kuno' + } + ] + self._test_index_entity("editor", expected) + + def test_index_instrument(self): + expected = [ + { + 'comment': u'Yet Another Test Instrument', + '_store': 'Test InstrumentYet Another Test InstrumentThis is a description!', + 'description': u'This is a description!', + 'instrument': u'Test Instrument', + 'mbid': '745c079d-374e-4436-9448-da92dedef3ce', + 'type': u'String instrument' + }, + { + 'instrument': u'Minimal Instrument 2', + 'mbid': 'a56d18ae-485f-5547-a559-eba3efef04d0', + '_store': 'Minimal Instrument 2' + }, + { + 'instrument': u'Minimal Instrument', + 'mbid': '945c079d-374e-4436-9448-da92dedef3cf', + '_store': 'Minimal Instrument' + } + ] + self._test_index_entity("instrument", expected) + + def test_index_label(self): + expected = [ + { + 'comment': u'Sheffield based electronica label', + 'begin': '1989-02-03', + 'code': 2070, + 'end': '2008-05-19', + 'area': u'United Kingdom', + 'country': u'GB', + 'label': u'Warp Records', + 'ended': 'true', + 'mbid': '46f0f4cd-8aab-4b33-b698-f459faf64190', + '_store': 'Warp RecordsWarp 
Records2070Sheffield based electronica labelGBUnited KingdomUnited Kingdomfalse1989-02-032008-05-19true', + 'type': u'Production' + }, + { + 'ended': 'false', + 'mbid': 'f2a9a3c0-72e3-11de-8a39-0800200c9a66', + '_store': 'To MergeTo Mergefalse', + 'label': u'To Merge' + } + ] + self._test_index_entity("label", expected) + + def test_index_place(self): + expected = [ + { + 'comment': u'A PLACE!', + 'begin': '2013', + '_store': 'A Test PlaceA PLACE!An Address0.3231.234EuropeEuropefalse2013falseA Test Alias', + 'area': u'Europe', + 'long': 1.234, + 'alias': u'A Test Alias', + 'mbid': 'df9269dd-0470-4ea2-97e8-c11e46080edd', + 'ended': 'false', + 'address': u'An Address', + 'lat': 0.323, + 'place': u'A Test Place', + 'type': u'Venue' + } + ] + self._test_index_entity("place", expected) + + def test_index_recording(self): + expected = [ + { + 'primarytype': u'Album', + 'firstreleasedate': '2007', + '_store': 'King of the MountainArtistArtistArtist2007AerialAerialAlbum51Format1King of the Mountain', + 'tracks': 5, + 'format': u'Format', + 'creditname': u'Artist', + 'reid': 'f205627f-b70a-409d-adbe-66289b614e80', + 'artist': u'Artist', + 'mbid': '54b9d183-7dab-42ba-94a3-7388a66604b8', + 'arid': '945c079d-374e-4436-9448-da92dedef3cf', + 'number': u'1', + 'recording': u'King of the Mountain', + 'tid': '66c2ebff-86a8-4e12-a9a2-1650fb97d9d8', + 'artistname': u'Artist', + 'video': 'f', + 'rgid': '7c3218d7-75e0-4e8c-971f-f097b6c308c5', + 'tracksrelease': 5, + 'release': u'Aerial', + 'position': 1, + 'tnum': 1 + }, + { + '_store': 'Joanni296160ArtistArtistArtist', + 'qdur': 148, + 'artist': u'Artist', + 'creditname': u'Artist', + 'artistname': u'Artist', + 'arid': '945c079d-374e-4436-9448-da92dedef3cf', + 'recording': u'Joanni', + 'mbid': '07614140-8bb8-4db9-9dcc-0917c3a8471b', + 'video': 'f', + 'dur': 296160 + }, + { + 'tnum': 5, + 'primarytype': u'Album', + '_store': 'How to Be Invisible332613ArtistArtistArtist2007AerialAerialAlbum51Format5How to Be Invisible332613', + 'qdur': 
166, + 'number': u'5', + 'video': 'f', + 'recording': u'How to Be Invisible', + 'creditname': u'Artist', + 'arid': '945c079d-374e-4436-9448-da92dedef3cf', + 'tracksrelease': 5, + 'tid': '849dc232-c33a-4611-a6a5-5a0969d63422', + 'dur': 332613, + 'firstreleasedate': '2007', + 'format': u'Format', + 'rgid': '7c3218d7-75e0-4e8c-971f-f097b6c308c5', + 'artistname': u'Artist', 'artist': u'Artist', + 'tracks': 5, + 'reid': 'f205627f-b70a-409d-adbe-66289b614e80', + 'mbid': '44f52946-0c98-47ba-ba60-964774db56f0', + 'release': u'Aerial', + 'position': 1 + }, + { + 'tnum': 4, + 'primarytype': u'Album', + '_store': 'Mrs. Bartolozzi358960ArtistArtistArtist2007AerialAerialAlbum51Format4Mrs. Bartolozzi358960', + 'qdur': 179, + 'number': u'4', + 'video': 'f', + 'recording': u'Mrs. Bartolozzi', + 'creditname': u'Artist', + 'arid': '945c079d-374e-4436-9448-da92dedef3cf', + 'tracksrelease': 5, + 'tid': '6c04d03c-4995-43be-8530-215ca911dcbf', + 'dur': 358960, + 'firstreleasedate': '2007', + 'format': u'Format', + 'rgid': '7c3218d7-75e0-4e8c-971f-f097b6c308c5', + 'artistname': u'Artist', + 'artist': u'Artist', + 'tracks': 5, + 'reid': 'f205627f-b70a-409d-adbe-66289b614e80', + 'mbid': 'b1d58a57-a0f3-4db8-aa94-868cdc7bc3bb', + 'release': u'Aerial', + 'position': 1 + }, + { + 'tnum': 3, + 'primarytype': u'Album', + '_store': 'Bertie258839ArtistArtistArtist2007AerialAerialAlbum51Format3Bertie258839', + 'qdur': 129, + 'number': u'3', + 'video': 'f', + 'recording': u'Bertie', + 'creditname': u'Artist', + 'arid': '945c079d-374e-4436-9448-da92dedef3cf', + 'tracksrelease': 5, + 'tid': 'f891acda-39d6-4a7f-a9d1-dd87b7c46a0a', + 'dur': 258839, + 'firstreleasedate': '2007', + 'format': u'Format', + 'rgid': '7c3218d7-75e0-4e8c-971f-f097b6c308c5', + 'artistname': u'Artist', + 'artist': u'Artist', + 'tracks': 5, + 'reid': 'f205627f-b70a-409d-adbe-66289b614e80', + 'mbid': 'ae674299-2824-4500-9516-653ac1bc6f80', + 'release': u'Aerial', + 'position': 1 + }, + { + 'tnum': 2, + 'primarytype': u'Album', + 
'_store': 'π369680ArtistArtistArtist2007AerialAerialAlbum51Format2π369680', + 'qdur': 184, + 'number': u'2', + 'video': 'f', + 'recording': u'\u03c0', + 'creditname': u'Artist', + 'arid': '945c079d-374e-4436-9448-da92dedef3cf', + 'tracksrelease': 5, + 'tid': 'b0caa7d1-0d1e-483e-b22b-ec6ab7fada06', + 'dur': 369680, + 'firstreleasedate': '2007', + 'format': u'Format', + 'rgid': '7c3218d7-75e0-4e8c-971f-f097b6c308c5', + 'artistname': u'Artist', + 'artist': u'Artist', + 'tracks': 5, + 'reid': 'f205627f-b70a-409d-adbe-66289b614e80', + 'mbid': '659f405b-b4ee-4033-868a-0daa27784b89', + 'release': u'Aerial', + 'position': 1 + } + ] + self._test_index_entity("recording", expected) + + def test_index_release(self): + expected = [ + { + 'primarytype': u'Album', + '_store': 'Release #2NameNameNameArrivalCommentAlbum', + 'artist': u'Name', + 'creditname': u'Name', + 'artistname': u'Name', + 'arid': 'a9d99e40-72d7-11de-8a39-0800200c9a66', + 'mbid': '7a906020-72db-11de-8a39-0800200c9a66', + 'rgid': '3b4faa80-72d9-11de-8a39-0800200c9a66', + 'release': u'Release #2', + 'quality': -1 + }, + { + 'comment': u'Comment', + 'lang': u'deu', + 'script': u'Ugar', + '_store': 'ArrivalOfficialCommentJewel CasedeuUgarNameNameNameArrivalCommentAlbum2009-05-08GB2009-05-08United KingdomUnited KingdomGB731453398122ABC-123-XLabelABC-123Label', + 'artist': u'Name', + 'creditname': u'Name', + 'country': u'GB', + 'barcode': u'731453398122', + 'status': u'Official', + 'artistname': u'Name', + 'arid': 'a9d99e40-72d7-11de-8a39-0800200c9a66', + 'label': u'Label', + 'packaging': u'Jewel Case', + 'date': '2009-05-08', + 'mbid': 'f34c079d-374e-4436-9448-da92dedef3ce', + 'catno': set([u'ABC-123', u'ABC-123-X']), + 'rgid': '3b4faa80-72d9-11de-8a39-0800200c9a66', + 'laid': '00a23bd0-72db-11de-8a39-0800200c9a66', + 'release': u'Arrival', + 'quality': -1, + 'primarytype': u'Album' + }, + { + '_store': 'Blonde on BlondeVarious ArtistsVarious ArtistsVarious ArtistsBlonde on Blonde', + 'artist': u'Various Artists', 
+ 'creditname': u'Various Artists', + 'artistname': u'Various Artists', + 'arid': '7a906020-72db-11de-8a39-0800200c9a66', + 'mbid': '538aff00-a009-4515-a064-11a6d5a502ee', + 'rgid': '329fb554-2a81-3d8a-8e22-ec2c66810019', + 'release': u'Blonde on Blonde', + 'quality': -1}, + { + '_store': 'Various ReleaseVarious ArtistsVarious ArtistsVarious ArtistsVarious Release3', + 'tracks': 3, 'artist': u'Various Artists', + 'creditname': u'Various Artists', + 'artistname': u'Various Artists', + 'arid': '7a906020-72db-11de-8a39-0800200c9a66', + 'tracksmedium': 3, + 'mbid': '25b6fe30-ff5b-11de-8a39-0800200c9a66', + 'rgid': '25b6fe30-ff5b-11de-8a39-0800200c9a66', + 'release': u'Various Release', + 'mediums': 1, + 'quality': -1 + } + ] + self._test_index_entity("release", expected) + + def test_index_release_group(self): + expected = [ + { + 'reid': '25b6fe30-ff5b-11de-8a39-0800200c9a66', + '_store': 'Various ReleaseVarious ArtistsVarious ArtistsVarious ArtistsVarious Release', + 'releases': 1, + 'artist': u'Various Artists', + 'creditname': u'Various Artists', + 'artistname': u'Various Artists', + 'arid': '7a906020-72db-11de-8a39-0800200c9a66', + 'releasegroup': u'Various Release', + 'mbid': '25b6fe30-ff5b-11de-8a39-0800200c9a66', + 'release': u'Various Release' + }, + { + 'comment': u'Comment', + 'reid': '4c767e70-72d8-11de-8a39-0800200c9a66', + '_store': 'Release GroupCommentAlbumNameName1Release Name', + 'releases': 1, + 'artist': u'Name', + 'creditname': u'Name', + 'primarytype': u'Album', + 'artistname': u'Name', + 'arid': 'a9d99e40-72d7-11de-8a39-0800200c9a66', + 'releasegroup': u'Release Group', + 'mbid': '7b5d22d0-72d7-11de-8a39-0800200c9a66', + 'release': u'Release Name' + }, + { + 'comment': u'Comment', + 'primarytype': u'Album', + '_store': 'Release NameCommentAlbumNameName1', + 'artist': u'Name', + 'creditname': u'Name', + 'artistname': u'Name', + 'arid': 'a9d99e40-72d7-11de-8a39-0800200c9a66', + 'releasegroup': u'Release Name', + 'mbid': 
'3b4faa80-72d9-11de-8a39-0800200c9a66' + } + ] + self._test_index_entity("release-group", expected) + + def test_index_series(self): + expected = [ + { + 'series': u'Dumb Recording Series', + 'mbid': 'dbb23c50-d4e4-11e3-9c1a-0800200c9a66', + 'type': u'Recording', + '_store': 'Dumb Recording Series' + }, + { + 'comment': u'test comment 1', + '_store': 'Test Recording Seriestest comment 1Test Recording Series Alias', + 'series': u'Test Recording Series', + 'alias': u'Test Recording Series Alias', + 'mbid': 'a8749d0c-4a5a-4403-97c5-f6cd018f8e6d', + 'type': u'Recording' + }, + { + 'comment': u'test comment 2', + 'series': u'Test Work Series', + 'mbid': '2e8872b9-2745-4807-a84e-094d425ec267', + 'type': u'Work', + '_store': 'Test Work Seriestest comment 2' + } + ] + self._test_index_entity("series", expected) + + def test_index_tag(self): + expected = [ + { + 'tag': u'musical', + '_store': 'musical', + 'id': 1 + }, + { + 'tag': u'rock', + '_store': 'rock', + 'id': 2 + }, + { + 'tag': u'jazz', + '_store': 'jazz', + 'id': 3 + }, + { + 'tag': u'world music', + '_store': 'world music', + 'id': 4 + } + ] + self._test_index_entity("tag", expected) + + def test_index_url(self): + expected = [ + { + 'url': u'http://musicbrainz.org/', + 'mbid': '9201840b-d810-4e0f-bb75-c791205f5b24', + '_store': 'http://musicbrainz.org/' + }, + { + 'url': u'http://microsoft.com', + 'mbid': '9b3c5c67-572a-4822-82a3-bdd3f35cf152', + '_store': 'http://microsoft.com' + }, + { + 'targettype': 'artist', + '_store': 'http://zh-yue.wikipedia.org/wiki/%E7%8E%8B%E8%8F%B2backwardFaye WongFaye Wong', + 'url': u'http://zh-yue.wikipedia.org/wiki/%E7%8E%8B%E8%8F%B2', + 'targetid': 'acd58926-4243-40bb-a2e5-c7464b3ce577', + 'mbid': '25d6b63a-12dc-41c9-858a-2f42ae610a7d', + 'relationtype': u'wikipedia' + }, + { + 'targettype': 'artist', + '_store': 'https://www.allmusic.com/artist/faye-wong-mn0000515659backwardFaye WongFaye Wong', + 'url': u'https://www.allmusic.com/artist/faye-wong-mn0000515659', + 'targetid': 
'acd58926-4243-40bb-a2e5-c7464b3ce577', + 'mbid': '7bd45cc7-6189-4712-35e1-cdf3632cf1a9', + 'relationtype': u'allmusic' + }, + { + 'url': u'http://microsoft.fr', + 'mbid': '9b3c5c67-572a-4822-82a3-bdd3f35cf153', + '_store': 'http://microsoft.fr' + } + ] + self._test_index_entity("url", expected) + + def test_index_work(self): + expected = [ + { + 'comment': u'Work', + '_store': 'TestWork', + 'mbid': '105c079d-374e-4436-9448-da92dedef3ce', + 'work': u'Test', + 'type': u'Aria' + }, + { + 'comment': u'Work', + '_store': 'TestT-500.000.001-0T-500.000.002-0Work', + 'iswc': set([u'T-500.000.002-0', u'T-500.000.001-0']), + 'work': u'Test', + 'mbid': '755c079d-374e-4436-9448-da92dedef3ce', + 'type': u'Aria' + }, + { + 'comment': u'Work', + '_store': 'Dancing QueenT-000.000.001-0Work', + 'iswc': u'T-000.000.001-0', 'work': u'Dancing Queen', + 'mbid': '745c079d-374e-4436-9448-da92dedef3ce', + 'type': u'Aria' + }, + { + 'mbid': '745c079d-374e-4436-9448-da92dedef3cf', + 'work': u'Test', + '_store': 'TestT-000.000.002-0', + 'iswc': u'T-000.000.002-0' + } + ] + self._test_index_entity("work", expected) + + def test_index_cdstub(self): + expected = [ + { + 'comment': u'this is a comment', + 'added': datetime( + 2000, 1, 1, 0, 0, + tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=0, name=None) + ), + '_store': 'Test StubTest Artist837101029192this is a comment', + 'discid': u'YfSgiOEayqN77Irs.VNV.UNJ0Zs-', + 'artist': u'Test Artist', + 'barcode': u'837101029192', + 'tracks': 2, + 'title': u'Test Stub', + 'id': 1 + } + ] + self._test_index_entity("cdstub", expected) + + def test_index_annotation(self): + expected = [ + { + '_store': '745c079d-374e-4436-9448-da92dedef3ceTest ArtistTest annotation 1', + 'name': u'Test Artist', + 'text': u'Test annotation 1', + 'entity': '745c079d-374e-4436-9448-da92dedef3ce', + 'type': 'artist', + 'id': 1 + }, + { + '_store': '945c079d-374e-4436-9448-da92dedef3cfMinimal ArtistTest annotation 2', + 'name': u'Minimal Artist', + 'text': u'Test annotation 
2', + 'entity': '945c079d-374e-4436-9448-da92dedef3cf', + 'type': 'artist', + 'id': 2 + }, + { + '_store': 'dc19b13a-5ca5-44f5-8f0e-0c37a8ab1958Annotated Artist ADuplicate annotation', + 'name': u'Annotated Artist A', + 'text': u'Duplicate annotation', + 'entity': 'dc19b13a-5ca5-44f5-8f0e-0c37a8ab1958', + 'type': 'artist', + 'id': 3 + }, + { + '_store': 'ca4c2228-227c-4904-932a-dff442c091eaAnnotated Artist BDuplicate annotation', + 'name': u'Annotated Artist B', + 'text': u'Duplicate annotation', + 'entity': 'ca4c2228-227c-4904-932a-dff442c091ea', + 'type': 'artist', + 'id': 4 + } + ] + self._test_index_entity("annotation", expected) + + def test_index_event(self): + expected = [ + { + 'comment': u'2022, Prom 60', + 'begin': '2022-09-01', + 'end': '2022-09-01', + 'artist': set([u'BBC Concert Orchestra', u'Kwam\xe9 Ryan']), + 'pid': '4352063b-a833-421b-a420-e7fb295dece0', + 'arid': set([ + 'f72a5b32-449f-4090-9a2a-ebbdd8d3c2e5', + 'dfeba5ea-c967-4ad2-9cdd-3cffb4320143' + ]), + 'ended': 'true', + 'mbid': 'ca1d24c1-1999-46fd-8a95-3d4108df5cb2', + 'place': u'Royal Albert Hall', + '_store': 'BBC Open Music Prom2022, Prom 602022-09-012022-09-01true19:30:00backwardBBC Concert OrchestraBBC Concert OrchestrbackwardKwamé RyanRyan, KwamébackwardRoyal Albert Hall', + 'type': u'Concert', + 'event': u'BBC Open Music Prom' + } + ] + self._test_index_entity("event", expected) From dff0a113406c98f8c86ab1d437938bb4815dd170 Mon Sep 17 00:00:00 2001 From: yvanzo Date: Tue, 31 May 2022 13:02:37 +0100 Subject: [PATCH 07/48] Improve test script Following the model of `docker/push.sh`: - Document expected behavior and usage with heading comments - Exit on error, for example if the first Docker Compose command failed - Support running from any working directory (using `cd`) - Support any Docker Compose setup and version: * Run `docker-compose` by default assuming v1 and `docker` group * Support `docker compose` for Docker Compose v2 * Support `sudo ...` if needed --- test.sh | 24 
++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/test.sh b/test.sh index b8f1e3f8..e194a45e 100755 --- a/test.sh +++ b/test.sh @@ -1,6 +1,22 @@ -docker-compose -f docker/docker-compose.test.yml -p sir-test up -d musicbrainz_db -docker-compose -f docker/docker-compose.test.yml -p sir-test build -docker-compose -f docker/docker-compose.test.yml -p sir-test run test \ +#!/usr/bin/env bash +# +# Run tests and return 0 if these passed successfully. +# +# Usage: +# ./test.sh +# Or: +# DOCKER_COMPOSE_CMD='sudo docker-compose' ./test.sh + +set -o errexit -o nounset + +cd "$(dirname "${BASH_SOURCE[0]}")/" + +DOCKER_COMPOSE_CMD=${DOCKER_COMPOSE_CMD:-docker-compose} + +$DOCKER_COMPOSE_CMD -f docker/docker-compose.test.yml -p sir-test up -d musicbrainz_db +$DOCKER_COMPOSE_CMD -f docker/docker-compose.test.yml -p sir-test build +set +o errexit +$DOCKER_COMPOSE_CMD -f docker/docker-compose.test.yml -p sir-test run test \ dockerize -wait tcp://musicbrainz_db:5432 -timeout 600s \ bash -c "py.test --junitxml=/data/test_report.xml \ --cov=sir \ @@ -8,5 +24,5 @@ docker-compose -f docker/docker-compose.test.yml -p sir-test run test \ --cov-report html:/data/coverage-html \ $*" RET=$? 
-docker-compose -f docker/docker-compose.test.yml -p sir-test down +$DOCKER_COMPOSE_CMD -f docker/docker-compose.test.yml -p sir-test down exit $RET From bd99fd4dcae4b6da1523f2c9291e3bc375b0f07a Mon Sep 17 00:00:00 2001 From: yvanzo Date: Thu, 23 Jan 2020 21:44:23 +0100 Subject: [PATCH 08/48] Upgrade SQLAlchemy (1/3) from 1.0.19 to 1.1.18 See https://docs.sqlalchemy.org/en/11/changelog/migration_11.html FIXME: LinkAreaArea.area0 is not instance of InstrumentAttribute --- docs/source/conf.py | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index fde09a0c..292766a9 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -34,7 +34,7 @@ autoclass_content = "both" intersphinx_mapping = {'python': ('https://docs.python.org/2.7', None), - 'sqla': ('http://docs.sqlalchemy.org/en/rel_1_0/', None), + 'sqla': ('http://docs.sqlalchemy.org/en/rel_1_1/', None), 'solr': ('https://pythonhosted.org//solrpy/', None), 'amqp': ('https://amqp.readthedocs.org/en/latest', None)} diff --git a/requirements.txt b/requirements.txt index a7d3ecaf..cde9f7f5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ git+https://github.com/metabrainz/mb-rngpy.git@v-2.20201112.0#egg=mb-rngpy psycopg2==2.8.4 retrying==1.3.3 pysolr==3.8.1 -sqlalchemy==1.0.19 +sqlalchemy==1.1.18 requests==2.22.0 ujson==1.35 sentry-sdk==1.3.1 From 7a2d2adce66d4d78785ac02b286f380066ab6b1b Mon Sep 17 00:00:00 2001 From: Michael Wiencek Date: Thu, 23 Jan 2020 17:08:31 -0600 Subject: [PATCH 09/48] Fix column type check in last_model_in_path The column can now be a hybrid_propertyProxy object which we don't want to skip, necessarily. But the previous behavior of skipping strings and mbdata models has been preserved by checking for those directly. 
--- sir/trigger_generation/paths.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sir/trigger_generation/paths.py b/sir/trigger_generation/paths.py index 832625da..661cebb7 100644 --- a/sir/trigger_generation/paths.py +++ b/sir/trigger_generation/paths.py @@ -1,8 +1,8 @@ # Copyright (c) 2015, 2017 Wieland Hoffmann, MetaBrainz Foundation # License: MIT, see LICENSE for details +import mbdata from sqlalchemy.orm import class_mapper, aliased from sqlalchemy.orm.query import Query -from sqlalchemy.orm.attributes import InstrumentedAttribute from sqlalchemy.orm.properties import ColumnProperty, RelationshipProperty from sqlalchemy.orm.descriptor_props import CompositeProperty @@ -99,7 +99,7 @@ def last_model_in_path(model, path): # If this is not a column managed by SQLAlchemy, ignore it # TODO(roman): Document when this might happen - if not isinstance(column, InstrumentedAttribute): + if isinstance(column, (str, mbdata.models.Base)): # Let's assume some other path also covers this table return None From f53b8b7856e178cf1eeb6cf74b8024b74c47126b Mon Sep 17 00:00:00 2001 From: yvanzo Date: Mon, 3 Feb 2020 18:13:32 +0100 Subject: [PATCH 10/48] Fix debug message by creating list from generator --- sir/schema/searchentities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sir/schema/searchentities.py b/sir/schema/searchentities.py index 6b97d5b9..68a84bdb 100644 --- a/sir/schema/searchentities.py +++ b/sir/schema/searchentities.py @@ -212,7 +212,7 @@ def build_entity_query(self): partial(is_composite_column, model), required_columns) for composite_column in composite_columns: - composite_parts = (c.name for c in + composite_parts = list(c.name for c in getattr(model, composite_column). 
property.columns) From 129b2d463c5cf18a801baad143b60d818cee315d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Tamargo?= Date: Thu, 26 Aug 2021 14:56:40 +0300 Subject: [PATCH 11/48] Update psycopg requirement to -binary --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index cde9f7f5..9de7a45d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ backports.functools_lru_cache==1.0.1 enum34==1.1.6 git+https://github.com/amCap1712/mbdata.git@v27.0.dev2#egg=mbdata git+https://github.com/metabrainz/mb-rngpy.git@v-2.20201112.0#egg=mb-rngpy -psycopg2==2.8.4 +psycopg2-binary==2.8.4 retrying==1.3.3 pysolr==3.8.1 sqlalchemy==1.1.18 From 631b7ec3ed693ac28c7af587e9df0322981ccfda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Tamargo?= Date: Mon, 30 Aug 2021 20:20:45 +0300 Subject: [PATCH 12/48] Update to 1.4, fix missing text() error This brings us back to the same issue as in 1.1 with _wildcard_token, so might as well do try and do the jump in one go. --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9de7a45d..f2a3dc74 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ git+https://github.com/metabrainz/mb-rngpy.git@v-2.20201112.0#egg=mb-rngpy psycopg2-binary==2.8.4 retrying==1.3.3 pysolr==3.8.1 -sqlalchemy==1.1.18 +sqlalchemy==1.4.23 requests==2.22.0 ujson==1.35 sentry-sdk==1.3.1 From 8b16c86d70893f01a86c672e84933b52aa733cf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Tamargo?= Date: Tue, 31 Aug 2021 15:12:30 +0300 Subject: [PATCH 13/48] Use load_only rather than many defer calls The point of defer_everything_but was to effectively do what load_only does, but probably back when load_only was not an option yet. This just gets rid of it, and uses load_only instead. 
Since load_only cannot work on relationships, just columns, we filter relationships out of the column set before using it. --- sir/schema/searchentities.py | 40 +++++++++++++-------------- test/test_querying.py | 52 +----------------------------------- 2 files changed, 21 insertions(+), 71 deletions(-) diff --git a/sir/schema/searchentities.py b/sir/schema/searchentities.py index 68a84bdb..08023669 100644 --- a/sir/schema/searchentities.py +++ b/sir/schema/searchentities.py @@ -32,6 +32,17 @@ def is_composite_column(model, colname): return (hasattr(attr, "property") and isinstance(attr.property, CompositeProperty)) +def is_relationship_column(model, colname): + """ + Checks if a models attribute is a relationship column. + + :param model: A :ref:`declarative ` class. + :param str colname: The column name. + :rtype: bool + """ + attr = getattr(model, colname) + return (hasattr(attr, "property") and + isinstance(attr.property, RelationshipProperty)) def merge_paths(field_paths): """ @@ -64,23 +75,6 @@ def merge_paths(field_paths): current_path_dict = new_path_dict return paths - -def defer_everything_but(mapper, load, *columns): - primary_keys = [c.name for c in mapper.primary_key] - for prop in mapper.iterate_properties: - if hasattr(prop, "columns"): - key = prop.key - if (key not in columns and key[:-3] not in columns and - key[-3:] != "_id" and key != "position" and - key not in primary_keys): - # We need the _id columns for subqueries and joins - # Position is needed because sqla automatically orders by - # artist_credit_name.position - logger.debug("Deferring %s on %s", key, mapper) - load.defer(key) - return load - - class SearchField(object): """Represents a searchable field. 
@@ -223,12 +217,18 @@ def build_entity_query(self): required_columns.remove(composite_column) required_columns.extend(composite_parts) + # load_only cannot operate on relationship columns + # so we need to remove those before running it + relationship_columns = filter( + partial(is_relationship_column, model), + required_columns) + for relationship_column in relationship_columns: + required_columns.remove(relationship_column) + logger.debug("Loading only %s on %s", required_columns, model) - load = defer_everything_but(class_mapper(model), - load, - *required_columns) + load.load_only(*required_columns) query = query.options(load) if self.extraquery is not None: query = self.extraquery(query) diff --git a/test/test_querying.py b/test/test_querying.py index fddbd481..31be1231 100644 --- a/test/test_querying.py +++ b/test/test_querying.py @@ -6,60 +6,10 @@ from collections import defaultdict from sqlalchemy.orm.properties import RelationshipProperty from sir.querying import iterate_path_values -from sir.schema.searchentities import defer_everything_but, merge_paths +from sir.schema.searchentities import merge_paths from sir.schema import generate_update_map, SCHEMA from sir.trigger_generation.paths import second_last_model_in_path - -class DeferEverythingButTest(unittest.TestCase): - def setUp(self): - mapper = helpers.Object() - mapper.iterate_properties = [] - pk1 = helpers.Object() - pk1.name = "pk1" - pk2 = helpers.Object() - pk2.name = "pk2" - mapper.primary_key = [pk1, pk2] - - self.mapper = mapper - - prop = helpers.Object() - prop.columns = "" - self.prop = prop - self.mapper.iterate_properties.append(prop) - - self.load = mock.Mock() - self.required_columns = ["key", "key2"] - - def test_plain_column_called(self): - self.prop.key = "foo" - load = defer_everything_but(self.mapper, self.load, *self.required_columns) - load.defer.assert_called_once_with("foo") - - def test_plain_column_not_called(self): - self.prop.key = "key" - load = 
defer_everything_but(self.mapper, self.load, *self.required_columns) - self.assertFalse(load.defer.called) - - def test_id_column(self): - self.prop.key = "foo_id" - load = defer_everything_but(self.mapper, self.load, - *self.required_columns) - self.assertFalse(load.defer.called) - - def test_position_column(self): - self.prop.key = "position" - load = defer_everything_but(self.mapper, self.load, - *self.required_columns) - self.assertFalse(load.defer.called) - - def test_primary_key_always_loaded(self): - self.prop.key = "pk1" - load = defer_everything_but(self.mapper, self.load, - *self.required_columns) - self.assertFalse(load.defer.called) - - class IteratePathValuesTest(unittest.TestCase): @classmethod def setUpClass(cls): From fce9789a3a0803c6269f86482d35620cd57efc0f Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Tue, 31 Aug 2021 17:51:03 +0530 Subject: [PATCH 14/48] Change relationship.table to relationship.mapper I could not find docs on `table` attribute but https://docs.sqlalchemy.org/en/14/orm/internals.html#sqlalchemy.orm.RelationshipProperty.mapper documents a `mapper` attribute which seems to do the job. FWIW, there is also a `target` attribute but that is not documented in SQLAlchemy. I am not sure if the two are different or not. --- sir/amqp/handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sir/amqp/handler.py b/sir/amqp/handler.py index a997ac67..0467b14f 100644 --- a/sir/amqp/handler.py +++ b/sir/amqp/handler.py @@ -378,7 +378,7 @@ def _index_by_fk(self, parsed_message): # to update the related entities. For 'one to many' relationships, the related # entity would have had an update trigger firing off to unlink the `index_entity` # before `index_entity` itself is deleted, so we can ignore those. 
- relevant_rels = dict((r.table.name, (list(r.local_columns)[0].name, list(r.remote_side)[0])) + relevant_rels = dict((r.mapper.mapped_table.name, (list(r.local_columns)[0].name, list(r.remote_side)[0])) for r in class_mapper(index_model).mapper.relationships if r.direction.name == 'MANYTOONE') for core_name, path in update_map[parsed_message.table_name]: From e96bb3b81dcfa7e5c6accef6a9e2ab28290b8229 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Tamargo?= Date: Tue, 31 Aug 2021 18:38:42 +0300 Subject: [PATCH 15/48] Document filter_valid_annotations --- sir/schema/queryext.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sir/schema/queryext.py b/sir/schema/queryext.py index db6b8471..eedeae1e 100644 --- a/sir/schema/queryext.py +++ b/sir/schema/queryext.py @@ -25,7 +25,8 @@ def filter_valid_annotations(query): - # TODO: Document this. What's going on in this filter? + # Skip all annotations for an entity except the last + # since all others are no longer current queries = [Query(func.max(getattr(m, "annotation_id"))). group_by( getattr(m, From 34139b1487dc80bd1246e0db480c8c5ecc8ec82a Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Wed, 1 Sep 2021 10:43:17 +0530 Subject: [PATCH 16/48] remove __tablename__ from column list __tablename__ is not a column anyways. Also, the checks in `defer_everything_but` (before we replaced it with `load_only`) filtered it out. 
--- sir/schema/searchentities.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sir/schema/searchentities.py b/sir/schema/searchentities.py index 08023669..df04c838 100644 --- a/sir/schema/searchentities.py +++ b/sir/schema/searchentities.py @@ -225,6 +225,11 @@ def build_entity_query(self): for relationship_column in relationship_columns: required_columns.remove(relationship_column) + # Remove __tablename__ from column list because if it + # ends up there because its not a column + if '__tablename__' in required_columns: + required_columns.remove('__tablename__') + logger.debug("Loading only %s on %s", required_columns, model) From 615ce793f0a968f36d47f42797ddfaa677fad6b4 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Tue, 7 Sep 2021 01:38:05 +0530 Subject: [PATCH 17/48] Do not use @hybrid_property in paths The path artist_links.artist.gid is not being processed correctly by iterate_path_values instead of returning the gid, it returns the artist. LinkArtistUrl.artist is a @hybrid_property and the check isinstance(LinkArtistUrl.artist, InstrumentedAttribute) returns false. To identify hybrid_attributes in iterate_path_values, we could use https://docs.sqlalchemy.org/en/14/orm/internals.html#sqlalchemy.orm.InspectionAttr.extension_type . I tried this, but currently it does not seem to work. Hence, it is better that we do not use hybrid attributes in paths for now. 
--- sir/querying.py | 6 ++++++ sir/schema/__init__.py | 12 ++++++------ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/sir/querying.py b/sir/querying.py index 9821b143..efbc702f 100644 --- a/sir/querying.py +++ b/sir/querying.py @@ -7,6 +7,8 @@ from sqlalchemy.orm.attributes import InstrumentedAttribute from sqlalchemy.orm.interfaces import ONETOMANY, MANYTOONE from sqlalchemy.orm.properties import RelationshipProperty +from sqlalchemy.ext.hybrid import HYBRID_PROPERTY + logger = logging.getLogger("sir") @@ -29,6 +31,10 @@ def iterate_path_values(path, obj): returned by the :func:`getattr` call will be returned and added to the list of values for this field. + .. warning:: + + Hybrid attributes like @hybrid_property are currently not supported. + To give an example, lets presume the object we're starting with is an instance of :class:`~mbdata.models.Artist` and the path is "begin_area.name". The first :func:`getattr` call will be:: diff --git a/sir/schema/__init__.py b/sir/schema/__init__.py index 6b6b02b1..d12c1eca 100644 --- a/sir/schema/__init__.py +++ b/sir/schema/__init__.py @@ -501,8 +501,8 @@ F("url", "url"), F("relationtype", ["artist_links.link.link_type.name", "release_links.link.link_type.name"]), - F("targetid", ["artist_links.artist.gid", - "release_links.release.gid"]), + F("targetid", ["artist_links.entity0.gid", + "release_links.entity0.gid"]), F("targettype", ["artist_links.__tablename__", "release_links.__tablename__"], transformfunc=tfs.url_type), @@ -532,14 +532,14 @@ F("mbid", "gid"), F("work", "name"), F("alias", "aliases.name"), - F("arid", "artist_links.artist.gid"), - F("artist", "artist_links.artist.name"), + F("arid", "artist_links.entity0.gid"), + F("artist", "artist_links.entity0.name"), F("comment", "comment"), F("iswc", "iswcs.iswc"), F("lang", "languages.language.iso_code_3"), - F("recording", "recording_links.recording.name"), + F("recording", "recording_links.entity0.name"), F("recording_count", "recording_count", 
transformfunc=tfs.integer_sum, trigger=False), - F("rid", "recording_links.recording.gid"), + F("rid", "recording_links.entity0.gid"), F("tag", "tags.tag.name"), F("type", "type.name") ], From e668d962701de6c527a3db787d031429d9f813b0 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Thu, 16 Sep 2021 19:52:14 +0530 Subject: [PATCH 18/48] Update SQLAlchemy documentation link --- docs/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 292766a9..7c6d0235 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -34,7 +34,7 @@ autoclass_content = "both" intersphinx_mapping = {'python': ('https://docs.python.org/2.7', None), - 'sqla': ('http://docs.sqlalchemy.org/en/rel_1_1/', None), + 'sqla': ('https://docs.sqlalchemy.org/en/14/', None), 'solr': ('https://pythonhosted.org//solrpy/', None), 'amqp': ('https://amqp.readthedocs.org/en/latest', None)} From 62b786ef3b851fbcefedd42ca5c48620bb28e0fc Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Thu, 23 Jun 2022 18:13:44 +0530 Subject: [PATCH 19/48] Try fixing AttributeError for session --- sir/indexing.py | 52 +++++++++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/sir/indexing.py b/sir/indexing.py index dc1c84db..5319e23b 100644 --- a/sir/indexing.py +++ b/sir/indexing.py @@ -12,6 +12,7 @@ from logging import getLogger, DEBUG, INFO from pysolr import SolrError from sqlalchemy import and_ +from sqlalchemy.orm import Session from .util import SIR_EXIT from ctypes import c_bool @@ -256,31 +257,32 @@ def _query_database(entity_name, condition, data_queue, session=None): row_converter = search_entity.query_result_to_dict if session is None: - session = util.db_session() - - query = search_entity.query.filter(condition).with_session(session) - total_records = 0 - for row in query: - if not PROCESS_FLAG.value: - return - try: - data_queue.put(row_converter(row)) - except ValueError: - 
logger.info("Skipping %s with id %s. " - "The most likely cause of this is an " - "unsupported control character in the " - "data.", - entity_name, - row.id) - except Exception as exc: - logger.error("Failed to import %s with id %s", - entity_name, - row.id) - logger.exception(exc) - raise - else: - total_records += 1 - logger.debug("Retrieved %s records in %s", total_records, model) + session = Session(util.engine()) + + with session: + query = search_entity.query.filter(condition).with_session(session) + total_records = 0 + for row in query: + if not PROCESS_FLAG.value: + return + try: + data_queue.put(row_converter(row)) + except ValueError: + logger.info("Skipping %s with id %s. " + "The most likely cause of this is an " + "unsupported control character in the " + "data.", + entity_name, + row.id) + except Exception as exc: + logger.error("Failed to import %s with id %s", + entity_name, + row.id) + logger.exception(exc) + raise + else: + total_records += 1 + logger.debug("Retrieved %s records in %s", total_records, model) def queue_to_solr(queue, batch_size, solr_connection): From 5d86c7a6c88787fdee4aba8d54206be034519083 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Thu, 23 Jun 2022 23:29:37 +0530 Subject: [PATCH 20/48] Accept session attribute in index_entity and live_index_entity In tests, we need to pass the session manually to control transactions. Refactoring code so that tests and production code execute same code path. 
--- sir/indexing.py | 20 ++++++++++---------- test/test_indexing_real_data.py | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/sir/indexing.py b/sir/indexing.py index 5319e23b..209539f5 100644 --- a/sir/indexing.py +++ b/sir/indexing.py @@ -189,9 +189,10 @@ def _index_entity_process_wrapper(args, live=False): signal.signal(signal.SIGTERM, signal.SIG_DFL) try: + session = Session(util.engine()) if live: - return live_index_entity(*args) - return index_entity(*args) + return live_index_entity(session, *args) + return index_entity(session, *args) except Exception as exc: logger.error("Failed to import %s with id in bounds %s", args[0], @@ -200,12 +201,13 @@ def _index_entity_process_wrapper(args, live=False): raise -def index_entity(entity_name, bounds, data_queue, session=None): +def index_entity(session, entity_name, bounds, data_queue): """ Retrieve rows for a single entity type identified by ``entity_name``, convert them to a dict with :func:`sir.indexing.query_result_to_dict` and put the dicts into ``queue``. + :param sqlalchemy.orm.Session session: :param str entity_name: :param bounds: :type bounds: (int, int) @@ -218,15 +220,16 @@ def index_entity(entity_name, bounds, data_queue, session=None): condition = and_(model.id >= lower_bound, model.id < upper_bound) else: condition = model.id >= lower_bound - _query_database(entity_name, condition, data_queue, session) + _query_database(session, entity_name, condition, data_queue) -def live_index_entity(entity_name, ids, data_queue): +def live_index_entity(session, entity_name, ids, data_queue): """ Retrieve rows for a single entity type identified by ``entity_name``, convert them to a dict with :func:`sir.indexing.query_result_to_dict` and put the dicts into ``queue``. 
+ :param sqlalchemy.orm.Session session: :param str entity_name: :param [int] ids: :param Queue.Queue data_queue: @@ -235,10 +238,10 @@ def live_index_entity(entity_name, ids, data_queue): return condition = and_(SCHEMA[entity_name].model.id.in_(ids)) logger.debug("Importing %s new rows for entity %s", len(ids), entity_name) - _query_database(entity_name, condition, data_queue) + _query_database(session, entity_name, condition, data_queue) -def _query_database(entity_name, condition, data_queue, session=None): +def _query_database(session, entity_name, condition, data_queue): """ Retrieve rows for a single entity type identified by ``entity_name``, convert them to a dict with :func:`sir.indexing.query_result_to_dict` and @@ -256,9 +259,6 @@ def _query_database(entity_name, condition, data_queue, session=None): model = search_entity.model row_converter = search_entity.query_result_to_dict - if session is None: - session = Session(util.engine()) - with session: query = search_entity.query.filter(condition).with_session(session) total_records = 0 diff --git a/test/test_indexing_real_data.py b/test/test_indexing_real_data.py index 53b3be50..929a6f25 100644 --- a/test/test_indexing_real_data.py +++ b/test/test_indexing_real_data.py @@ -41,7 +41,7 @@ def _test_index_entity(self, entity, expected_messages): ) queue = Queue() - index_entity(entity, bounds[0], queue, session=self.session) + index_entity(self.session, entity, bounds[0], queue) received = [] while not queue.empty(): From 73e252844b30b298e90ab64ecf7aee127653c741 Mon Sep 17 00:00:00 2001 From: yvanzo Date: Sat, 15 Oct 2022 16:54:26 +0100 Subject: [PATCH 21/48] Document requiring MBDB materialized tables Starting from using SQLAlchemy 1.4, SIR crashes if materialized (or denormalized) tables for the MusicBrainz database are not available. Just document it for now and consider SEARCH-687 for better checks. 
--- docs/source/setup/amqp.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/source/setup/amqp.rst b/docs/source/setup/amqp.rst index e1a552eb..e093361b 100644 --- a/docs/source/setup/amqp.rst +++ b/docs/source/setup/amqp.rst @@ -23,6 +23,9 @@ Database Sir requires that you both install an extension into your MusicBrainz database and add triggers to it. +It also requires to have built the materialized (or denormalized) tables +for the MusicBrainz database. + AMQP Extension ++++++++++++++ From 2bbacca61d36b38e312478d0f04331bf17e61b62 Mon Sep 17 00:00:00 2001 From: yvanzo Date: Sat, 15 Oct 2022 18:20:19 +0100 Subject: [PATCH 22/48] SEARCH-675: Document using RabbitMQ (#139) Provide information for basic maintenance and implementation details. Follow guidelines for using RabbitMQ service: - https://github.com/metabrainz/guidelines/blob/60c708538bc84e11a79d2cdad38274f5b04276ad/services/README.md - https://github.com/metabrainz/guidelines/blob/f2dcf19bac2d76d7348758c021f1e5ada4451cf8/services/RabbitMQ.md --- docs/source/index.rst | 1 + docs/source/service/index.rst | 6 ++ docs/source/service/rabbitmq.rst | 104 +++++++++++++++++++++++++++++++ 3 files changed, 111 insertions(+) create mode 100644 docs/source/service/index.rst create mode 100644 docs/source/service/rabbitmq.rst diff --git a/docs/source/index.rst b/docs/source/index.rst index b850f7e3..f82e5dc2 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -10,6 +10,7 @@ Contents: usage import queues + service/index api Indices and tables diff --git a/docs/source/service/index.rst b/docs/source/service/index.rst new file mode 100644 index 00000000..3e964762 --- /dev/null +++ b/docs/source/service/index.rst @@ -0,0 +1,6 @@ +.. _service: + +Service maintenance +=================== + +.. 
include:: rabbitmq.rst diff --git a/docs/source/service/rabbitmq.rst b/docs/source/service/rabbitmq.rst new file mode 100644 index 00000000..b92fa08f --- /dev/null +++ b/docs/source/service/rabbitmq.rst @@ -0,0 +1,104 @@ +.. _rabbitmq: + +RabbitMQ +-------- + +Maintenance +~~~~~~~~~~~ + +Requirements +++++++++++++ + +* Tolerance to connectivity issues: + When running in watch mode, losing connection to RabbitMQ can make the indexer + to stale indefinitely. + To recover, the container running the indexer has to be manually restarted. + See the ticket `SEARCH-678 `_ + for follow-up on improving tolerance. +* Maintenance mode: + It doesn’t exist. + To perform maintenance operations, it requires switching to another instance + of RabbitMQ to prevent any data loss, even for a short period of time. +* Data importance: + The RabbitMQ instance is conveying notification messages about changes that + must be made to the search indexes. + If any message is lost, all search indexes would have to be rebuilt, + which currently takes hours and implies a downtime for searches. + See the ticket `SEARCH-674 `_ + for follow-up on rebuilding with zero-downtime. +* Data persistence: + Messages are expected to be processed within seconds (or minutes during + activity peaks), so there is no need for persistent volumes. + Losing these messages isn’t critical either as search indexes can be + rebuilt in hours, so there is no need for backups either. + +Procedures +++++++++++ + + +* Start service: + + See :ref:`amqp` + +* Reload service configuration: + + After: + + * Check the indexer logs to ensure that it did not stale and that it continues + to process new messages. + +* Stop service: + + Before: + + * Uninstall search triggers + * Stop the live indexer + + It implies that search indexes will be outdated for good. + Updating search indexes requires to rebuild these and takes hours of downtime. + +* Restart service: + + It implies that search indexes will be likely missing some updates. 
+ Updating search indexes requires to rebuild these and takes hours of downtime. + +* Move service: + + * Create vhost, user, permissions, queues in the new instance + * Update broker in PostgreSQL to point to the new instance + * Once the queues in the old instance are empty, + switch the live indexer to the new instance + + Neiher data loss nor downtime will occur. + +* Remove service: + + Before: + + * Uninstall search triggers + * Stop the live indexer + + It implies that search indexes will be outdated for good. + Updating search indexes requires to rebuild these and takes hours of downtime. + +Implementation details +~~~~~~~~~~~~~~~~~~~~~~ + +* Connectivity issues are reported through both Docker logs and Sentry. +* Producer and consumer are separate as follows: + + * Producer is `pg_amqp` used by triggers in Postgres database. + + * ack mode: transactional + * heartbeat timeout: (not using 0.8 version) + * message protocol version: 0.8 + + * Consumer is `sir` running in watch mode for live indexing. + + * ack mode: basic/manual + * heartbeat timeout: (not configured/server’s default) + * message protocol version: 0.9.1 + +* There are known issues related to queues declaration; See :ref:`amqp` +* Connections are not named properly (just using proxy interface IP and port) + From 2789d1875f15e44fa4d7fed48d6c698c025811b9 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Mon, 17 Oct 2022 21:50:40 +0530 Subject: [PATCH 23/48] Fix deprecation warnings in SIR (#140) * Fix deprecation warnings in SIR After upgrading SIR to SQLAlchemy 1.4, some deprecation warnings came up. 1. SADeprecationWarning: Use .persist_selectable (deprecated since: 1.3) mapped_table = class_mapper(entity.model).mapped_table.name Fix is straightforward use .persist_selectable instead of .mapped_table as looking at following docs it only seems to have been renamed. 
.mapped_table in SQLAlchemy 1.2: https://docs.sqlalchemy.org/en/12/orm/mapping_api.html?highlight=mapped_table#sqlalchemy.orm.Mapper.mapped_table .persist_selectable in SQLAlchemy 1.4: https://docs.sqlalchemy.org/en/14/orm/mapping_api.html?highlight=mapped_table#sqlalchemy.orm.Mapper.persist_selectable 2. SADeprecationWarning: Calling URL() directly is deprecated and will be disabled in a future release. The public constructor for URL is now the URL.create() method. As the warning says, replace URL() with URL.create() 3. /usr/local/lib/python2.7/site-packages/pysqlite2/dbapi2.py:81: DeprecationWarning: Converters and adapters are deprecated. Please use only supported SQLite types. Any type mapping should happen in layer above this module. This warning comes from pysqlite. Python 2.7 does have a sqlite module so getting rid of pysqlite entirely. We only used it for a few tests in any case. 4. Finally, there's also a SAWarning about overlapping relationships in B and C models which are only used in tests. Add a backref to fix the warning. --- requirements_dev.txt | 1 - sir/amqp/handler.py | 4 ++-- sir/schema/__init__.py | 10 +++++----- sir/trigger_generation/__init__.py | 6 +++--- sir/util.py | 5 ++++- test/models.py | 3 +-- 6 files changed, 15 insertions(+), 14 deletions(-) diff --git a/requirements_dev.txt b/requirements_dev.txt index c4c3b21b..121c0c0e 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,4 +1,3 @@ pytest==4.6.9 pytest-cov==2.8.1 mock==3.0.5 -pysqlite==2.8.3 diff --git a/sir/amqp/handler.py b/sir/amqp/handler.py index 0467b14f..033b9ca5 100644 --- a/sir/amqp/handler.py +++ b/sir/amqp/handler.py @@ -378,7 +378,7 @@ def _index_by_fk(self, parsed_message): # to update the related entities. For 'one to many' relationships, the related # entity would have had an update trigger firing off to unlink the `index_entity` # before `index_entity` itself is deleted, so we can ignore those. 
- relevant_rels = dict((r.mapper.mapped_table.name, (list(r.local_columns)[0].name, list(r.remote_side)[0])) + relevant_rels = dict((r.mapper.persist_selectable.name, (list(r.local_columns)[0].name, list(r.remote_side)[0])) for r in class_mapper(index_model).mapper.relationships if r.direction.name == 'MANYTOONE') for core_name, path in update_map[parsed_message.table_name]: @@ -398,7 +398,7 @@ def _index_by_fk(self, parsed_message): related_model, new_path = second_last_model_in_path(entity.model, path) related_table_name = "" if related_model: - related_table_name = class_mapper(related_model).mapped_table.name + related_table_name = class_mapper(related_model).persist_selectable.name if related_table_name in relevant_rels: with db_session_ctx(self.db_session) as session: select_query = None diff --git a/sir/schema/__init__.py b/sir/schema/__init__.py index d12c1eca..57104b6b 100644 --- a/sir/schema/__init__.py +++ b/sir/schema/__init__.py @@ -611,16 +611,16 @@ def generate_update_map(): for core_name, entity in SCHEMA.items(): # Entity itself: # TODO(roman): See if the line below is necessary, if there is a better way to implement this. 
- mapped_table = class_mapper(entity.model).mapped_table.name - core_map[mapped_table] = core_name - paths[mapped_table].add((core_name, None)) - models[mapped_table] = entity.model + table_name = class_mapper(entity.model).persist_selectable.name + core_map[table_name] = core_name + paths[table_name].add((core_name, None)) + models[table_name] = entity.model # Related tables: for path in unique_split_paths([path for field in entity.fields for path in field.paths if field.trigger] + [path for path in entity.extrapaths or []]): model = last_model_in_path(entity.model, path) if model is not None: - name = class_mapper(model).mapped_table.name + name = class_mapper(model).persist_selectable.name paths[name].add((core_name, path)) if name not in models: models[name] = model diff --git a/sir/trigger_generation/__init__.py b/sir/trigger_generation/__init__.py index 209c9da8..01c5f3da 100644 --- a/sir/trigger_generation/__init__.py +++ b/sir/trigger_generation/__init__.py @@ -76,7 +76,7 @@ def get_trigger_tables(entities): for entity in [SCHEMA[name] for name in entities]: # Entity table itself mapped_class = class_mapper(entity.model) - tables[mapped_class.mapped_table.name] = { + tables[mapped_class.persist_selectable.name] = { "model": entity.model, "is_direct": True, "has_gid": mapped_class.has_property('gid'), @@ -87,7 +87,7 @@ def get_trigger_tables(entities): for path in field.paths if field.trigger]): model = last_model_in_path(entity.model, path) if model is not None: - table_name = class_mapper(model).mapped_table.name + table_name = class_mapper(model).persist_selectable.name if table_name not in tables: tables[table_name] = { "model": model, @@ -106,7 +106,7 @@ def write_triggers(trigger_file, function_file, model, is_direct, has_gid, **gen """ # Mapper defines correlation of model class attributes to database table columns mapper = class_mapper(model) - table_name = mapper.mapped_table.name + table_name = mapper.persist_selectable.name fk_columns = 
[list(r.local_columns)[0].name for r in mapper.relationships if r.direction.name == 'MANYTOONE'] if is_direct: diff --git a/sir/util.py b/sir/util.py index 32eb6c47..c281ed41 100644 --- a/sir/util.py +++ b/sir/util.py @@ -47,7 +47,10 @@ def engine(): for key in ["password", "host", "port"]: cdict[key] = cget(key) cdict["database"] = cget("dbname") - return create_engine(URL("postgresql", **cdict), server_side_cursors=False) + return create_engine( + URL.create("postgresql", **cdict), + server_side_cursors=False + ) def db_session(): diff --git a/test/models.py b/test/models.py index bcc6e316..515b5174 100644 --- a/test/models.py +++ b/test/models.py @@ -19,7 +19,6 @@ class B(Base): foo = Column(Integer) c_id = Column('c', Integer, ForeignKey("table_c.id")) composite_column = composite(Comp, foo, c_id) - c = relationship("C") class C(Base): @@ -30,4 +29,4 @@ class C(Base): __tablename__ = "table_c" id = Column(Integer, primary_key=True) bar = Column(Integer) - bs = relationship("B") + bs = relationship("B", backref="c") From 2f8862c4c59239159ad49b4b5a107f94dfb6ddde Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Sun, 16 Oct 2022 18:08:56 +0530 Subject: [PATCH 24/48] Fix overlapping relationship SAWarning in custom models The test logs are filled with warnings like: SAWarning: relationship 'AreaTag.area' will copy column area.id to column area_tag.area, which conflicts with relationship(s): 'CustomArea.tags' (copies area.id to area_tag.area). If this is not the intention, consider if these relationships should be linked with back_populates, or if viewonly=True should be applied to one or more if they are read-only. For the less common case that foreign key constraints are partially overlapping, the orm.foreign() annotation can be used to isolate the columns that should be written towards. To silence this warning, add the parameter 'overlaps="tags"' to the 'AreaTag.area' relationship. 
This particular warning arises from `tags = relationship("AreaTag")` in CustomArea. The reason (AFAIU) is that `tags` relationship links `Area` to `AreaTag` and `area` in `AreaTag` links AreaTag back to `Area`. However, these relationships aren't linked with a backref so it isn't known to SQLAlchemy that both `relationship` properties represent the same relation. Possible fixes include using `backref` to make it known to SQLAlchemy that both relationships are the same. That approach however needs changes in mbdata. In many cases like that of `Tag` and `Alias` relationships, it seems like a good idea to update mbdata and say add `tags` and `aliases` attributes directly to `Area` model and other entities. But there are a few cases like `artist_links` and `recording_links` in `CustomWork` which probably don't make sense to be added to mbdata and should continue to live in custom sir models. Another alternative approach to get rid of the warning, as the message suggests, is setting `viewonly=True`; since we only ever read the data in sir and never modify it, this option works fine for us. Updating mbdata likely needs to consider implications on other users of the library so currently I am not inclined to pursue the approach. Hence, going with the `viewonly=True` approach.
--- sir/schema/modelext.py | 110 ++++++++++++++++++++--------------------- 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/sir/schema/modelext.py b/sir/schema/modelext.py index 69a91d53..77fd4956 100644 --- a/sir/schema/modelext.py +++ b/sir/schema/modelext.py @@ -9,34 +9,33 @@ LinkRecordingWork, Medium, MediumCDTOC, Place, Recording, Release, ReleaseGroup, ReleaseLabel, ReleaseRaw, ReleaseTag, Series, Work, URL) -from sqlalchemy import exc as sa_exc, func, select +from sqlalchemy import func, select from sqlalchemy.orm import relationship, column_property from sqlalchemy.sql.expression import and_ -from warnings import simplefilter - -# Ignore SQLAlchemy's warnings that we're overriding some attributes -simplefilter(action="ignore", category=sa_exc.SAWarning) class CustomAnnotation(Annotation): - areas = relationship("AreaAnnotation") - artists = relationship("ArtistAnnotation") - events = relationship("EventAnnotation") - instruments = relationship("InstrumentAnnotation") - labels = relationship("LabelAnnotation") - places = relationship("PlaceAnnotation") - recordings = relationship("RecordingAnnotation") - releases = relationship("ReleaseAnnotation") - release_groups = relationship("ReleaseGroupAnnotation") - series = relationship("SeriesAnnotation") - works = relationship("WorkAnnotation") + areas = relationship("AreaAnnotation", viewonly=True) + artists = relationship("ArtistAnnotation", viewonly=True) + events = relationship("EventAnnotation", viewonly=True) + instruments = relationship("InstrumentAnnotation", viewonly=True) + labels = relationship("LabelAnnotation", viewonly=True) + places = relationship("PlaceAnnotation", viewonly=True) + recordings = relationship("RecordingAnnotation", viewonly=True) + releases = relationship("ReleaseAnnotation", viewonly=True) + release_groups = relationship("ReleaseGroupAnnotation", viewonly=True) + series = relationship("SeriesAnnotation", viewonly=True) + works = relationship("WorkAnnotation", 
viewonly=True) class CustomArea(Area): - aliases = relationship("AreaAlias") - area_links = relationship("LinkAreaArea", - primaryjoin="Area.id == LinkAreaArea.entity1_id") - tags = relationship("AreaTag") + aliases = relationship("AreaAlias", viewonly=True) + area_links = relationship( + "LinkAreaArea", + primaryjoin="Area.id == LinkAreaArea.entity1_id", + viewonly=True + ) + tags = relationship("AreaTag", viewonly=True) place_count = column_property(select([func.count(Place.id)]).where(Place.area_id == Area.id)) label_count = column_property(select([func.count(Label.id)]).where(Label.area_id == Area.id)) artist_count = column_property(select([func.count(Artist.id)]).where(Artist.area_id == Area.id)) @@ -47,8 +46,9 @@ class CustomArtist(Artist): begin_area = relationship('CustomArea', foreign_keys=[Artist.begin_area_id]) end_area = relationship('CustomArea', foreign_keys=[Artist.end_area_id]) - tags = relationship('ArtistTag') - artist_credit_names = relationship("ArtistCreditName", innerjoin=True) + tags = relationship('ArtistTag', viewonly=True) + artist_credit_names = relationship("ArtistCreditName", innerjoin=True, + viewonly=True) primary_aliases = column_property(select( [func.array_agg(ArtistAlias.name)]).where( and_(ArtistAlias.artist_id == Artist.id, @@ -57,89 +57,89 @@ class CustomArtist(Artist): class CustomArtistAlias(ArtistAlias): artist = relationship('Artist', foreign_keys=[ArtistAlias.artist_id], - innerjoin=True, backref="aliases") + innerjoin=True, backref="aliases", viewonly=True) class CustomEvent(Event): # still need to allow searching with place/area/artist aliases - aliases = relationship("EventAlias") - place_links = relationship("LinkEventPlace") - area_links = relationship("LinkAreaEvent") - artist_links = relationship("LinkArtistEvent") - tags = relationship("EventTag") + aliases = relationship("EventAlias", viewonly=True) + place_links = relationship("LinkEventPlace", viewonly=True) + area_links = relationship("LinkAreaEvent", 
viewonly=True) + artist_links = relationship("LinkArtistEvent", viewonly=True) + tags = relationship("EventTag", viewonly=True) class CustomInstrument(Instrument): - aliases = relationship("InstrumentAlias") - tags = relationship("InstrumentTag") + aliases = relationship("InstrumentAlias", viewonly=True) + tags = relationship("InstrumentTag", viewonly=True) class CustomLabel(Label): - aliases = relationship("LabelAlias") + aliases = relationship("LabelAlias", viewonly=True) area = relationship("CustomArea", foreign_keys=[Label.area_id]) - tags = relationship("LabelTag") + tags = relationship("LabelTag", viewonly=True) release_count = column_property(select([func.count(ReleaseLabel.id)]).where(ReleaseLabel.label_id == Label.id)) class CustomMediumCDToc(MediumCDTOC): medium = relationship('Medium', foreign_keys=[MediumCDTOC.medium_id], - innerjoin=True, backref="cdtocs") + innerjoin=True, backref="cdtocs", viewonly=True) class CustomPlace(Place): area = relationship("CustomArea", foreign_keys=[Place.area_id]) - aliases = relationship("PlaceAlias") + aliases = relationship("PlaceAlias", viewonly=True) class CustomRecording(Recording): - aliases = relationship("RecordingAlias") - first_release_date = relationship("RecordingFirstReleaseDate") - tags = relationship("RecordingTag") + aliases = relationship("RecordingAlias", viewonly=True) + first_release_date = relationship("RecordingFirstReleaseDate", viewonly=True) + tags = relationship("RecordingTag", viewonly=True) class CustomReleaseGroup(ReleaseGroup): - aliases = relationship("ReleaseGroupAlias") - first_release_date = relationship("ReleaseGroupMeta") - releases = relationship("Release") - tags = relationship("ReleaseGroupTag") + aliases = relationship("ReleaseGroupAlias", viewonly=True) + first_release_date = relationship("ReleaseGroupMeta", viewonly=True) + releases = relationship("Release", viewonly=True) + tags = relationship("ReleaseGroupTag", viewonly=True) release_count = 
column_property(select([func.count(Release.id)]).where(Release.release_group_id == ReleaseGroup.id)) class CustomRelease(Release): - aliases = relationship("ReleaseAlias") - asin = relationship("ReleaseMeta") + aliases = relationship("ReleaseAlias", viewonly=True) + asin = relationship("ReleaseMeta", viewonly=True) medium_count = column_property(select([func.count(Medium.id)]).where(Medium.release_id == Release.id)) class CustomReleaseRaw(ReleaseRaw): - discids = relationship("CDTOCRaw") + discids = relationship("CDTOCRaw", viewonly=True) class CustomReleaseTag(ReleaseTag): release = relationship('Release', foreign_keys=[ReleaseTag.release_id], - innerjoin=True, backref="tags") + innerjoin=True, backref="tags", viewonly=True) class CustomSeries(Series): - aliases = relationship("SeriesAlias") - tags = relationship("SeriesTag") + aliases = relationship("SeriesAlias", viewonly=True) + tags = relationship("SeriesTag", viewonly=True) class CustomWork(Work): - aliases = relationship("WorkAlias") - artist_links = relationship("LinkArtistWork") - tags = relationship("WorkTag") - languages = relationship("WorkLanguage") - recording_links = relationship("LinkRecordingWork") + aliases = relationship("WorkAlias", viewonly=True) + artist_links = relationship("LinkArtistWork", viewonly=True) + tags = relationship("WorkTag", viewonly=True) + languages = relationship("WorkLanguage", viewonly=True) + recording_links = relationship("LinkRecordingWork", viewonly=True) recording_count = column_property(select([func.count(LinkRecordingWork.id)]).where(LinkRecordingWork.work_id == Work.id)) class CustomURL(URL): - artist_links = relationship("LinkArtistURL") - release_links = relationship("LinkReleaseURL") + artist_links = relationship("LinkArtistURL", viewonly=True) + release_links = relationship("LinkReleaseURL", viewonly=True) class CustomLinkAttribute(LinkAttribute): link = relationship('Link', foreign_keys=[LinkAttribute.link_id], innerjoin=True, - backref="attributes") + 
backref="attributes", viewonly=True) From 9cb44fa04f4da2ec95f84c87ec8687ec9d2ab323 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Sun, 16 Oct 2022 18:12:52 +0530 Subject: [PATCH 25/48] Fix implicitly coercing SELECT object warning The test logs are filled with warnings like: SAWarning: implicitly coercing SELECT object to scalar subquery; please use the .scalar_subquery() method to produce a scalar subquery. These warnings come from code like which calculate various counts: `label_count = column_property(select([func.count(Label.id)]).where(Label.area_id == Area.id))` Fix is simple append .scalar_subquery() to the queries. --- sir/schema/modelext.py | 54 +++++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/sir/schema/modelext.py b/sir/schema/modelext.py index 77fd4956..b7f018d9 100644 --- a/sir/schema/modelext.py +++ b/sir/schema/modelext.py @@ -36,9 +36,21 @@ class CustomArea(Area): viewonly=True ) tags = relationship("AreaTag", viewonly=True) - place_count = column_property(select([func.count(Place.id)]).where(Place.area_id == Area.id)) - label_count = column_property(select([func.count(Label.id)]).where(Label.area_id == Area.id)) - artist_count = column_property(select([func.count(Artist.id)]).where(Artist.area_id == Area.id)) + place_count = column_property( + select([func.count(Place.id)]). + where(Place.area_id == Area.id). + scalar_subquery() + ) + label_count = column_property( + select([func.count(Label.id)]). + where(Label.area_id == Area.id). + scalar_subquery() + ) + artist_count = column_property( + select([func.count(Artist.id)]). + where(Artist.area_id == Area.id). 
+ scalar_subquery() + ) class CustomArtist(Artist): @@ -49,10 +61,14 @@ class CustomArtist(Artist): tags = relationship('ArtistTag', viewonly=True) artist_credit_names = relationship("ArtistCreditName", innerjoin=True, viewonly=True) - primary_aliases = column_property(select( - [func.array_agg(ArtistAlias.name)]).where( - and_(ArtistAlias.artist_id == Artist.id, - ArtistAlias.primary_for_locale == True))) + primary_aliases = column_property( + select([func.array_agg(ArtistAlias.name)]). + where(and_( + ArtistAlias.artist_id == Artist.id, + ArtistAlias.primary_for_locale == True + )). + scalar_subquery() + ) class CustomArtistAlias(ArtistAlias): @@ -78,7 +94,11 @@ class CustomLabel(Label): aliases = relationship("LabelAlias", viewonly=True) area = relationship("CustomArea", foreign_keys=[Label.area_id]) tags = relationship("LabelTag", viewonly=True) - release_count = column_property(select([func.count(ReleaseLabel.id)]).where(ReleaseLabel.label_id == Label.id)) + release_count = column_property( + select([func.count(ReleaseLabel.id)]). + where(ReleaseLabel.label_id == Label.id). + scalar_subquery() + ) class CustomMediumCDToc(MediumCDTOC): @@ -102,13 +122,21 @@ class CustomReleaseGroup(ReleaseGroup): first_release_date = relationship("ReleaseGroupMeta", viewonly=True) releases = relationship("Release", viewonly=True) tags = relationship("ReleaseGroupTag", viewonly=True) - release_count = column_property(select([func.count(Release.id)]).where(Release.release_group_id == ReleaseGroup.id)) + release_count = column_property( + select([func.count(Release.id)]). + where(Release.release_group_id == ReleaseGroup.id). + scalar_subquery() + ) class CustomRelease(Release): aliases = relationship("ReleaseAlias", viewonly=True) asin = relationship("ReleaseMeta", viewonly=True) - medium_count = column_property(select([func.count(Medium.id)]).where(Medium.release_id == Release.id)) + medium_count = column_property( + select([func.count(Medium.id)]). 
+ where(Medium.release_id == Release.id). + scalar_subquery() + ) class CustomReleaseRaw(ReleaseRaw): @@ -131,7 +159,11 @@ class CustomWork(Work): tags = relationship("WorkTag", viewonly=True) languages = relationship("WorkLanguage", viewonly=True) recording_links = relationship("LinkRecordingWork", viewonly=True) - recording_count = column_property(select([func.count(LinkRecordingWork.id)]).where(LinkRecordingWork.work_id == Work.id)) + recording_count = column_property( + select([func.count(LinkRecordingWork.id)]). + where(LinkRecordingWork.work_id == Work.id). + scalar_subquery() + ) class CustomURL(URL): From 63aa1cb0aa9863d77719eaa41950dd3d90760dd8 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Tue, 25 Oct 2022 13:52:40 +0530 Subject: [PATCH 26/48] Remove unused import --- sir/schema/modelext.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sir/schema/modelext.py b/sir/schema/modelext.py index b7f018d9..38c6c121 100644 --- a/sir/schema/modelext.py +++ b/sir/schema/modelext.py @@ -5,7 +5,7 @@ that are used in SIR. """ from mbdata.models import (Annotation, Area, Artist, ArtistAlias, Event, - Instrument, Label, LinkAttribute, LinkAttributeType, + Instrument, Label, LinkAttribute, LinkRecordingWork, Medium, MediumCDTOC, Place, Recording, Release, ReleaseGroup, ReleaseLabel, ReleaseRaw, ReleaseTag, Series, Work, URL) From cc3edade5ce94f72fe0f5824ac09a613c66eb2be Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Tue, 25 Oct 2022 18:35:30 +0530 Subject: [PATCH 27/48] Revert "Use load_only rather than many defer calls" This reverts commit 8b16c86d. The SQL queries generated using load_only are less optimal as compared to using defer_everything_but. The reason likely stems from a difference in which columns ways eagerly load. For now, just revert to the old way to avoid performance regressions. 
--- sir/schema/searchentities.py | 45 ++++++++++++++----------------- test/test_querying.py | 52 +++++++++++++++++++++++++++++++++++- 2 files changed, 71 insertions(+), 26 deletions(-) diff --git a/sir/schema/searchentities.py b/sir/schema/searchentities.py index df04c838..68a84bdb 100644 --- a/sir/schema/searchentities.py +++ b/sir/schema/searchentities.py @@ -32,17 +32,6 @@ def is_composite_column(model, colname): return (hasattr(attr, "property") and isinstance(attr.property, CompositeProperty)) -def is_relationship_column(model, colname): - """ - Checks if a models attribute is a relationship column. - - :param model: A :ref:`declarative ` class. - :param str colname: The column name. - :rtype: bool - """ - attr = getattr(model, colname) - return (hasattr(attr, "property") and - isinstance(attr.property, RelationshipProperty)) def merge_paths(field_paths): """ @@ -75,6 +64,23 @@ def merge_paths(field_paths): current_path_dict = new_path_dict return paths + +def defer_everything_but(mapper, load, *columns): + primary_keys = [c.name for c in mapper.primary_key] + for prop in mapper.iterate_properties: + if hasattr(prop, "columns"): + key = prop.key + if (key not in columns and key[:-3] not in columns and + key[-3:] != "_id" and key != "position" and + key not in primary_keys): + # We need the _id columns for subqueries and joins + # Position is needed because sqla automatically orders by + # artist_credit_name.position + logger.debug("Deferring %s on %s", key, mapper) + load.defer(key) + return load + + class SearchField(object): """Represents a searchable field. 
@@ -217,23 +223,12 @@ def build_entity_query(self): required_columns.remove(composite_column) required_columns.extend(composite_parts) - # load_only cannot operate on relationship columns - # so we need to remove those before running it - relationship_columns = filter( - partial(is_relationship_column, model), - required_columns) - for relationship_column in relationship_columns: - required_columns.remove(relationship_column) - - # Remove __tablename__ from column list because if it - # ends up there because its not a column - if '__tablename__' in required_columns: - required_columns.remove('__tablename__') - logger.debug("Loading only %s on %s", required_columns, model) - load.load_only(*required_columns) + load = defer_everything_but(class_mapper(model), + load, + *required_columns) query = query.options(load) if self.extraquery is not None: query = self.extraquery(query) diff --git a/test/test_querying.py b/test/test_querying.py index 31be1231..fddbd481 100644 --- a/test/test_querying.py +++ b/test/test_querying.py @@ -6,10 +6,60 @@ from collections import defaultdict from sqlalchemy.orm.properties import RelationshipProperty from sir.querying import iterate_path_values -from sir.schema.searchentities import merge_paths +from sir.schema.searchentities import defer_everything_but, merge_paths from sir.schema import generate_update_map, SCHEMA from sir.trigger_generation.paths import second_last_model_in_path + +class DeferEverythingButTest(unittest.TestCase): + def setUp(self): + mapper = helpers.Object() + mapper.iterate_properties = [] + pk1 = helpers.Object() + pk1.name = "pk1" + pk2 = helpers.Object() + pk2.name = "pk2" + mapper.primary_key = [pk1, pk2] + + self.mapper = mapper + + prop = helpers.Object() + prop.columns = "" + self.prop = prop + self.mapper.iterate_properties.append(prop) + + self.load = mock.Mock() + self.required_columns = ["key", "key2"] + + def test_plain_column_called(self): + self.prop.key = "foo" + load = 
defer_everything_but(self.mapper, self.load, *self.required_columns) + load.defer.assert_called_once_with("foo") + + def test_plain_column_not_called(self): + self.prop.key = "key" + load = defer_everything_but(self.mapper, self.load, *self.required_columns) + self.assertFalse(load.defer.called) + + def test_id_column(self): + self.prop.key = "foo_id" + load = defer_everything_but(self.mapper, self.load, + *self.required_columns) + self.assertFalse(load.defer.called) + + def test_position_column(self): + self.prop.key = "position" + load = defer_everything_but(self.mapper, self.load, + *self.required_columns) + self.assertFalse(load.defer.called) + + def test_primary_key_always_loaded(self): + self.prop.key = "pk1" + load = defer_everything_but(self.mapper, self.load, + *self.required_columns) + self.assertFalse(load.defer.called) + + class IteratePathValuesTest(unittest.TestCase): @classmethod def setUpClass(cls): From 6599b4c54b78e9f346fdd2d53599e28acdcb0431 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Tue, 25 Oct 2022 18:37:35 +0530 Subject: [PATCH 28/48] Do not defer CompositeProperty Trying to specify the load type for a composite property is erroneous. So filter those properties. 
--- sir/schema/searchentities.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sir/schema/searchentities.py b/sir/schema/searchentities.py index 68a84bdb..8d372d0e 100644 --- a/sir/schema/searchentities.py +++ b/sir/schema/searchentities.py @@ -68,7 +68,7 @@ def merge_paths(field_paths): def defer_everything_but(mapper, load, *columns): primary_keys = [c.name for c in mapper.primary_key] for prop in mapper.iterate_properties: - if hasattr(prop, "columns"): + if hasattr(prop, "columns") and not isinstance(prop, CompositeProperty): key = prop.key if (key not in columns and key[:-3] not in columns and key[-3:] != "_id" and key != "position" and From 05c277d873e936e33e6037cd8c136953ccaa6a0c Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Thu, 27 Oct 2022 19:20:57 +0530 Subject: [PATCH 29/48] Upgrade SQLAlchemy to latest version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index f2a3dc74..6601a8c1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ git+https://github.com/metabrainz/mb-rngpy.git@v-2.20201112.0#egg=mb-rngpy psycopg2-binary==2.8.4 retrying==1.3.3 pysolr==3.8.1 -sqlalchemy==1.4.23 +sqlalchemy==1.4.41 requests==2.22.0 ujson==1.35 sentry-sdk==1.3.1 From 99b3208533ce8322ce8d936242ce526aec7d2a76 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Fri, 28 Oct 2022 12:36:36 +0530 Subject: [PATCH 30/48] Eagerly load area fields We access begin_date, end_date and ended fields from area fields in convert_area_inner which is called for all area types. This function is only called by wscompat converter so we only need to add these to extrapaths and not to the main fields in the entity. 
--- sir/schema/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sir/schema/__init__.py b/sir/schema/__init__.py index 57104b6b..2983c344 100644 --- a/sir/schema/__init__.py +++ b/sir/schema/__init__.py @@ -111,7 +111,6 @@ F("begin", "begin_date", transformfunc=tfs.index_partialdate_to_string), F("end", "end_date", transformfunc=tfs.index_partialdate_to_string), F("ended", "ended", transformfunc=tfs.ended_to_string), - F("area", ["area.name", "area.aliases.name"]), F("beginarea", ["begin_area.name", "begin_area.aliases.name"]), F("country", "area.iso_3166_1_codes.code"), @@ -133,6 +132,10 @@ "aliases.locale", "aliases.primary_for_locale", "aliases.begin_date", "aliases.end_date", "begin_area.gid", "area.gid", "end_area.gid", + "area.begin_date", "area.end_date", "area.ended", + "begin_area.begin_date", "begin_area.end_date", + "begin_area.ended", "end_area.begin_date", + "end_area.end_date", "end_area.ended", "gender.gid", "type.gid"] ) From dd87d0d1b3a26fd8b989e490bd8071e3c53caa4c Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Mon, 31 Oct 2022 22:09:10 +0530 Subject: [PATCH 31/48] Eagerly load artist.sort_name in event indexing convert_event used as wscompat converter for event core calls convert_artist_simple as well which accesses sort_name so eagerly load it. 
--- sir/schema/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sir/schema/__init__.py b/sir/schema/__init__.py index 2983c344..8f4f5224 100644 --- a/sir/schema/__init__.py +++ b/sir/schema/__init__.py @@ -200,6 +200,7 @@ "artist_links.artist.gid", "artist_links.artist.name", "artist_links.artist.comment", + "artist_links.artist.sort_name", "artist_links.link.link_type.name", "artist_links.link.link_type.gid", "artist_links.link.attributes.attribute_type.name", "artist_links.link.attributes.attribute_type.gid", From 45085daa5cbbe97d18e62861dab2b7c1761cb2d6 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Tue, 1 Nov 2022 00:52:31 +0530 Subject: [PATCH 32/48] Use mapper attribute instead of hybrid property Logging SQL queries shows that using the hybrid property, like artist of the LinkArtistPlace model, instead of the actual attribute, like entity0, does not trigger eager loading of the db column. Therefore, use entity0 instead of artist in artist_links, entity0 instead of area in area_links and entity1 instead of place in place_links.
--- sir/schema/__init__.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sir/schema/__init__.py b/sir/schema/__init__.py index 8f4f5224..58c4ff36 100644 --- a/sir/schema/__init__.py +++ b/sir/schema/__init__.py @@ -191,22 +191,22 @@ "aliases.primary_for_locale", "aliases.begin_date", "aliases.end_date", - "area_links.area.name", - "area_links.area.gid", + "area_links.entity0.name", + "area_links.entity0.gid", "area_links.link.link_type.name", "area_links.link.link_type.gid", "area_links.link.attributes.attribute_type.name", "area_links.link.attributes.attribute_type.gid", - "artist_links.artist.gid", - "artist_links.artist.name", - "artist_links.artist.comment", - "artist_links.artist.sort_name", + "artist_links.entity0.gid", + "artist_links.entity0.name", + "artist_links.entity0.comment", + "artist_links.entity0.sort_name", "artist_links.link.link_type.name", "artist_links.link.link_type.gid", "artist_links.link.attributes.attribute_type.name", "artist_links.link.attributes.attribute_type.gid", - "place_links.place.gid", - "place_links.place.name", + "place_links.entity1.gid", + "place_links.entity1.name", "place_links.link.link_type.name", "place_links.link.link_type.gid", "place_links.link.attributes.attribute_type.name", From da0da0792483b6727af068701e10d7bd6a228178 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Tue, 1 Nov 2022 02:31:48 +0530 Subject: [PATCH 33/48] Use mapper attribute instead of hybrid property Logging SQL queries shows that when using the hybrid property like artist of LinkArtistUrl model instead of the actual attribute like entity0 does not trigger eager loading of the db column. Therefore, use entity0 instead of artist in artist_links and entity0 instead of release in release_links. 
--- sir/schema/__init__.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sir/schema/__init__.py b/sir/schema/__init__.py index 58c4ff36..81b58c16 100644 --- a/sir/schema/__init__.py +++ b/sir/schema/__init__.py @@ -513,17 +513,17 @@ ], 1.5, convert.convert_url, - extrapaths=["artist_links.artist.gid", - "artist_links.artist.name", - "artist_links.artist.comment", - "artist_links.artist.sort_name", + extrapaths=["artist_links.entity0.gid", + "artist_links.entity0.name", + "artist_links.entity0.comment", + "artist_links.entity0.sort_name", "artist_links.link.link_type.name", "artist_links.link.link_type.gid", "artist_links.link.attributes.attribute_type.name", "artist_links.link.attributes.attribute_type.gid", - "release_links.release.gid", - "release_links.release.name", - "release_links.release.comment", + "release_links.entity0.gid", + "release_links.entity0.name", + "release_links.entity0.comment", "release_links.link.link_type.name", "release_links.link.link_type.gid", "release_links.link.attributes.attribute_type.name", From 16577dbee49242b61f8ad08fe7a9c5c2b67b3eac Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Fri, 4 Nov 2022 13:54:47 +0530 Subject: [PATCH 34/48] Eagerly load artist_alias.gid in release group indexing (#148) convert_release_group used as wscompat converter for release group core calls convert_alias as well which accesses alias.gid so eagerly load it. 
--- sir/schema/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sir/schema/__init__.py b/sir/schema/__init__.py index 81b58c16..b9337ff2 100644 --- a/sir/schema/__init__.py +++ b/sir/schema/__init__.py @@ -462,6 +462,7 @@ "artist_credit.artists.artist.aliases.primary_for_locale", "artist_credit.artists.artist.aliases.sort_name", "artist_credit.artists.artist.aliases.type.id", + "artist_credit.artists.artist.aliases.type.gid", "artist_credit.artists.artist.aliases.type.name", "artist_credit.artists.artist.gid", "artist_credit.artists.artist.sort_name", From 89ef1abb1348526928102d3896cf9460c2f82f4e Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Wed, 2 Nov 2022 02:07:01 +0530 Subject: [PATCH 35/48] Eagerly load artist.comment in release indexing convert_release used as wscompat converter for release core calls convert_artist_simple as well which accesses comment so eagerly load it. --- sir/schema/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sir/schema/__init__.py b/sir/schema/__init__.py index b9337ff2..834c3364 100644 --- a/sir/schema/__init__.py +++ b/sir/schema/__init__.py @@ -416,6 +416,7 @@ "artist_credit.artists.artist.aliases.type.gid", "artist_credit.artists.artist.gid", "artist_credit.artists.artist.sort_name", + "artist_credit.artists.artist.comment", "country_dates.country.area.gid", "country_dates.country.area.name", "country_dates.country.area.iso_3166_1_codes.code", From 80954d6953da4b74028b3b5a73fada5d81f910e0 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Wed, 2 Nov 2022 02:17:55 +0530 Subject: [PATCH 36/48] Eagerly load packaging.gid in release indexing convert_release used as wscompat converter for release core calls convert_release_packaging as well which accesses comment so eagerly load it. 
--- sir/schema/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sir/schema/__init__.py b/sir/schema/__init__.py index 834c3364..62e5eaee 100644 --- a/sir/schema/__init__.py +++ b/sir/schema/__init__.py @@ -429,6 +429,7 @@ "release_group.type.gid", "release_group.secondary_types.secondary_type.gid", "status.gid", + "packaging.gid", "language.iso_code_3", "tags.count"] ) From a06267254e21ae89b1a66f52df14898975270124 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Mon, 7 Nov 2022 13:09:56 +0530 Subject: [PATCH 37/48] Use mapper attribute instead of hybrid property Logging SQL queries shows that when using the hybrid property like area0 of LinkAreaArea model instead of the actual attribute like entity0 does not trigger eager loading of the db column. Therefore, use entity0 instead of area0 in area_links. --- sir/schema/__init__.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sir/schema/__init__.py b/sir/schema/__init__.py index 62e5eaee..6cf860b8 100644 --- a/sir/schema/__init__.py +++ b/sir/schema/__init__.py @@ -86,12 +86,12 @@ "aliases.sort_name", "aliases.type.gid", "aliases.locale", "aliases.primary_for_locale", "aliases.begin_date", "aliases.end_date", - "area_links.area0.name", - "area_links.area0.gid", - "area_links.area0.begin_date", - "area_links.area0.end_date", - "area_links.area0.type.id", - "area_links.area0.type.gid", + "area_links.entity0.name", + "area_links.entity0.gid", + "area_links.entity0.begin_date", + "area_links.entity0.end_date", + "area_links.entity0.type.id", + "area_links.entity0.type.gid", "area_links.link.link_type.name", "area_links.link.link_type.gid", "area_links.link.attributes.attribute_type.name", From a596aa63c636c5c2edc9a8770b7f05fcc6eec4ab Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Mon, 7 Nov 2022 13:13:18 +0530 Subject: [PATCH 38/48] Eagerly load area0.type.name and area0.ended convert_area calls convert_area_relation_list which in turn calls convert_area_inner on the related 
area that accesses these attributes so eagerly load these for performance. --- sir/schema/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sir/schema/__init__.py b/sir/schema/__init__.py index 6cf860b8..fb73bcc8 100644 --- a/sir/schema/__init__.py +++ b/sir/schema/__init__.py @@ -90,8 +90,10 @@ "area_links.entity0.gid", "area_links.entity0.begin_date", "area_links.entity0.end_date", + "area_links.entity0.ended", "area_links.entity0.type.id", "area_links.entity0.type.gid", + "area_links.entity0.type.name", "area_links.link.link_type.name", "area_links.link.link_type.gid", "area_links.link.attributes.attribute_type.name", From ade0073ddd450e641599d56af1a1ca4762aef83c Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Mon, 7 Nov 2022 14:00:34 +0530 Subject: [PATCH 39/48] Eagerly load area attributes in place indexing convert_place used as wscompat converter for place core calls convert_area_inner as well which accesses area.type.gid, area.type.name, area.begin_date, area.end_date, area.ended so eagerly load it. --- sir/schema/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sir/schema/__init__.py b/sir/schema/__init__.py index fb73bcc8..2225ba21 100644 --- a/sir/schema/__init__.py +++ b/sir/schema/__init__.py @@ -286,7 +286,9 @@ "aliases.type.gid", "aliases.sort_name", "aliases.locale", "aliases.primary_for_locale", "aliases.begin_date", "aliases.end_date", - "area.gid", "type.gid"] + "area.gid", "area.type.gid", "area.type.name", + "area.begin_date", "area.end_date", "area.ended", + "type.gid"] ) From c93c640321db58ba2e2b719478dd3734870c125c Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Mon, 7 Nov 2022 21:51:00 +0530 Subject: [PATCH 40/48] Fix loading of release-group first_release_date The change amends the implementation from SEARCH-319. The CustomReleaseGroup model has 2 relationships to ReleaseGroupMeta table which stores first_release_date. 1.
`meta` - this relationship is established on `ReleaseGroup` model through a [backref](https://github.com/acoustid/mbdata/blob/bbe303865e4cec3f83a65ce29f0d3468c729173e/mbdata/models.py#L8153) in `ReleaseGroupMeta` table. 2. `first_release_date` - we add this relationship to `ReleaseGroup` in `CustomReleaseGroup` in [sir](https://github.com/metabrainz/sir/blob/80954d6953da4b74028b3b5a73fada5d81f910e0/sir/schema/modelext.py#L122). Ideally, we would have only one relationship: the one in mbdata. However, using `meta` relationship leads to an error. Investigating the error, I found that the `meta` relationship is created using `uselist=False` parameter. The logic in [iterate_path_values](https://github.com/metabrainz/sir/blob/80954d6953da4b74028b3b5a73fada5d81f910e0/sir/querying.py#L78-L85) does not support [`ONETOONE`](https://docs.sqlalchemy.org/en/14/orm/basic_relationships.html#one-to-one) relationships that `uselist=False` creates. Until `iterate_path_values` is updated we need to keep using the `first_release_date` relationship. Accordingly, update the logic in convert_release_group converter. This should also likely speed up indexing as well. 
--- sir/wscompat/convert.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sir/wscompat/convert.py b/sir/wscompat/convert.py index 884776a2..7e90e5d0 100644 --- a/sir/wscompat/convert.py +++ b/sir/wscompat/convert.py @@ -1125,8 +1125,11 @@ def convert_release_group(obj): if obj.comment: rg.set_disambiguation(obj.comment) - if obj.meta.first_release_date: - rg.set_first_release_date(partialdate_to_string(obj.meta.first_release_date)) + if obj.first_release_date and len(obj.first_release_date) > 0\ + and obj.first_release_date[0].first_release_date: + rg.set_first_release_date( + partialdate_to_string(obj.first_release_date[0].first_release_date) + ) if obj.type is not None: rg.set_primary_type(convert_release_group_primary_type(obj.type)) From 4b0cab7f2ddf14c9d9baca630a6571c6f8e60a01 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Mon, 7 Nov 2022 22:15:53 +0530 Subject: [PATCH 41/48] Fix loading of recording first_release_date The change amends the implementation from SEARCH-218. The CustomRecording model has 2 relationships to RecordingFirstReleaseDate table which stores first_release_date. 1. `first_release` - this relationship is established on `Recording` model through a [backref](https://github.com/acoustid/mbdata/blob/bbe303865e4cec3f83a65ce29f0d3468c729173e/mbdata/models.py#L1616) in `RecordingFirstReleaseDate` table. 2. `first_release_date` - we add this relationship to `Recording` in `CustomRecording` in [sir](https://github.com/metabrainz/sir/blob/ade0073ddd450e641599d56af1a1ca4762aef83c/sir/schema/modelext.py#L116). Ideally, we would have only one relationship: the one in mbdata. However, using `first_release` relationship leads to an error. Investigating the error, I found that the `first_release` relationship is created using `uselist=False` parameter. 
The logic in [iterate_path_values](https://github.com/metabrainz/sir/blob/80954d6953da4b74028b3b5a73fada5d81f910e0/sir/querying.py#L78-L85) does not support [`ONETOONE`](https://docs.sqlalchemy.org/en/14/orm/basic_relationships.html#one-to-one) relationships that `uselist=False` creates. Until `iterate_path_values` is updated we need to keep using the `first_release_date` relationship. Accordingly, update the logic in convert_recording converter. This should also likely speed up indexing as well. --- sir/wscompat/convert.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sir/wscompat/convert.py b/sir/wscompat/convert.py index 7e90e5d0..4adff9c9 100644 --- a/sir/wscompat/convert.py +++ b/sir/wscompat/convert.py @@ -1026,8 +1026,11 @@ def convert_recording(obj): if obj.comment: recording.set_disambiguation(obj.comment) - if obj.first_release is not None and obj.first_release.date is not None: - recording.set_first_release_date(partialdate_to_string(obj.first_release.date)) + if obj.first_release_date and len(obj.first_release_date) > 0\ + and obj.first_release_date[0].date: + recording.set_first_release_date( + partialdate_to_string(obj.first_release_date[0].date) + ) recording.set_length(obj.length) From a39d96892f342177beafc29fb65139d041fc9100 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Mon, 7 Nov 2022 22:38:00 +0530 Subject: [PATCH 42/48] Fix loading of release amazon asin The change amends the implementation from SEARCH-494. The CustomRelease model has 2 relationships to ReleaseMeta table which stores amazon asin. 1. `meta` - this relationship is established on `Release` model through a [backref](https://github.com/acoustid/mbdata/blob/bbe303865e4cec3f83a65ce29f0d3468c729173e/mbdata/models.py#L7872) in `ReleaseMeta` table. 2. `asin` - we add this relationship to `Release` in `CustomRelease` in [sir](https://github.com/metabrainz/sir/blob/ade0073ddd450e641599d56af1a1ca4762aef83c/sir/schema/modelext.py#L134).
Ideally, we would have only one relationship: the one in mbdata. However, using `meta` relationship leads to an error. Investigating the error, I found that the `meta` relationship is created using `uselist=False` parameter. The logic in [iterate_path_values](https://github.com/metabrainz/sir/blob/80954d6953da4b74028b3b5a73fada5d81f910e0/sir/querying.py#L78-L85) does not support [`ONETOONE`](https://docs.sqlalchemy.org/en/14/orm/basic_relationships.html#one-to-one) relationships that `uselist=False` creates. Until `iterate_path_values` is updated we need to keep using the `asin` relationship. Accordingly, update the logic in convert_release converter. This should also likely speed up indexing as well. --- sir/wscompat/convert.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sir/wscompat/convert.py b/sir/wscompat/convert.py index 4adff9c9..cacefad1 100644 --- a/sir/wscompat/convert.py +++ b/sir/wscompat/convert.py @@ -1112,8 +1112,8 @@ def convert_release(obj): if tr is not None: release.set_text_representation(tr) - if obj.meta.amazon_asin is not None: - release.set_asin(obj.meta.amazon_asin) + if obj.asin and len(obj.asin) > 0 and obj.asin[0].amazon_asin: + release.set_asin(obj.asin[0].amazon_asin) return release From 17a8340c79618bb4ba5627f4e3bebfb9a2480b01 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Mon, 7 Nov 2022 10:52:48 +0530 Subject: [PATCH 43/48] Eagerly load area_type related attributes for artist core convert_artist calls convert_area_inner on the area that accesses these area.type.name and area.type.gid attributes so eagerly load these for performance. 
--- sir/schema/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sir/schema/__init__.py b/sir/schema/__init__.py index 2225ba21..1bb5d7ab 100644 --- a/sir/schema/__init__.py +++ b/sir/schema/__init__.py @@ -138,7 +138,9 @@ "begin_area.begin_date", "begin_area.end_date", "begin_area.ended", "end_area.begin_date", "end_area.end_date", "end_area.ended", - "gender.gid", + "gender.gid", "area.type.gid", "area.type.name", + "begin_area.type.gid", "begin_area.type.name", + "end_area.type.gid", "end_area.type.name", "type.gid"] ) From e9f618106c72dbe8c1c930789bf766322544b88f Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Tue, 8 Nov 2022 00:44:14 +0530 Subject: [PATCH 44/48] Eagerly load area attributes in label indexing convert_label used as wscompat converter for label core calls convert_area_inner as well which accesses area.begin_date, area.end_date, area.ended so eagerly load it. Also, add one more test case where these fields aren't NULL. --- sir/schema/__init__.py | 1 + test/sql/label.sql | 13 +++++++------ test/test_indexing_real_data.py | 12 +++++++++++- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/sir/schema/__init__.py b/sir/schema/__init__.py index 1bb5d7ab..cc9beb2a 100644 --- a/sir/schema/__init__.py +++ b/sir/schema/__init__.py @@ -263,6 +263,7 @@ "aliases.locale", "aliases.primary_for_locale", "aliases.begin_date", "aliases.end_date", "area.gid", "area.type.name", "area.type.gid", + "area.begin_date", "area.end_date", "area.ended", "tags.count", "type.gid" ] ) diff --git a/test/sql/label.sql b/test/sql/label.sql index e0014b39..ba0dbf83 100644 --- a/test/sql/label.sql +++ b/test/sql/label.sql @@ -1,13 +1,14 @@ -INSERT INTO area (id, gid, name, type) VALUES - (221, '8a754a16-0027-3a29-b6d7-2b40ea0481ed', 'United Kingdom', 1); -INSERT INTO country_area (area) VALUES (221); -INSERT INTO iso_3166_1 (area, code) VALUES (221, 'GB'); +INSERT INTO area (id, gid, name, type, begin_date_year, end_date_year, ended) 
VALUES + (221, '8a754a16-0027-3a29-b6d7-2b40ea0481ed', 'United Kingdom', 1, NULL, NULL, 'f'), + (243, '32f90933-b4b4-3248-b98c-e573d5329f57', 'Soviet Union', 1, 1922, 1991, 't'); +INSERT INTO country_area (area) VALUES (221), (243); +INSERT INTO iso_3166_1 (area, code) VALUES (221, 'GB'), (243, 'SU'); INSERT INTO label (id, gid, name, type, area, label_code, begin_date_year, begin_date_month, begin_date_day, end_date_year, end_date_month, end_date_day, comment) - VALUES (3, '46f0f4cd-8aab-4b33-b698-f459faf64190', 'Warp Records', 3, 221, 2070, - 1989, 02, 03, 2008, 05, 19, 'Sheffield based electronica label'); + VALUES (3, '46f0f4cd-8aab-4b33-b698-f459faf64190', 'Warp Records', 3, 221, 2070, 1989, 02, 03, 2008, 05, 19, 'Sheffield based electronica label'), + (135155, '449ddb7e-4e92-41eb-a683-5bbcc7fd7d4a', 'U.S.S.R. Ministry of Culture', NULL, 243, NULL, 1953, 3, 15, 1991, 11, 27, ''); INSERT INTO label (id, gid, name) VALUES (2, 'f2a9a3c0-72e3-11de-8a39-0800200c9a66', 'To Merge'); diff --git a/test/test_indexing_real_data.py b/test/test_indexing_real_data.py index 929a6f25..5ba2fc5f 100644 --- a/test/test_indexing_real_data.py +++ b/test/test_indexing_real_data.py @@ -223,7 +223,17 @@ def test_index_label(self): 'mbid': 'f2a9a3c0-72e3-11de-8a39-0800200c9a66', '_store': 'To MergeTo Mergefalse', 'label': u'To Merge' - } + }, + { + 'begin': '1953-03-15', + 'end': '1991-11-27', + 'area': u'Soviet Union', + 'country': u'SU', + 'label': u'U.S.S.R. Ministry of Culture', + 'ended': 'true', + 'mbid': '449ddb7e-4e92-41eb-a683-5bbcc7fd7d4a', + '_store': 'U.S.S.R. Ministry of CultureU.S.S.R. 
Ministry of CultureSUSoviet UnionSoviet Union19221991true1953-03-151991-11-27true' + } ] self._test_index_entity("label", expected) From ee18b5aafa6d33ef21f92faa6a4baf341dc12328 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Mon, 14 Nov 2022 17:04:29 +0530 Subject: [PATCH 45/48] Add test case for work that includes recording links --- test/sql/work.sql | 13 +++++++++++++ test/test_indexing_real_data.py | 9 +++++++++ 2 files changed, 22 insertions(+) diff --git a/test/sql/work.sql b/test/sql/work.sql index 5755fefd..a0d9e02c 100644 --- a/test/sql/work.sql +++ b/test/sql/work.sql @@ -27,6 +27,19 @@ INSERT INTO iswc (id, work, iswc) VALUES (4, 2, 'T-000.000.002-0'); INSERT INTO work_gid_redirect VALUES ('28e73402-5666-4d74-80ab-c3734dc699ea', 1); +INSERT INTO work (id, gid, name, type) VALUES (15, '640b17f5-4aa3-3fb1-8c6c-4792458e8a56', 'Blue Lines', 17); + +INSERT INTO artist (id, gid, name, sort_name) VALUES (4, '10adbe5e-a2c0-4bf3-8249-2b4cbf6e6ca8', 'Massive Attack', 'Massive Attack'); +INSERT INTO artist_credit (id, name, artist_count, gid) VALUES (4, 'Massive Attack', 1, '261f02c2-75a6-313f-9dd8-1716f73f3ce8'); +INSERT INTO artist_credit_name (artist_credit, position, artist, name) VALUES (4, 0, 4, 'Massive Attack'); + +INSERT INTO recording (id, gid, name, artist_credit, length) +VALUES (15, 'bef81f8f-4bcf-4308-bd66-e57018169a94', 'Blue Lines', 4, 261533), + (754567, 'a2383c02-2430-4294-9177-ef799a6eca31', 'Blue Lines', 4, 265546); + +INSERT INTO link (id, link_type) VALUES (27124, 278); +INSERT INTO l_recording_work (id, link, entity0, entity1) VALUES (279733, 27124, 15, 15), (966013, 27124, 754567, 15); + INSERT INTO editor (id, name, password, ha1) VALUES (100, 'annotation_editor', '{CLEARTEXT}password', '41bd7f7951ccec2448f74bed1b7bc6cb'); INSERT INTO annotation (id, editor, text, changelog) VALUES (1, 100, 'Annotation', 'change'); INSERT INTO work_annotation (work, annotation) VALUES (1, 1); diff --git a/test/test_indexing_real_data.py 
b/test/test_indexing_real_data.py index 5ba2fc5f..63323617 100644 --- a/test/test_indexing_real_data.py +++ b/test/test_indexing_real_data.py @@ -611,6 +611,15 @@ def test_index_work(self): 'work': u'Test', '_store': 'TestT-000.000.002-0', 'iswc': u'T-000.000.002-0' + }, + { + '_store': 'Blue LinesbackwardBlue LinesbackwardBlue Lines', + 'work': u'Blue Lines', + 'recording_count': 2, + 'recording': u'Blue Lines', + 'mbid': '640b17f5-4aa3-3fb1-8c6c-4792458e8a56', + 'rid': set(['bef81f8f-4bcf-4308-bd66-e57018169a94', 'a2383c02-2430-4294-9177-ef799a6eca31']), + 'type': u'Song' } ] self._test_index_entity("work", expected) From d322f80c4b558ea6b3eca8cec0138b6ed2deaf20 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Tue, 8 Nov 2022 21:56:57 +0530 Subject: [PATCH 46/48] Update extrapaths fields indexed for work Add sort_name and comment for eager loading because those are accessed by convert_artist_simple. Change recording to entity0 because hybrid properties are not eagerly loaded correctly. 
--- sir/schema/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sir/schema/__init__.py b/sir/schema/__init__.py index cc9beb2a..4c204b9f 100644 --- a/sir/schema/__init__.py +++ b/sir/schema/__init__.py @@ -564,6 +564,8 @@ "aliases.sort_name", "aliases.locale", "aliases.primary_for_locale", "aliases.begin_date", "aliases.end_date", + "artist_links.entity0.sort_name", + "artist_links.entity0.comment", "artist_links.link.link_type.name", "artist_links.link.link_type.gid", "artist_links.link.attributes.attribute_type.name", @@ -572,7 +574,7 @@ "recording_links.link.link_type.gid", "recording_links.link.attributes.attribute_type.name", "recording_links.link.attributes.attribute_type.gid", - "recording_links.recording.video", + "recording_links.entity0.video", "tags.count", "type.gid"] ) From dc96c3019b40c4350862cfa91470bdf72a89eb30 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Mon, 14 Nov 2022 17:19:13 +0530 Subject: [PATCH 47/48] Avoid doing recording count in sql query for work We load recording links anyway so a len in python is simpler. 
--- sir/schema/__init__.py | 2 +- sir/schema/modelext.py | 5 ----- sir/schema/transformfuncs.py | 4 ++++ 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/sir/schema/__init__.py b/sir/schema/__init__.py index 4c204b9f..93f0406e 100644 --- a/sir/schema/__init__.py +++ b/sir/schema/__init__.py @@ -552,7 +552,7 @@ F("iswc", "iswcs.iswc"), F("lang", "languages.language.iso_code_3"), F("recording", "recording_links.entity0.name"), - F("recording_count", "recording_count", transformfunc=tfs.integer_sum, trigger=False), + F("recording_count", "recording_links.entity0.gid", transformfunc=tfs.integer_count_all, trigger=False), F("rid", "recording_links.entity0.gid"), F("tag", "tags.tag.name"), F("type", "type.name") diff --git a/sir/schema/modelext.py b/sir/schema/modelext.py index 38c6c121..1b4bc48a 100644 --- a/sir/schema/modelext.py +++ b/sir/schema/modelext.py @@ -159,11 +159,6 @@ class CustomWork(Work): tags = relationship("WorkTag", viewonly=True) languages = relationship("WorkLanguage", viewonly=True) recording_links = relationship("LinkRecordingWork", viewonly=True) - recording_count = column_property( - select([func.count(LinkRecordingWork.id)]). - where(LinkRecordingWork.work_id == Work.id). 
- scalar_subquery() - ) class CustomURL(URL): diff --git a/sir/schema/transformfuncs.py b/sir/schema/transformfuncs.py index 2e9ee6e9..3cd2ce4e 100644 --- a/sir/schema/transformfuncs.py +++ b/sir/schema/transformfuncs.py @@ -43,6 +43,10 @@ def fill_none(values): return values +def integer_count_all(records): + return int(len(records)) + + def integer_sum(values): return int(sum(values)) From e9e63641cd103c29a1aca456fb870d9f7d508774 Mon Sep 17 00:00:00 2001 From: yvanzo Date: Wed, 31 May 2023 09:07:12 +0100 Subject: [PATCH 48/48] Amend 2bbacca6: Declare exchanges/queues on move (#159) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When moving to another instance of RabbitMQ (mostly for master not mirror), the exchanges and queues have to be declared using SIR’s `amqp_setup` command, otherwise messages are just dropped by the new RabbitMQ instance. This step was missing in the documentation. --- docs/source/service/rabbitmq.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/service/rabbitmq.rst b/docs/source/service/rabbitmq.rst index b92fa08f..9bf8a223 100644 --- a/docs/source/service/rabbitmq.rst +++ b/docs/source/service/rabbitmq.rst @@ -65,6 +65,7 @@ Procedures * Move service: * Create vhost, user, permissions, queues in the new instance + * Declare exchanges and queues as described in :ref:`amqp` * Update broker in PostgreSQL to point to the new instance * Once the queues in the old instance are empty, switch the live indexer to the new instance