Skip to content

Commit

Permalink
Filter Python CD index results with queries
Browse files Browse the repository at this point in the history
This is more efficient than a single DELETE statement.
  • Loading branch information
dspinellis committed Oct 9, 2024
1 parent 19fe1d2 commit 0e6259a
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 14 deletions.
6 changes: 6 additions & 0 deletions examples/cdindex/cd5index-all-py.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- Export the CD5 index for works where one can be calculated.
-- Output columns: doi, cdindex (one row per match between rolap.cdindex
-- and rolap.valid_cd5index on doi).

-- Index the join key of the filter table so the join below can look rows up
-- by doi rather than scanning valid_cd5index.
CREATE INDEX IF NOT EXISTS rolap.valid_cd5index_doi_idx ON valid_cd5index(doi);

-- Keep only the CD index rows whose DOI appears in valid_cd5index.
SELECT
  doi,
  cdindex
FROM rolap.cdindex
INNER JOIN rolap.valid_cd5index USING (doi);
14 changes: 0 additions & 14 deletions examples/cdindex/cdindex-db.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,18 +152,4 @@ def process_batch(start):

# Commit the work done so far on db before the cleanup step below.
db.commit()

# Cleanup step: attach cdindex.db under the schema name "wr" and delete from
# cdindex every row whose doi is NOT returned by the subquery.  The subquery
# keeps a doi when the matching wr.works row has published_year <= 2018 OR
# has at least one work_references row for its id.
# NOTE(review): the replacement query in valid_cd5index.sql combines these
# two conditions with AND, not OR -- confirm which one is intended.
# NOTE(review): the string holds two statements (ATTACH and DELETE); confirm
# that db.execute() accepts multi-statement scripts for this connection type.
db.execute("""
-- Works and references
ATTACH 'cdindex.db' AS wr;
DELETE FROM cdindex
WHERE doi NOT IN (
SELECT cdindex.doi FROM cdindex
INNER JOIN wr.works USING(doi)
WHERE works.published_year <= 2018 OR
(SELECT 1 FROM work_references WHERE work_id == works.id)
);
""")
# Record a log entry labelled with the step that was just performed.
perf.log("Remove invalid records")

db.close()
9 changes: 9 additions & 0 deletions examples/cdindex/valid_cd5index.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- Create a table of publications for which a valid CD5 index can be calculated.
-- A publication qualifies when its works row has published_year <= 2018 and
-- at least one work_references row exists for it.
-- NOTE(review): the 2018 cut-off is hard-coded; presumably it leaves a
-- five-year window after publication -- confirm against the data snapshot.

-- Index the join key so the join with works can look cdindex rows up by doi.
CREATE INDEX IF NOT EXISTS rolap.cdindex_doi_idx ON cdindex(doi);

CREATE TABLE rolap.valid_cd5index AS
  SELECT cdindex.doi
  FROM rolap.cdindex
  INNER JOIN works USING (doi)
  WHERE works.published_year <= 2018
    -- EXISTS states the "has at least one reference" test explicitly instead
    -- of relying on the truthiness of a scalar subquery.
    AND EXISTS (
      SELECT 1
      FROM work_references
      WHERE work_references.work_id = works.id
    );

0 comments on commit 0e6259a

Please sign in to comment.