Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automate the cancergov.cancergov_drug and unsupervised_maps.cancergov_drug_to_omop tables #10

Open
meerapatelmd opened this issue Oct 14, 2020 · 0 comments

Comments

@meerapatelmd
Copy link
Owner

cancergov_drugs <-
fantasia::qOMOP(
                "
                SELECT DISTINCT dl.drug_link, dln.ncit_code, dl.drug, dls.drug_synonym_type, dls.drug_synonym, ns.term as ncit_drug
                FROM cancergov.drug_link dl
                LEFT JOIN cancergov.drug_link_synonym dls
                ON dls.drug_link = dl.drug_link
                LEFT JOIN cancergov.drug_link_ncit dln
                ON dln.drug_link = dl.drug_link
                LEFT JOIN cancergov.ncit_synonym ns
                ON dln.ncit_code = ns.ncit_code
                ")

cancergov_drugs2 <-
        cancergov_drugs %>%
        tidyr::pivot_longer(cols = c(drug, drug_synonym, ncit_drug),
                            names_to = "drug_name_type",
                            values_to = "cancergov_drug",
                            values_drop_na = TRUE) %>%
        dplyr::mutate(drug_name_type = factor(drug_name_type)) %>%
        dplyr::mutate(drug_name_type = forcats::fct_recode(drug_name_type,
                                                           `Label:` = "drug",
                                                           `NCIt Term:` = "ncit_drug")) %>%
        dplyr::mutate(drug_name_type = as.character(drug_name_type)) %>%
        dplyr::mutate(drug_synonym_type = dplyr::coalesce(drug_synonym_type, drug_name_type)) %>%
        dplyr::select(-drug_name_type) %>%
        dplyr::distinct() %>%
        dplyr::mutate(drug_synonym_type = factor(drug_synonym_type,
                                                 levels = c("NCIt Term:",
                                                            "Label:",
                                                            "Synonym:",
                                                            "Code name:",
                                                            "Abbreviation:",
                                                            "Acronym:",
                                                            "US brand name:",
                                                            "Chemical structure:",
                                                            "Foreign brand name:"))) %>%
        dplyr::group_by(drug_link, ncit_code, cancergov_drug) %>%
        dplyr::arrange(desc(drug_synonym_type), .by_group = TRUE) %>%
        rubix::filter_first_row() %>%
        dplyr::ungroup() %>%
        dplyr::mutate(drug_synonym_type = as.character(drug_synonym_type)) %>%
        dplyr::mutate(cd_datetime = Sys.time()) %>%
        dplyr::select(cd_datetime,
                      dplyr::everything())

omop_conn <- fantasia::connectOMOP()
pg13::send(conn = omop_conn,
           sql_statement =
                        "
                        DROP TABLE IF EXISTS cancergov.cancergov_drug;
                        CREATE TABLE cancergov.cancergov_drug (
                                        cd_datetime timestamp without time zone,
                                        drug_link varchar(255),
                                        ncit_code varchar(20),
                                        drug_synonym_type varchar(25),
                                        cancergov_drug text
                        )

                        "
                   )
pg13::appendTable(conn = omop_conn,
                  schema = "cancergov",
                  tableName = "cancergov_drug",
                  cancergov_drugs2)

fantasia::dcOMOP(conn = omop_conn)

secretary::typewrite("##### Mapping CANCERGOV_DRUG Table to the OMOP Drug Domain --> UNSUPERVISED_MAPS.CANCERGOV_TO_OMOP_DRUG",
                     file = log_file)

omop_conn <- fantasia::connectOMOP()
pg13::send(omop_conn,
                "
                DROP TABLE IF EXISTS cancergov.temp_key;
                CREATE TABLE cancergov.temp_key (
                        drug_link_id SERIAL,
                        drug_link VARCHAR
                );

                INSERT INTO cancergov.temp_key(drug_link)
                SELECT DISTINCT drug_link FROM cancergov.cancergov_drug
                ;
                "
)

pg13::send(omop_conn,
           "
                DROP TABLE IF EXISTS cancergov.temp_map;
                        CREATE TABLE cancergov.temp_map AS (
                        SELECT
                                key.drug_link_id,
                                cgd.*,
                                c.concept_id AS match_concept_id,
                                c.concept_name AS match_concept_name,
                                c.domain_id AS match_domain_id,
                                c.vocabulary_id AS match_vocabulary_id,
                                c.concept_class_id AS match_concept_class_id,
                                c.standard_concept AS match_standard_concept,
                                c.concept_code AS match_concept_code,
                                c.valid_start_date AS match_valid_start_date,
                                c.valid_end_date AS match_valid_end_date,
                                c.invalid_reason AS match_invalid_reason
                        FROM cancergov.temp_key key
                        LEFT JOIN cancergov.cancergov_drug cgd
                        ON key.drug_link = cgd.drug_link
                        LEFT JOIN omop_vocabulary.concept_synonym cs
                        ON LOWER(cs.concept_synonym_name) = LOWER(cgd.cancergov_drug)
                        LEFT JOIN omop_vocabulary.concept c
                        ON cs.concept_id = c.concept_id
                        WHERE c.concept_id IS NOT NULL
                )
                "
)

pg13::send(omop_conn,
           "
                DROP TABLE IF EXISTS cancergov.temp_map2;
                        CREATE TABLE cancergov.temp_map2 AS (
                        SELECT
                                t.*,
                                c.concept_id AS ingredient_concept_id,
                                c.concept_name AS ingredient_concept_name,
                                c.domain_id AS ingredient_domain_id,
                                c.vocabulary_id AS ingredient_vocabulary_id,
                                c.concept_class_id AS ingredient_concept_class_id,
                                c.standard_concept AS ingredient_standard_concept,
                                c.concept_code AS ingredient_concept_code,
                                c.valid_start_date AS ingredient_valid_start_date,
                                c.valid_end_date AS ingredient_valid_end_date,
                                c.invalid_reason AS ingredient_invalid_reason
                        FROM cancergov.temp_map t
                        LEFT JOIN omop_vocabulary.concept_relationship cr
                        ON cr.concept_id_1 = t.match_concept_id
                        LEFT JOIN omop_vocabulary.concept c
                        ON cr.concept_id_2 = c.concept_id
                        WHERE cr.invalid_reason IS NULL
                                AND c.invalid_reason IS NULL
                                AND c.vocabulary_id IN ('RxNorm', 'RxNorm Extension')
                                AND c.concept_class_id IN ('Ingredient')
                )
                "
)

pg13::send(omop_conn,
           "
                DROP TABLE IF EXISTS cancergov.temp_map2HO;
                        CREATE TABLE cancergov.temp_map2HO AS (
                        SELECT
                                t.*,
                                c.concept_id AS component_concept_id,
                                c.concept_name AS component_concept_name,
                                c.domain_id AS component_domain_id,
                                c.vocabulary_id AS component_vocabulary_id,
                                c.concept_class_id AS component_concept_class_id,
                                c.standard_concept AS component_standard_concept,
                                c.concept_code AS component_concept_code,
                                c.valid_start_date AS component_valid_start_date,
                                c.valid_end_date AS component_valid_end_date,
                                c.invalid_reason AS component_invalid_reason
                        FROM cancergov.temp_map t
                        LEFT JOIN omop_vocabulary.concept_relationship cr
                        ON cr.concept_id_1 = t.match_concept_id
                        LEFT JOIN omop_vocabulary.concept c
                        ON cr.concept_id_2 = c.concept_id
                        WHERE cr.invalid_reason IS NULL
                                AND c.invalid_reason IS NULL
                                AND c.vocabulary_id IN ('HemOnc')
                                AND c.concept_class_id IN ('Component')
                                AND c.domain_id IN ('Drug')
                )
                "
)

pg13::send(omop_conn,
           "
                DROP TABLE IF EXISTS cancergov.temp_map3;
                        CREATE TABLE cancergov.temp_map3 AS (
                        SELECT
                                t.*,
                                c.concept_id AS ingredient_concept_id,
                                c.concept_name AS ingredient_concept_name,
                                c.domain_id AS ingredient_domain_id,
                                c.vocabulary_id AS ingredient_vocabulary_id,
                                c.concept_class_id AS ingredient_concept_class_id,
                                c.standard_concept AS ingredient_standard_concept,
                                c.concept_code AS ingredient_concept_code,
                                c.valid_start_date AS ingredient_valid_start_date,
                                c.valid_end_date AS ingredient_valid_end_date,
                                c.invalid_reason AS ingredient_invalid_reason
                        FROM cancergov.temp_map t
                        LEFT JOIN omop_vocabulary.concept_ancestor ca
                        ON ca.ancestor_concept_id = t.match_concept_id
                        LEFT JOIN omop_vocabulary.concept c
                        ON ca.descendant_concept_id = c.concept_id
                        WHERE c.invalid_reason IS NULL
                                AND c.vocabulary_id IN ('RxNorm', 'RxNorm Extension')
                                AND c.concept_class_id IN ('Ingredient')
                )
                "
)

pg13::send(omop_conn,
           "
                DROP TABLE IF EXISTS cancergov.temp_map3ho;
                        CREATE TABLE cancergov.temp_map3ho AS (
                        SELECT
                                t.*,
                                c.concept_id AS component_concept_id,
                                c.concept_name AS component_concept_name,
                                c.domain_id AS component_domain_id,
                                c.vocabulary_id AS component_vocabulary_id,
                                c.concept_class_id AS component_concept_class_id,
                                c.standard_concept AS component_standard_concept,
                                c.concept_code AS component_concept_code,
                                c.valid_start_date AS component_valid_start_date,
                                c.valid_end_date AS component_valid_end_date,
                                c.invalid_reason AS component_invalid_reason
                        FROM cancergov.temp_map t
                        LEFT JOIN omop_vocabulary.concept_ancestor ca
                        ON ca.ancestor_concept_id = t.match_concept_id
                        LEFT JOIN omop_vocabulary.concept c
                        ON ca.descendant_concept_id = c.concept_id
                        WHERE c.invalid_reason IS NULL
                                AND c.vocabulary_id IN ('HemOnc')
                                AND c.concept_class_id IN ('Component')
                                AND c.domain_id IN ('Drug')
                )
                "
)




pg13::send(omop_conn,
           "
                DROP TABLE IF EXISTS cancergov.temp_map4;
                CREATE TABLE cancergov.temp_map4 AS (
                        SELECT *
                        FROM cancergov.temp_map2
                        UNION
                        SELECT *
                        FROM cancergov.temp_map3
                )
                "
)

pg13::send(omop_conn,
           "
                DROP TABLE IF EXISTS cancergov.temp_map4ho;
                CREATE TABLE cancergov.temp_map4ho AS (
                        SELECT *
                        FROM cancergov.temp_map2ho
                        UNION
                        SELECT *
                        FROM cancergov.temp_map3ho
                )
                "
)

pg13::send(omop_conn,
           "
                DROP TABLE IF EXISTS cancergov.temp_map5;
                CREATE TABLE cancergov.temp_map5 AS (
                        SELECT drug_link_id,
                                STRING_AGG(DISTINCT cancergov_drug, '|') AS cancergov_name_cluster,
                                STRING_AGG(DISTINCT CONCAT(ingredient_concept_id::varchar, ' ', ingredient_concept_name), E'\n') AS rxnorm_ingredient
                        FROM cancergov.temp_map4
                        GROUP BY drug_link_id
                )
                "
)

pg13::send(omop_conn,
           "
                DROP TABLE IF EXISTS cancergov.temp_map5ho;
                CREATE TABLE cancergov.temp_map5ho AS (
                        SELECT drug_link_id,
                                STRING_AGG(DISTINCT cancergov_drug, '|') AS cancergov_name_cluster,
                                STRING_AGG(DISTINCT CONCAT(component_concept_id::varchar, ' ', component_concept_name), E'\n') AS hemonc_component
                        FROM cancergov.temp_map4ho
                        GROUP BY drug_link_id
                )
                "
)

pg13::send(conn = omop_conn,
           "
           DROP TABLE IF EXISTS unsupervised_maps.cancergov_to_omop_drugs;
           CREATE TABLE unsupervised_maps.cancergov_to_omop_drugs AS (
                        WITH rx_to_ho AS (
                              SELECT DISTINCT
                                rx.drug_link_id AS rx_drug_link_id,
                                ho.drug_link_id AS ho_drug_link_id,
                                ho.hemonc_component,
                                rx.rxnorm_ingredient
                              FROM cancergov.temp_map5 rx
                              FULL JOIN cancergov.temp_map5ho ho
                              ON rx.drug_link_id = ho.drug_link_id
                        ),
                        rx_to_ho2 AS (
                                SELECT
                                        COALESCE(rx_drug_link_id, ho_drug_link_id) AS drug_link_id,
                                        hemonc_component,
                                        rxnorm_ingredient
                                FROM rx_to_ho
                        ),
                        combined_results AS (
                        SELECT DISTINCT
                                ROW_NUMBER() OVER (PARTITION BY cgd.drug_link ORDER BY character_length(dl.drug) DESC),
                                dl.drug, cgd.drug_link, cgd.ncit_code, rxho.rxnorm_ingredient, rxho.hemonc_component, STRING_AGG(cancergov_drug, '|') AS cancergov_name_cluster
                        FROM cancergov.temp_key key
                        LEFT JOIN rx_to_ho2 rxho
                        ON rxho.drug_link_id = key.drug_link_id
                        LEFT JOIN cancergov.cancergov_drug cgd
                        ON cgd.drug_link = key.drug_link
                        LEFT JOIN cancergov.drug_link dl
                        ON dl.drug_link = cgd.drug_link
                        GROUP BY dl.drug, cgd.drug_link, cgd.ncit_code, rxho.rxnorm_ingredient, rxho.hemonc_component
                        )

                        SELECT DISTINCT
                                LOCALTIMESTAMP(0) AS cgod_datetime,
                                cr.drug,
                                cr.drug_link,
                                cr.ncit_code,
                                cr.rxnorm_ingredient,
                                cr.hemonc_component,
                                cancergov_name_cluster
                        FROM combined_results cr
                        WHERE row_number = 1

           )
           ")


pg13::send(omop_conn,
           "
           DROP TABLE IF EXISTS cancergov.temp_key;
           DROP TABLE IF EXISTS cancergov.temp_mapho;
           DROP TABLE IF EXISTS cancergov.temp_map2ho;
           DROP TABLE IF EXISTS cancergov.temp_map3ho;
           DROP TABLE IF EXISTS cancergov.temp_map4ho;
           DROP TABLE IF EXISTS cancergov.temp_map5ho;
           DROP TABLE IF EXISTS cancergov.temp_map;
           DROP TABLE IF EXISTS cancergov.temp_map2;
           DROP TABLE IF EXISTS cancergov.temp_map3;
           DROP TABLE IF EXISTS cancergov.temp_map4;
           DROP TABLE IF EXISTS cancergov.temp_map5;
           ")
fantasia::dcOMOP(conn = omop_conn)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant