-
Notifications
You must be signed in to change notification settings - Fork 2
/
RxNorm.R
154 lines (118 loc) · 5.49 KB
/
RxNorm.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# RxNorm.R
# This script queries the RxNorm API to get the corresponding ids and names of strings representing Medications.
# Useful links:
# http://rxnav.nlm.nih.gov/RxNavViews.html#label:appendix
# http://rxnav.nlm.nih.gov/RxNormAPIs.html#
# http://mor.nlm.nih.gov/download/rxnav/RxClassIntro.html
#
# Copyright Antoine Lizee 04/2015 - [email protected]
# Start from an empty workspace: removes every object that ls() reports in the
# environment this script is evaluated in.
# NOTE(review): when the file is source()d from an interactive session this
# also deletes the user's own objects -- confirm this is intended.
rm(list = ls())
# Parameters of the script -----------------------------------------------
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, whereas TRUE and FALSE are reserved words.
getConceptInfo <- TRUE # Query each matched rxcui to expand its properties?
defaultTimeout <- 5 # Timeout for the API calls (handed to httr's timeout())
test <- FALSE # Print out some tests of the main requesting functions?
# API parameters: the request path is built as <basePath>/<action>[/<suffix>]<extension>
baseURL <- "http://rxnav.nlm.nih.gov/"
basePath <- "REST"
extension <- ".json"
# Initializing packages ------------------------------------------------
cat("## Initializing script...\n")
loadOrInstall <- function(pkg) {
  # Small helper to attach a package, installing it first when it is missing.
  #
  # Arguments:
  #   pkg - the package name, as a single character string.
  #
  # Stops when 'pkg' is not a single character string, or when the package
  # still cannot be attached after the installation attempt.
  stopifnot(is.character(pkg), length(pkg) == 1L)
  # requireNamespace() only probes for availability (no warning when the
  # package is absent); library() below does the actual attaching and errors
  # loudly if that fails.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    cat("## Installing the following necessary library:", pkg, "(this should happen only once per machine)\n")
    install.packages(pkg, dependencies = TRUE, repos = "http://cran.rstudio.com/")
  }
  library(pkg, character.only = TRUE)
}
# Attach every package the script relies on (installing it on first use):
# httr for the HTTP calls, jsonlite for parsing the responses, plyr for ddply().
invisible(lapply(c("httr", "jsonlite", "plyr"), loadOrInstall))
# Main functions ----------------------------------------------------------
buildPath <- function(action, suffix = NA, extend = TRUE) {
  # Assembles the request path for one RxNorm API call:
  #   <basePath>/<action>[/<suffix>][<extension>]
  # 'suffix' is skipped when it is NA, and the file extension is appended
  # only when 'extend' is TRUE.
  suffixPart <- ifelse(is.na(suffix), "", paste0("/", suffix))
  extensionPart <- ifelse(extend, extension, "")
  paste0(basePath, "/", action, suffixPart, extensionPart)
}
fuzzyMatches <- function(term, n = 3, keep_rxaui = FALSE, set_timeout = defaultTimeout) {
  # fuzzyMatches() queries the RxNorm "approximateTerm" endpoint to match a
  # medication string to candidate rxcuis.
  #
  # Arguments:
  #   term        - the medication string to match.
  #   n           - minimum number of top-ranked candidates to return when no
  #                 candidate has a score of 100.
  #   keep_rxaui  - if FALSE (default) the per-atom rows are collapsed to one
  #                 row per rxcui (best score, best rank, number of atoms),
  #                 which makes the result significantly more compact;
  #                 if TRUE the per-atom rows are kept as returned.
  #   set_timeout - timeout handed to httr's timeout() for the request.
  #
  # Returns NULL when the API yields no candidate. Otherwise a data.frame
  # ordered by rank holding either every candidate with a score of 100 or,
  # when there is none, the larger of (i) the top n rows and (ii) the rows
  # tied at rank 1.
  r <- GET(url = baseURL,
           path = buildPath("approximateTerm"),
           query = list(term = term),
           timeout(set_timeout))
  # The encoding is given explicitly so httr does not have to guess it.
  df <- fromJSON(content(r, "text", encoding = "UTF-8"))$approximateGroup$candidate
  # No candidate at all (missing or empty list): nothing to rank.
  if (is.null(df) || NROW(df) == 0) return(NULL)
  # NOTE(review): every candidate column is coerced to numeric; this assumes
  # the API only returns numeric-like fields (rxcui, rxaui, score, rank) --
  # confirm against the current response format.
  df <- data.frame(lapply(df, as.numeric))
  if (!keep_rxaui) {
    df <- ddply(df, ~rxcui, summarize,
                score = max(score),
                rank = min(rank),
                nAtoms = length(rxaui))
  }
  df <- df[order(df$rank), ]
  # %in% never yields NA, so rows with a missing score are not selected by
  # accident (indexing with an NA logical would produce all-NA rows).
  is100 <- df$score %in% 100
  if (any(is100)) {
    return(df[is100, ])
  }
  # seq_len() instead of 1:k, so that k == 0 selects no row rather than row 1.
  nKeep <- min(max(sum(df$rank == 1, na.rm = TRUE), n), nrow(df))
  df[seq_len(nKeep), ]
}
if (test) {
  # Smoke tests for fuzzyMatches(); in order, the queries illustrate:
  #  1. general string, several perfect matches => all the 100 are returned
  #  2. general string + mistake                => all the first matches are returned
  #  3. specific string                         => the only 100 is returned
  #  4. specific string with mistake            => the top n are returned
  invisible(lapply(
    c("Hydrocodone-Acetaminophen",
      "Hydrocodone-Acetaminophen oiuy",
      " Avandamet 2-500 MG Oral Tablet",
      " Avandamet 2-500 MG Oral Tablet poiu"),
    function(query) print(fuzzyMatches(query, n = 3))
  ))
}
rxcuiInfo <- function(rxcui, set_timeout = defaultTimeout) {
  # rxcuiInfo() queries the API ("rxcui/<rxcui>/properties") to get further
  # information about a particular concept.
  #
  # Arguments:
  #   rxcui       - the concept identifier to look up.
  #   set_timeout - timeout handed to httr's timeout() for the request.
  #
  # Returns a one-row data.frame with the concept properties (the 'rxcui'
  # column converted to numeric), or NULL when the response carries no
  # properties.
  r <- GET(url = baseURL,
           path = buildPath("rxcui", paste0(rxcui, "/properties")),
           timeout(set_timeout))
  # Read the body once, as text, and parse it ourselves.
  body <- content(r, "text", encoding = "UTF-8")
  if (is.null(body) || is.na(body) || !nzchar(body)) return(NULL)
  properties <- fromJSON(body)$properties
  # Unknown concept: without this guard the rxcui assignment below would fail
  # on an empty data.frame.
  if (is.null(properties)) return(NULL)
  info <- data.frame(properties, stringsAsFactors = FALSE)
  info$rxcui <- as.numeric(info$rxcui)
  info
}
if (test) {
  # Printed explicitly, like the fuzzyMatches() tests, so the result also
  # shows up when the script is run through source().
  print(rxcuiInfo(214182))
}
# Read Example File ----------------------------------------------------------------
# The input is a tab-delimited table with (at least) a 'medication' column
# holding the free-text medication names to match.
medTable <- read.delim("Input/medications.tsv", stringsAsFactors = FALSE)
medStrings <- unique(medTable$medication)
if (length(medStrings) == 0) {
  # Fail early with a clear message; txtProgressBar() would otherwise stop
  # with an unrelated complaint about its bounds.
  stop("No medication strings found in the 'medication' column of Input/medications.tsv",
       call. = FALSE)
}
# Match the names to rxcuis in batch ---------------------------------------------
cat("## Matching medication names...\n")
# initial = NA: the bar is not drawn until the first update.
pb <- txtProgressBar(0, length(medStrings), style = 3, initial = NA)
# Iterating over indices gives the progress value directly, so no counter has
# to be updated in the global environment with `<<-`.
allMatches <- do.call(rbind,
                      lapply(seq_along(medStrings), function(k) {
                        med <- medStrings[k]
                        matches <- fuzzyMatches(med)
                        # Short pause between requests (presumably to go easy on the API).
                        Sys.sleep(0.1)
                        setTxtProgressBar(pb, k)
                        # Strings without any candidate contribute no row.
                        if (is.null(matches)) {
                          NULL
                        } else {
                          data.frame(medString = med, matches, stringsAsFactors = FALSE)
                        }
                      }))
close(pb)
# Match the rxcui to their properties -------------------------------------
if (getConceptInfo) {
  # Expand every distinct matched rxcui with the properties returned by
  # rxcuiInfo(), then attach them to the matches.
  cat("## Getting concept information...\n")
  rxcuis <- unique(allMatches$rxcui)
  # Nothing matched: skip the lookups (txtProgressBar() rejects an empty range).
  if (length(rxcuis) > 0) {
    # initial = NA: the bar is not drawn until the first update.
    pb <- txtProgressBar(0, length(rxcuis), style = 3, initial = NA)
    # Iterating over indices gives the progress value directly, so no counter
    # has to be updated in the global environment with `<<-`.
    rxcuiProperties <- do.call(rbind,
                               lapply(seq_along(rxcuis), function(k) {
                                 properties <- rxcuiInfo(rxcuis[k])
                                 # Short pause between requests (presumably to go easy on the API).
                                 Sys.sleep(0.1)
                                 setTxtProgressBar(pb, k)
                                 properties
                               }))
    close(pb)
    # When no lookup returned anything, keep the matches as they are instead
    # of merging against an empty table (which would drop every row).
    if (!is.null(rxcuiProperties)) {
      # Explicit key: the join must not depend on which other column names the
      # two tables happen to share.
      allMatches <- merge(allMatches, rxcuiProperties, by = "rxcui")
    }
  }
}
# Merge the data with the original med identifier to match ----------------
# Join the matches back onto the original table through the medication string
# (merge() defaults to an inner join, so medications without any match are
# absent from the results).
results <- merge(medTable, allMatches, by.x = "medication", by.y = "medString")
# Write output table ------------------------------------------------------
# Make sure the output directory exists so write.csv() does not fail on a
# fresh checkout.
dir.create("Output", showWarnings = FALSE, recursive = TRUE)
write.csv(results, "Output/allMatchesWithProperties.csv", row.names = FALSE)
# NOTE(review): the .RData copy is written to "Input/" while the CSV goes to
# "Output/" -- this looks unintended, but the path is kept unchanged in case
# downstream scripts load it from there. Confirm before moving it.
save(results, medTable, file = "Input/allMatchesWithProperties.RData")