Skip to content

Commit

Permalink
Merge pull request #19 from UCDenver-ccp/with-MONDO-annotations
Browse files Browse the repository at this point in the history
with mondo annotations and MONDO.owl
  • Loading branch information
bill-baumgartner authored Feb 16, 2022
2 parents 02fd0d8 + 263ba5d commit 8663732
Show file tree
Hide file tree
Showing 195 changed files with 28,046 additions and 18 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.DS_Store
3 changes: 3 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# CRAFT Corpus Changes

## Version 5.0.0
* MONDO annotations have been added

## Version 4.0.1
* Coreference guidelines have been added to the distribution
* Includes updates fixing a few incorrect annotations
Expand Down
46 changes: 28 additions & 18 deletions build.boot
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
(set-env! :dependencies '[[edu.ucdenver.ccp/file-conversion-onejar "0.2.2"]]
(set-env! :dependencies '[[edu.ucdenver.ccp/file-conversion-onejar "0.3.0"]]
:repositories {"bionlp" "https://svn.code.sf.net/p/bionlp/code/repo/"})
(require '[clojure.java.io :refer [file]]
'[clojure.java.io :as io])
Expand Down Expand Up @@ -76,14 +76,22 @@

(deftask concept
"Indicates that concept annotations will be processed."
[t concept-type VAL str "indicates annotation type to be processed. Must be one of CHEBI, CL, GO_BP, GO_CC, GO_MF, MOP, NCBITaxon, PR, SO, or UBERON. To indicate all concept types should be processed, use the all-concepts task instead. Note case-sensitivity in the concept types."
x include-extensions bool "indicates that extension classes should be included"]
[t concept-type VAL str "indicates annotation type to be processed. Must be one of CHEBI, CL, GO_BP, GO_CC, GO_MF, MONDO, MOP, NCBITaxon, PR, SO, or UBERON. To indicate all concept types should be processed, use the all-concepts task instead. Note case-sensitivity in the concept types. Also note that a selection of 'MONDO' will include the annotations from the MONDO_without_genotype_annotations project; alternatively, 'MONDO -x' will include annotations from the MONDO_with_genotype_annotations project."
x include-extensions bool "indicates that extension classes should be included. Note that for MONDO, '-x' indicates that the MONDO_with_genotype_annotations project will be included, as opposed to the MONDO_without_genotype_annotations project."]
(with-pre-wrap fileset
(let [valid-concept-types #{"CHEBI" "CL" "GO_BP" "GO_CC" "GO_MF" "MOP" "NCBITaxon" "PR" "SO" "UBERON"}
annotation-type (if include-extensions (str concept-type "+extensions") concept-type)
(let [valid-concept-types #{"CHEBI" "CL" "GO_BP" "GO_CC" "GO_MF" "MONDO" "MOP" "NCBITaxon" "PR" "SO" "UBERON"}
annotation-type (if include-extensions
(if (= concept-type "MONDO")
(str "MONDO_with_genotype_annotations") ;; MONDO_with_genotypes is the "extended" MONDO project. It does not contain extension classes like the other CRAFT concept annotations but does contain extra annotations.
(str concept-type "+extensions"))
(if (= concept-type "MONDO")
(str "MONDO_without_genotype_annotations") ;; MONDO_without_genotypes is the default MONDO project
concept-type))
annotation-type-symbol (symbol (str ":" annotation-type))
annotation-directory (file "concept-annotation" concept-type annotation-type "knowtator")
native-format InputFileFormat/KNOWTATOR
annotation-directory (if (= concept-type "MONDO")
(file "concept-annotation" concept-type annotation-type "knowtator-2")
(file "concept-annotation" concept-type annotation-type "knowtator"))
native-format (if (= concept-type "MONDO") InputFileFormat/KNOWTATOR2 InputFileFormat/KNOWTATOR)
valid-formats #{:brat :bionlp :pubannotation :uima :knowtator2}]
(if (not (contains? valid-concept-types concept-type))
(throw (IllegalArgumentException. (str "Invalid concept type requested: [" concept-type "]. Valid concept types include: "
Expand Down Expand Up @@ -122,6 +130,7 @@
(concept :concept-type "GO_BP" :include-extensions include-extensions)
(concept :concept-type "GO_CC" :include-extensions include-extensions)
(concept :concept-type "GO_MF" :include-extensions include-extensions)
(concept :concept-type "MONDO" :include-extensions include-extensions)
(concept :concept-type "MOP" :include-extensions include-extensions)
(concept :concept-type "NCBITaxon" :include-extensions include-extensions)
(concept :concept-type "PR" :include-extensions include-extensions)
Expand Down Expand Up @@ -241,23 +250,24 @@
annotation-type " to format: " requested-format
". Valid conversion formats include the following: " valid-formats)))))

;; Single authoritative definition of the concept color map. Defining the var
;; twice in a row only leaves the last binding in effect, so only that binding
;; is kept here.
(def concept-to-color-map
  "Map from an upper-cased concept type name to a \"bgColor:#rrggbb\" string.

  NOTE(review): the key for NCBITaxon is spelled \"NCBITAXON\" (all caps),
  unlike the \"NCBITaxon\" concept type accepted by the `concept` task, so
  callers presumably upper-case the concept type before the lookup -- confirm
  against the call site."
  {"CHEBI"     "bgColor:#32cd32"   ;; lime green
   "CL"        "bgColor:#ffa500"   ;; orange
   "GO_BP"     "bgColor:#00ffff"   ;; cyan
   "GO_CC"     "bgColor:#ff0000"   ;; red
   "GO_MF"     "bgColor:#0b5394"   ;; navy blue
   "MONDO"     "bgColor:#ffd966"   ;; yellow
   "MOP"       "bgColor:#deb887"   ;; tan
   "NCBITAXON" "bgColor:#0f0f0f"   ;; very dark gray
   "PR"        "bgColor:#340034"   ;; dark purple
   "SO"        "bgColor:#981198"   ;; magenta
   "UBERON"    "bgColor:#5f9ea0"}) ;; grayish blue


(defn get-ontology-files
  "Return the ontology archive files for a particular annotation type.

  `input` is a two-element sequence `[annotation-type annotation-directory]`.
  Only `annotation-directory` is used: its parent directory is walked
  recursively with `file-seq`, and every regular file whose name ends in
  \".owl.zip\" or \".obo.zip\" is returned as a lazy sequence of files."
  ;; The docstring has to come before the argument vector; placed after it,
  ;; the string is just an expression in the body and is never attached as
  ;; :doc metadata.
  [input]
  (let [[_ annotation-directory] input
        ontology-archive? (fn [f]
                            (and (.isFile f)
                                 (let [file-name (.getName f)]
                                   (or (.endsWith file-name ".owl.zip")
                                       (.endsWith file-name ".obo.zip")))))]
    ;; gather the ontology file(s) that sit alongside the annotation directory
    (filter ontology-archive?
            (file-seq (.getParentFile annotation-directory)))))


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<knowtator-project>
<document id="11319941" text-file="11319941.txt">
<annotation annotator="Default" id="11319941-98" motivation="" type="identity">
<class id="http://purl.obolibrary.org/obo/MONDO_0007739" label="'Huntington disease'"/>
<span end="2470" id="11319941-99" start="2452">Huntington disease</span>
</annotation>
<annotation annotator="Default" id="11319941-100" motivation="" type="identity">
<class id="http://purl.obolibrary.org/obo/MONDO_0005395" label="'movement disorder'"/>
<span end="2574" id="11319941-101" start="2557">movement disorder</span>
</annotation>
<annotation annotator="Default" id="11319941-104" motivation="" type="identity">
<class id="http://purl.obolibrary.org/obo/MONDO_0007739" label="'Huntington disease'"/>
<span end="21875" id="11319941-105" start="21857">Huntington disease</span>
</annotation>
<annotation annotator="Default" id="11319941-106" motivation="" type="identity">
<class id="http://purl.obolibrary.org/obo/MONDO_0009022" label="'corpus callosum, agenesis of'"/>
<span end="22265" id="11319941-107" start="22254">agenesis of</span>
<span end="22285" id="11319941-108" start="22270">corpus callosum</span>
</annotation>
</document>
</knowtator-project>
Loading

0 comments on commit 8663732

Please sign in to comment.