From 091dd49bf14630b93ee1623e480fa6232e884a39 Mon Sep 17 00:00:00 2001 From: Carlos Rueda Date: Thu, 7 Sep 2017 20:38:57 -0700 Subject: [PATCH] 0.5.0 add watchdog option, suitable to be used in a cronjob --- README.md | 94 ++------------- build.sbt | 2 +- .../cf-standard-name-table.conv-stats.txt | 2 +- cf2rdf_output/cf-standard-name-table.rdf | 2 +- changelog.md | 6 + src/main/resources/params_template.conf | 14 +-- src/main/resources/reference.conf | 2 +- .../scala/org/mmisw/cf2rdf/Converter.scala | 16 +-- .../org/mmisw/cf2rdf/ModelConstructor.scala | 15 ++- .../scala/org/mmisw/cf2rdf/OrrNvsMapper.scala | 13 ++- .../scala/org/mmisw/cf2rdf/Registerer.scala | 10 +- src/main/scala/org/mmisw/cf2rdf/cf2rdf.scala | 108 ++++++------------ .../org/mmisw/cf2rdf/config/Cf2RdfCfg.scala | 1 - .../org/mmisw/cf2rdf/config/package.scala | 6 +- src/main/scala/org/mmisw/cf2rdf/package.scala | 86 +++++++++++++- .../scala/org/mmisw/cf2rdf/watchdog.scala | 54 +++++++++ 16 files changed, 227 insertions(+), 204 deletions(-) create mode 100644 src/main/scala/org/mmisw/cf2rdf/watchdog.scala diff --git a/README.md b/README.md index 8f64f92..fd3e191 100644 --- a/README.md +++ b/README.md @@ -23,95 +23,19 @@ $ java -jar cf2rdf-x.y.z.jar generate-conf Edit `cf2rdf.conf` as needed. -As command-line arguments for the regular execution, cf2rdf expects the desired steps to be performed. - -The complete sequence, including registration looks like so: +As command-line arguments for the regular execution, cf2rdf expects either the +`watchdog` argument to check for current remote CF version and trigger conversion +and registration in case of new version (based on comparison with latest processed file): ```shell -$ java -jar cf2rdf-x.y.z.jar download convert register - -Downloading https://raw.githubusercontent.com/cf-convention/cf-convention.github.io/master/Data/cf-standard-names/46/src/cf-standard-name-table.xml - -> ./cf2rdf_output/cf-standard-name-table.xml - -Downloading http://vocab.nerc.ac.uk/collection/P07/current/ - -> ./cf2rdf_output/nvs_P07.rdf - - -Replaced %20 for space in the following rdf:resource IRIs from ./cf2rdf_output/nvs_P07.rdf - - - - - - -[main] WARN org.apache.jena.riot - [line: 1, col: 121] {W119} A processing instruction is in RDF content. No processing was done. - -Summary: (saved in ./cf2rdf_output/cf-standard-name-table.conv-stats.txt) - cf2rdf conversion - input: https://raw.githubusercontent.com/cf-convention/cf-convention.github.io/master/Data/cf-standard-names/46/src/cf-standard-name-table.xml - output: ./cf2rdf_output/cf-standard-name-table.rdf - - vocabulary properties from input file: - version_number: 46; last_modified: 2017-07-25T09:41:29Z - - conversion stats: - numConcepts = 2890 - numEntries = 2889 - numWithNoCanonicalUnits = 9 - numWithNoDefinitions = 26 - - Mapping ontology: - mappingTermsAdded = 2889 - mappingOutputFilename = ./cf2rdf_output/cfonmap.n3 - +$ java -jar cf2rdf-x.y.z.jar watchdog +``` -Registering http://mmisw.org/ont/cf/parameter - Climate and Forecast (CF) Standard Names (v.46) - - uploading... - POST http://localhost:8081/api/v0/ont/upload - - registering... - PUT http://localhost:8081/api/v0/ont { - "orgName":"mmi", - "name":"Climate and Forecast (CF) Standard Names (v.46)", - "uploadedFilename":"1504822022386._guess", - "uploadedFormat":"rdf", - "iri":"http://mmisw.org/ont/cf/parameter", - "status":"stable", - "visibility":"public", - "log":"reflect version number 46", - "userName":"carueda" - } - Result: - { - "uri":"http://mmisw.org/ont/cf/parameter", - "version":"20170907T150702", - "visibility":"public", - "status":"stable", - "updated":"2017-09-07T15:07:02Z" - } +or the desired specific steps to be performed, for example: + +```shell +$ java -jar cf2rdf-x.y.z.jar download convert register -Registering http://mmisw.org/ont/mmi/cfonmap - ORR-NVS CF standard name mapping (v.46) - - uploading... - POST http://localhost:8081/api/v0/ont/upload - - registering... - PUT http://localhost:8081/api/v0/ont { - "orgName":"mmi", - "name":"ORR-NVS CF standard name mapping (v.46)", - "uploadedFilename":"1504822022908._guess", - "uploadedFormat":"n3", - "iri":"http://mmisw.org/ont/mmi/cfonmap", - "status":"stable", - "visibility":"public", - "log":"reflect version number 46", - "userName":"carueda" - } - Result: - { - "uri":"http://mmisw.org/ont/mmi/cfonmap", - "version":"20170907T150703", - "visibility":"public", - "status":"stable", - "updated":"2017-09-07T15:07:03Z" - } ``` The latest conversion report is [here](cf2rdf_output/cf-standard-name-table.conv-stats.txt). diff --git a/build.sbt b/build.sbt index 0e51af6..4360136 100644 --- a/build.sbt +++ b/build.sbt @@ -1,4 +1,4 @@ -lazy val cf2rdfVersion = setVersion("0.4.0") +lazy val cf2rdfVersion = setVersion("0.5.0") val scalaV = "2.12.2" val cfgV = "0.0.7" val scalajHttpV = "2.3.0" diff --git a/cf2rdf_output/cf-standard-name-table.conv-stats.txt b/cf2rdf_output/cf-standard-name-table.conv-stats.txt index 2313290..03f7788 100644 --- a/cf2rdf_output/cf-standard-name-table.conv-stats.txt +++ b/cf2rdf_output/cf-standard-name-table.conv-stats.txt @@ -1,5 +1,5 @@ cf2rdf conversion -input: https://raw.githubusercontent.com/cf-convention/cf-convention.github.io/master/Data/cf-standard-names/46/src/cf-standard-name-table.xml +input: https://raw.githubusercontent.com/cf-convention/cf-convention.github.io/master/Data/cf-standard-names/current/src/cf-standard-name-table.xml output: ./cf2rdf_output/cf-standard-name-table.rdf vocabulary properties from input file: diff --git a/cf2rdf_output/cf-standard-name-table.rdf b/cf2rdf_output/cf-standard-name-table.rdf index e7cf610..9287b81 100644 --- a/cf2rdf_output/cf-standard-name-table.rdf +++ b/cf2rdf_output/cf-standard-name-table.rdf @@ -20,10 +20,10 @@ http://mmisw.org/ont/mmi/resourcetype/parameter Ontology representation of the Climate and Forecast (CF) standard names parameter vocabulary, which is intended for use with climate and forecast data in the atmosphere, surface and ocean domains. Every CF parameter is captured as a SKOS concept. Climate and Forecast (CF) Standard Names (v.46) + https://raw.githubusercontent.com/cf-convention/cf-convention.github.io/master/Data/cf-standard-names/current/src/cf-standard-name-table.xml http://marinemetadata.org/orrcf CF-standard-name NetCDF, CF, Climate and Forecast, self-describing, standard names, Canonical Units - https://raw.githubusercontent.com/cf-convention/cf-convention.github.io/master/Data/cf-standard-names/46/src/cf-standard-name-table.xml http://cfconventions.org/standard-names.html diff --git a/changelog.md b/changelog.md index b914d18..e150573 100644 --- a/changelog.md +++ b/changelog.md @@ -1,3 +1,9 @@ +2017-09-07 0.5.0 + +- add watchdog option, suitable to be used in a cronjob. + This checks for current remote CF version and triggers conversion/registration + in case of new version, based on comparison with latest processed file. + 2017-09-07 0.4.0 - cf2rdf can now register the generated ontologies diff --git a/src/main/resources/params_template.conf b/src/main/resources/params_template.conf index 9e2e9bc..a6d26ac 100644 --- a/src/main/resources/params_template.conf +++ b/src/main/resources/params_template.conf @@ -2,20 +2,16 @@ # cf2rdf execution parameters. # # You can adjust any of the entries in this file but only the following -# will typically need to be adjusted as needed: -# cfVersion +# will typically need to be adjusted: # orr.userName # orr.password # -# The CF version number to process -cfVersion = 46 - -# The template for the URL to retrieve the XML file -xmlUrl = "https://raw.githubusercontent.com/cf-convention/cf-convention.github.io/master/Data/cf-standard-names/"${cfVersion}"/src/cf-standard-name-table.xml" +# URL of current XML file +xmlUrl = "https://raw.githubusercontent.com/cf-convention/cf-convention.github.io/master/Data/cf-standard-names/current/src/cf-standard-name-table.xml" # Output directory for generated files -outDir = "cf2rdf_output" +outDir = "./cf2rdf_output" # Name of local file for the download of xmlUrl destXml = ${outDir}"/cf-standard-name-table.xml" @@ -53,7 +49,7 @@ orr { userName = ? password = ? - # Organization that owns the entries at the ORR instance + # Organization that owns the rdf.iri and mapping.iri entries at the ORR instance orgName = "mmi" # Visibility for the registration diff --git a/src/main/resources/reference.conf b/src/main/resources/reference.conf index 12043b0..c747a71 100644 --- a/src/main/resources/reference.conf +++ b/src/main/resources/reference.conf @@ -1 +1 @@ -cf2rdf.version = 0.4.0 \ No newline at end of file +cf2rdf.version = 0.5.0 \ No newline at end of file diff --git a/src/main/scala/org/mmisw/cf2rdf/Converter.scala b/src/main/scala/org/mmisw/cf2rdf/Converter.scala index b1d4179..3769d43 100644 --- a/src/main/scala/org/mmisw/cf2rdf/Converter.scala +++ b/src/main/scala/org/mmisw/cf2rdf/Converter.scala @@ -11,13 +11,7 @@ import scala.xml.Node * * @param xmlIn Input XML */ -class Converter(xmlIn: Node) { - - /** some general properties from the input */ - val props: Map[String,String] = { - val keys = List("version_number", "last_modified") //, "institution", "contact") - (keys map (k ⇒ k -> (xmlIn \ k).text.trim)).toMap - } +class Converter(xmlIn: Node, xmlProps: Map[String,String]) { /** * Does the conversion @@ -25,13 +19,15 @@ class Converter(xmlIn: Node) { * @return Resulting Jena model */ def convert: Model = { - val lastModifiedOpt = props.get("last_modified") + val lastModifiedOpt = xmlProps.get("last_modified") val namespace = cfg.rdf.iri + "/" - val M = new ModelConstructor(namespace, lastModifiedOpt) + val cfVersionOpt = xmlProps.get("version_number") + + val M = new ModelConstructor(namespace, cfVersionOpt, lastModifiedOpt) - val mapper = new OrrNvsMapper(lastModifiedOpt) + val mapper = new OrrNvsMapper(cfVersionOpt, lastModifiedOpt) for (entry ← xmlIn \\ "entry") { stats.numEntries += 1 diff --git a/src/main/scala/org/mmisw/cf2rdf/ModelConstructor.scala b/src/main/scala/org/mmisw/cf2rdf/ModelConstructor.scala index 342d54d..681a799 100644 --- a/src/main/scala/org/mmisw/cf2rdf/ModelConstructor.scala +++ b/src/main/scala/org/mmisw/cf2rdf/ModelConstructor.scala @@ -7,6 +7,7 @@ import org.apache.jena.vocabulary._ import org.mmisw.orr.ont.vocabulary.{Omv, OmvMmi} class ModelConstructor(namespace: String, + cfVersionOpt: Option[String], lastModifiedOpt: Option[String] ) { @@ -32,8 +33,17 @@ class ModelConstructor(namespace: String, private val ontology = model.createOntology(cfg.rdf.iri) - ontology.addProperty(Omv.name, - s"Climate and Forecast (CF) Standard Names (v.${cfg.cfVersion})") + cfVersionOpt match { + case Some(cfVersion) ⇒ + ontology.addProperty(Omv.name, + s"Climate and Forecast (CF) Standard Names (v.$cfVersion)") + + ontology.addProperty(OmvMmi.origVocVersionId, cfVersion) + + case None ⇒ + ontology.addProperty(Omv.name, + s"Climate and Forecast (CF) Standard Names") + } ontology.addProperty(Omv.description, "Ontology representation of the Climate and Forecast (CF) standard names parameter vocabulary," + @@ -61,7 +71,6 @@ class ModelConstructor(namespace: String, ontology.addProperty(Omv.creationDate, lm) } - ontology.addProperty(OmvMmi.origVocVersionId, cfg.cfVersion) ontology.addProperty(OmvMmi.origVocUri, cfg.xmlUrl) ontology.addProperty(OmvMmi.hasResourceType, diff --git a/src/main/scala/org/mmisw/cf2rdf/OrrNvsMapper.scala b/src/main/scala/org/mmisw/cf2rdf/OrrNvsMapper.scala index 23dd2c5..522dafd 100644 --- a/src/main/scala/org/mmisw/cf2rdf/OrrNvsMapper.scala +++ b/src/main/scala/org/mmisw/cf2rdf/OrrNvsMapper.scala @@ -12,7 +12,7 @@ import org.mmisw.orr.ont.vocabulary.{Omv, OmvMmi} import scala.collection.JavaConverters._ -class OrrNvsMapper(lastModifiedOpt: Option[String]) { +class OrrNvsMapper(cfVersionOpt: Option[String], lastModifiedOpt: Option[String]) { private val mapNamespace = cfg.mapping.iri + "/" private val orrCfNamespace = cfg.rdf.iri + "/" @@ -30,8 +30,15 @@ class OrrNvsMapper(lastModifiedOpt: Option[String]) { private val ontology = model.createOntology(cfg.mapping.iri) - ontology.addProperty(Omv.name, - s"ORR-NVS CF standard name mapping (v.${cfg.cfVersion})") + cfVersionOpt match { + case Some(cfVersion) ⇒ + ontology.addProperty(Omv.name, + s"ORR-NVS CF standard name mapping (v.$cfVersion)") + + case None ⇒ + ontology.addProperty(Omv.name, + s"ORR-NVS CF standard name mapping") + } ontology.addProperty(Omv.description, "Uses skos:exactMatch to link the IRIs of the CF standard names between the" + diff --git a/src/main/scala/org/mmisw/cf2rdf/Registerer.scala b/src/main/scala/org/mmisw/cf2rdf/Registerer.scala index 6ecf9cd..64e4c11 100644 --- a/src/main/scala/org/mmisw/cf2rdf/Registerer.scala +++ b/src/main/scala/org/mmisw/cf2rdf/Registerer.scala @@ -12,7 +12,7 @@ import org.json4s.native.Serialization.writePretty import scalaj.http.{Http, HttpResponse, MultiPart} -class Registerer(orr: OrrCfg) { +class Registerer(orr: OrrCfg, cfVersion: String) { private implicit val jsonFormats: Formats = DefaultFormats ++ JodaTimeSerializers.all def registerOntologies(): Unit = { @@ -22,16 +22,16 @@ class Registerer(orr: OrrCfg) { def registerRdf(): Unit = { register(cfg.rdf.iri, - s"Climate and Forecast (CF) Standard Names (v.${cfg.cfVersion})", - log = s"reflect version number ${cfg.cfVersion}", + s"Climate and Forecast (CF) Standard Names (v.$cfVersion)", + log = s"reflect version number $cfVersion", cfg.rdf.filename ) } def registerMapping(): Unit = { register(cfg.mapping.iri, - s"ORR-NVS CF standard name mapping (v.${cfg.cfVersion})", - log = s"reflect version number ${cfg.cfVersion}", + s"ORR-NVS CF standard name mapping (v.$cfVersion)", + log = s"reflect version number $cfVersion", cfg.mapping.filename ) } diff --git a/src/main/scala/org/mmisw/cf2rdf/cf2rdf.scala b/src/main/scala/org/mmisw/cf2rdf/cf2rdf.scala index 53389bb..88ef8c0 100644 --- a/src/main/scala/org/mmisw/cf2rdf/cf2rdf.scala +++ b/src/main/scala/org/mmisw/cf2rdf/cf2rdf.scala @@ -1,11 +1,8 @@ package org.mmisw.cf2rdf -import config.cfg -import java.io.{File, PrintWriter} +import java.io.File -import org.apache.jena.rdf.model.Model import org.apache.jena.system.JenaSystem - -import scalaj.http.{Http, HttpResponse} +import org.mmisw.cf2rdf.config.cfg /** * Main cf2rdf program. @@ -16,27 +13,31 @@ object cf2rdf { def main(args: Array[String]): Unit = { if (args.contains("generate-conf")) { generateConf(args) - sys.exit(0) } - - val givenStepNames = collection.mutable.SortedSet[String]() - args foreach { stepName ⇒ - if (!steps.contains(stepName)) { - println(s"invalid step $stepName. Valid steps: ${stepNameOrder.mkString(", ")}") - sys.exit(1) - } - givenStepNames += stepName - } - - if (givenStepNames.nonEmpty) { - stepNameOrder.filter(givenStepNames.contains) foreach { steps(_)() } + else if (args.contains("watchdog")) { + watchdog.run() } else { - println(s""" - |Usage: - | cf2rdf generate-conf [--overwrite] - | cf2rdf [download] [convert] [register] - """.stripMargin) + val givenStepNames = collection.mutable.SortedSet[String]() + args foreach { stepName ⇒ + if (!steps.contains(stepName)) { + println(s"invalid step $stepName. Valid steps: ${stepNameOrder.mkString(", ")}") + sys.exit(1) + } + givenStepNames += stepName + } + + if (givenStepNames.nonEmpty) { + stepNameOrder.filter(givenStepNames.contains) foreach { steps(_)() } + } + else { + println(s""" + |Usage: + | cf2rdf generate-conf [--overwrite] + | cf2rdf watchdog + | cf2rdf [download] [convert] [register] + """.stripMargin) + } } } @@ -50,9 +51,7 @@ object cf2rdf { val conf = scala.io.Source.fromInputStream( getClass.getClassLoader.getResource("params_template.conf").openStream() ).mkString - val pw = new PrintWriter(file) - pw.print(conf) - pw.close() + writeFile(conf, file) println(s" Configuration generated: $filename\n") } @@ -63,65 +62,24 @@ object cf2rdf { "register" → registerOntologies _ ) + var cfVersionOpt: Option[String] = None + private def downloadFiles(): Unit = { download(cfg.xmlUrl, cfg.destXml) download(cfg.nvs.rdfUrl, cfg.nvs.rdfFilename) } private def generateAndSaveRdf(): Unit = { - val xmlIn = scala.xml.XML.loadFile(cfg.destXml) - val converter = new Converter(xmlIn) - val model = converter.convert + val (xmlIn, xmlProps) = loadXmlFile(cfg.destXml) + val model = generateModel(xmlIn, xmlProps) saveModel(model) - - val statsStr = { - val propsStr = (converter.props map (kv ⇒ s"${kv._1}: ${kv._2}")) mkString "; " - s"""cf2rdf conversion - |input: ${cfg.xmlUrl} - |output: ${cfg.rdf.filename} - | - |vocabulary properties from input file: - | $propsStr - | - |conversion stats: - |$stats - |""".stripMargin - } - + val statsStr = getSummary(xmlProps) writeFile(statsStr, cfg.destStats) println(s"\nSummary: (saved in ${cfg.destStats})\n\t" + statsStr.replaceAll("\n", "\n\t")) } - private def registerOntologies(): Unit = - cfg.orr foreach { new Registerer(_).registerOntologies() } - - private def download(url: String, filename: String): Unit = { - println(s"Downloading $url") - val response: HttpResponse[String] = Http(url) - .method("GET") - .timeout(connTimeoutMs = 5*1000, readTimeoutMs = 60*1000) - .asString - - val contents = if (response.code == 200) response.body - else throw new Exception( - s"""Error downloading $url - |Code=${response.code}: ${response.statusLine} - |${response.body} - |""".stripMargin) - - val pw = new PrintWriter(createOutputFile(filename)) - pw.print(contents) - pw.close() - println(s" -> $filename\n") - } - - private def saveModel(model: Model) { - val namespace = cfg.rdf.iri + "/" - val writer = model.getWriter(cfg.rdf.format) - writer.setProperty("showXmlDeclaration", "true") - writer.setProperty("relativeURIs", "same-document,relative") - writer.setProperty("xmlbase", namespace) - val out = new java.io.FileOutputStream(createOutputFile(cfg.rdf.filename)) - writer.write(model, out, null) + private def registerOntologies(): Unit = { + val cfVersion = cfVersionOpt.getOrElse(throw new Exception) + cfg.orr foreach { new Registerer(_, cfVersion).registerOntologies() } } } diff --git a/src/main/scala/org/mmisw/cf2rdf/config/Cf2RdfCfg.scala b/src/main/scala/org/mmisw/cf2rdf/config/Cf2RdfCfg.scala index d0bd95b..1ba555e 100644 --- a/src/main/scala/org/mmisw/cf2rdf/config/Cf2RdfCfg.scala +++ b/src/main/scala/org/mmisw/cf2rdf/config/Cf2RdfCfg.scala @@ -4,7 +4,6 @@ import carueda.cfg._ @Cfg case class Cf2RdfCfg( - cfVersion: String, xmlUrl: String, destXml: String, destStats: String, diff --git a/src/main/scala/org/mmisw/cf2rdf/config/package.scala b/src/main/scala/org/mmisw/cf2rdf/config/package.scala index fdefbec..47dd6d4 100644 --- a/src/main/scala/org/mmisw/cf2rdf/config/package.scala +++ b/src/main/scala/org/mmisw/cf2rdf/config/package.scala @@ -7,9 +7,5 @@ import com.typesafe.config.ConfigFactory package object config { val configFile = new File("cf2rdf.conf") - lazy val cfg: Cf2RdfCfg = { - val c = Cf2RdfCfg(ConfigFactory.parseFile(configFile).resolve()) - println(s"cfg = $c\n") - c - } + lazy val cfg: Cf2RdfCfg = Cf2RdfCfg(ConfigFactory.parseFile(configFile).resolve()) } diff --git a/src/main/scala/org/mmisw/cf2rdf/package.scala b/src/main/scala/org/mmisw/cf2rdf/package.scala index 86bb839..78be330 100644 --- a/src/main/scala/org/mmisw/cf2rdf/package.scala +++ b/src/main/scala/org/mmisw/cf2rdf/package.scala @@ -1,16 +1,89 @@ package org.mmisw -import java.io.{File, PrintWriter} +import java.io.File + +import org.apache.jena.rdf.model.Model +import org.mmisw.cf2rdf.config.cfg + +import scala.xml.{Elem, Node} +import scalaj.http.{Http, HttpResponse} package object cf2rdf { + def download(url: String): String = { + println(s"Downloading $url") + val response: HttpResponse[String] = Http(url) + .method("GET") + .timeout(connTimeoutMs = 5*1000, readTimeoutMs = 60*1000) + .asString + + if (response.code == 200) response.body + else throw new Exception( + s"""Error downloading $url + |Code=${response.code}: ${response.statusLine} + |${response.body} + |""".stripMargin) + } + + def download(url: String, filename: String): String = { + val contents = download(url) + writeFile(contents, filename) + println(s" -> $filename\n") + contents + } + + def loadXmlString(xml: String): (Elem, Map[String, String]) = { + val xmlIn: Elem = scala.xml.XML.loadString(xml) + (xmlIn, getXmlProps(xmlIn)) + } + + def loadXmlFile(filename: String): (Elem, Map[String, String]) = { + val xmlIn: Elem = scala.xml.XML.loadFile(filename) + (xmlIn, getXmlProps(xmlIn)) + } + + private def getXmlProps(xmlIn: Elem): Map[String, String] = { + /** some general properties from the input */ + val keys = List("version_number", "last_modified") //, "institution", "contact") + (keys map (k ⇒ k -> (xmlIn \ k).text.trim)).toMap + } + + def getSummary(xmlProps: Map[String, String]): String = { + val propsStr = (xmlProps map (kv ⇒ s"${kv._1}: ${kv._2}")) mkString "; " + s"""cf2rdf conversion + |input: ${cfg.xmlUrl} + |output: ${cfg.rdf.filename} + | + |vocabulary properties from input file: + | $propsStr + | + |conversion stats: + |$stats + |""".stripMargin + } + + def generateModel(xmlIn: Node, xmlProps: Map[String,String]): Model = { + new Converter(xmlIn, xmlProps).convert + } + + def saveModel(model: Model) { + val namespace = cfg.rdf.iri + "/" + val writer = model.getWriter(cfg.rdf.format) + writer.setProperty("showXmlDeclaration", "true") + writer.setProperty("relativeURIs", "same-document,relative") + writer.setProperty("xmlbase", namespace) + val out = new java.io.FileOutputStream(createOutputFile(cfg.rdf.filename)) + writer.write(model, out, null) + } + def writeFile(contents: String, filename: String): Unit = writeFile(contents, createOutputFile(filename) ) def writeFile(contents: String, file: File): Unit = { - val pw = new PrintWriter(file) - pw.printf(contents) - pw.close() + import java.nio.charset.StandardCharsets + import java.nio.file.Files + val bytes = contents.getBytes(StandardCharsets.UTF_8) + Files.write(file.toPath, bytes) } def createOutputFile(filename: String): File = { @@ -19,4 +92,9 @@ package object cf2rdf { if(parent != null) parent.mkdirs() file } + + def loadFile(filename: String): String = { + scala.io.Source.fromFile(filename).mkString + } + } diff --git a/src/main/scala/org/mmisw/cf2rdf/watchdog.scala b/src/main/scala/org/mmisw/cf2rdf/watchdog.scala new file mode 100644 index 0000000..206d954 --- /dev/null +++ b/src/main/scala/org/mmisw/cf2rdf/watchdog.scala @@ -0,0 +1,54 @@ +package org.mmisw.cf2rdf + +import org.mmisw.cf2rdf.config.{OrrCfg, cfg} + +import scala.xml.Elem + +object watchdog { + + def run(): Unit = { + println(s"--cf2rdf watchdog starting --") + + val orr: OrrCfg = cfg.orr.getOrElse( + throw new Exception("watchdog requires the 'orr' parameters")) + + val newXml = download(cfg.xmlUrl) + val lpvXml = loadFile(cfg.destXml) + + val (newXmlIn, newXmlProps) = loadXmlString(newXml) + val (_, lpvXmlProps) = loadXmlString(lpvXml) + + val newVersionOpt = newXmlProps.get("version_number") + val lpvVersionOpt = lpvXmlProps.get("version_number") + + println(s"Downloaded version: ${newVersionOpt.getOrElse("?")}") + println(s"Last processed version: ${lpvVersionOpt.getOrElse("?")}") + + if (newVersionOpt == lpvVersionOpt) + println(s"--cf2rdf watchdog: nothing to do. --") + else + convertAndRegister(orr, newXml, newXmlIn, newXmlProps) + } + + private def convertAndRegister(orr: OrrCfg, + newXml: String, + xmlIn: Elem, + xmlProps: Map[String, String] + ): Unit = { + + val model = generateModel(xmlIn, xmlProps) + saveModel(model) + val statsStr = getSummary(xmlProps) + writeFile(statsStr, cfg.destStats) + + val cfVersion = xmlProps.getOrElse("version_number", + throw new Exception("Unexpected missing version_number")) + + new Registerer(orr, cfVersion).registerOntologies() + + println(s"Updating ${cfg.destXml} (with ${newXml.length} chars)") + writeFile(newXml, cfg.destXml) + + println(s"--cf2rdf watchdog done --") + } +}