diff --git a/README.md b/README.md index 35a4a16..07e6a44 100644 --- a/README.md +++ b/README.md @@ -2,17 +2,21 @@ MARVIN is the release bot that does automated DBpedia releases each month on three different servers for generic, mappings, wikidata, abstract extraction. The repository at https://git.informatik.uni-leipzig.de/dbpedia-assoc/marvin-config can be used to fork the architecture for creating extensions, developing new extractors or debugging old ones. -Fixes and patches will be manually deployed via a freah `git clone` from the `master` branch of the [DBpedia Extraction Framework](https://github.com/dbpedia/extraction-framework/). +Fixes and patches will be manually deployed via a fresh `git clone` from the `master` branch of the [DIEF (DBpedia Information Extraction Framework)](https://github.com/dbpedia/extraction-framework/). ## Contributions & License All scripts and config files in this repo are CC-0 (Public Domain). We accept pull requests to improve the config files, all contributions will be merged as CC-0. 
## Run a MARVIN extraction + ``` git clone https://git.informatik.uni-leipzig.de/dbpedia-assoc/marvin-config cd marvin-config -# Romanian extraction, very small +# delete previous versions of the DIEF +rm -rf marvin-extraction/extraction-framework +./setup-dief.sh +# test Romanian extraction, very small ./marvin_extraction_run.sh --group=test ``` @@ -28,7 +32,7 @@ To run the other extractions, use either ## Cronjobs -Below is a list +Below is a list of cronjobs we use on the different servers. ## Acknowledgements diff --git a/functions.sh b/functions.sh index 16186ac..6d71322 100755 --- a/functions.sh +++ b/functions.sh @@ -1,28 +1,5 @@ #!/bin/bash -# check git, curl, maven, java (1.8), lbzip2 - - -prepareExtractionFramework(){ - if [ "$SKIPDIEFINSTALL" = "false" ] - then - # TODO make sure this contains marvin-config/marvin-extraction and replace with -rf - echo "deleting $DIEFDIR" - rm -rI $DIEFDIR - git clone "https://github.com/dbpedia/extraction-framework.git" $DIEFDIR - cd $DIEFDIR - # todo add config - #cd $ROOT && cp $ROOT/config.d/universal.properties.template $EXTRACTIONFRAMEWORKDIR/core/src/main/resources/universal.properties; - #sed -i -e 's,$BASEDIR,'$EXTRACTIONBASEDIR',g' $EXTRACTIONFRAMEWORKDIR/core/src/main/resources/universal.properties; - #sed -i -e 's,$LOGDIR,'$LOGDIR',g' $EXTRACTIONFRAMEWORKDIR/core/src/main/resources/universal.properties; - - mvn clean install - else - echo "skipping DIEF installation" - fi -} - - # downlaod and extract data extractDumps() { cd $DIEFDIR/dump; diff --git a/marvin_extraction_run.sh b/marvin_extraction_run.sh index a55d328..9b7d080 100755 --- a/marvin_extraction_run.sh +++ b/marvin_extraction_run.sh @@ -82,12 +82,9 @@ fi source functions.sh ####################### -# run +# RUN (requires setup-dief.sh) ####################### -# PRE-PROCESSING -prepareExtractionFramework; - # DOWNLOAD ONTOLOGY and MAPPINGS cd $DIEFDIR/core; ../run download-ontology &> $LOGDIR/downloadOntology.log; @@ -98,7 +95,7 @@ cd $DIEFDIR/dump 
../run download $CONFIGDIR/download.$GROUP.properties &> $LOGDIR/downloadWikidumps.log; # EXTRACT -extractDumps &> $LOGDIR/extraction.log; +#extractDumps &> $LOGDIR/extraction.log; # POST-PROCESSING #postProcessing 2> $LOGDIR/postProcessing.log; diff --git a/setup-dief.sh b/setup-dief.sh new file mode 100755 index 0000000..f321a13 --- /dev/null +++ b/setup-dief.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )/marvin-extraction" +CONFIGDIR="$ROOT/extractionConfiguration" +DIEFDIR="$ROOT/extraction-framework" + +git clone "https://github.com/dbpedia/extraction-framework.git" $DIEFDIR +cd $DIEFDIR +# todo add config +#cd $ROOT && cp $ROOT/config.d/universal.properties.template $EXTRACTIONFRAMEWORKDIR/core/src/main/resources/universal.properties; +#sed -i -e 's,$BASEDIR,'$EXTRACTIONBASEDIR',g' $EXTRACTIONFRAMEWORKDIR/core/src/main/resources/universal.properties; +#sed -i -e 's,$LOGDIR,'$LOGDIR',g' $EXTRACTIONFRAMEWORKDIR/core/src/main/resources/universal.properties; + +mvn clean install +