Skip to content

Commit

Permalink
moved variables to function
Browse files Browse the repository at this point in the history
  • Loading branch information
kurzum committed Oct 11, 2019
1 parent 357da28 commit 76923bb
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 46 deletions.
15 changes: 11 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@ We accept pull requests to improve the config files, all contributions will be m
```
git clone https://git.informatik.uni-leipzig.de/dbpedia-assoc/marvin-config
cd marvin-config
# delete previous versions of the DIEF
# (optional) delete previous versions of the DIEF
rm -rf marvin-config/extraction-framework
# install dief in marvin-extraction/extraction-framework
./setup-dief.sh
# test Romanian extraction, very small
# test run Romanian extraction, very small
./marvin_extraction_run.sh --group=test
```

Expand All @@ -32,7 +33,11 @@ To run the other extractions, use either

## Cronjobs

Below is a list of cronjobs we use on the different servers
Below is a list of cronjobs we use on the different servers:

```
TODO
```


## Acknowledgements
Expand All @@ -44,7 +49,9 @@ We thank Sören Auer and the Technische Informationsbibliothek (TIB) for providi

This contribution by TIB to DBpedia & its community is a great push towards incentivizing Open Data and establishing a global and national research and innovation data infrastructure.

# Workflow
# Workflow Description

##

## Downloading the wikimedia dumps
TODO
Expand Down
6 changes: 4 additions & 2 deletions extractionConfiguration/universal.properties.template
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@ dbpedia-version=2018-10

# Replace with your Wikipedia dump download directory (should not change over the course of a release)
# base-dir=/data/extraction/wikidumps/
base-dir=$BASEDIR
# AUTOMATICALLY SET BY setup-dief.sh
# base-dir=$BASEDIR

# The log file directory - used to store all log files created in the course of all extractions
# log-dir=/data/extraction/logs/extraction/
log-dir=$LOGDIR/extraction/
# AUTOMATICALLY SET BY setup-dief.sh
# log-dir=$LOGDIR/extraction/

# to forward extraction summaries and warnings via the slack API, use this option
-slack-webhook=https://hooks.slack.com/services/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
Expand Down
22 changes: 22 additions & 0 deletions functions.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,27 @@
#!/bin/bash


##############
# setup paths
##############

# Absolute directory containing this file; every other path is derived from it,
# so the scripts sourcing this file get the same layout regardless of cwd.
ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
CONFIGDIR="$ROOT/extractionConfiguration"
# Checkout location of the extraction framework (not created here).
DIEFDIR="$ROOT/marvin-extraction/extraction-framework"
# One log directory per day. LOGDIR has to be assigned BEFORE mkdir runs,
# otherwise mkdir is called without an operand and the directory is never
# created, which breaks every later "&> $LOGDIR/..." redirect.
LOGDIR="$ROOT/marvin-extraction/logs/$(date +%Y-%m-%d)" && mkdir -p "$LOGDIR"
EXTRACTIONBASEDIR="$ROOT/marvin-extraction/wikidumps" && mkdir -p "$EXTRACTIONBASEDIR"

# TODO
RELEASEDIR="$ROOT/marvin-extraction/release"
DATAPUSMAVENPLUGINPOMDIR="$ROOT/databus-maven-plugin"
DATAPUSMAVENPLUGINPOMGIT="https://github.com/dbpedia/databus-maven-plugin.git"

# Quoted so that a checkout path containing spaces still works.
mkdir -p "$RELEASEDIR"

##############
# functions
##############

# download and extract data
extractDumps() {
cd $DIEFDIR/dump;
Expand Down
42 changes: 10 additions & 32 deletions marvin_extraction_run.sh
Original file line number Diff line number Diff line change
@@ -1,38 +1,19 @@
#!/bin/bash

HELP="usage:
--group={test|generic|mappings|wikidata} [--databus-deploy|--skip-dief-install]
--group={test|generic|mappings|wikidata} [--databus-deploy]
description:
--group={test|generic|mappings|wikidata} : required
selects download.\$GROUP.properties and extraction.\$GROUP.properties from extractionConfig dir
Some exceptions are hard coded like 'extraction.generic.en.properties'
[--skip-dief-install] : optional
'false' -> each run does a fresh checkout install of the DIEF (DBpedia Information Extraction Framework)
'true' -> skipped
"

#######################
# include all functions and path variables
#######################
# Resolve functions.sh next to this script instead of relative to the caller's
# working directory, so the script also works when started from elsewhere
# (e.g. from a cronjob).
source "$(dirname "${BASH_SOURCE[0]}")/functions.sh"

##############
# setup paths
##############
ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )/marvin-extraction"
CONFIGDIR="$ROOT/extractionConfiguration"

# set and create
LOGDIR="$ROOT/logs/$(date +%Y-%m-%d)" && mkdir -p $LOGDIR
DIEFDIR="$ROOT/extraction-framework"

# TODO
EXTRACTIONBASEDIR="$ROOT/wikidumps"
DATAPUSMAVENPLUGINPOMDIR="$ROOT/databus-maven-plugin"
RELEASEDIR="$ROOT/release"
DATAPUSMAVENPLUGINPOMGIT="https://github.com/dbpedia/databus-maven-plugin.git"

mkdir -p $EXTRACTIONBASEDIR
mkdir -p $RELEASEDIR

#################
#check arguments
Expand Down Expand Up @@ -76,23 +57,20 @@ then
fi


#######################
# include all functions
#######################
source functions.sh


#######################
# RUN (requires setup-dief.sh)
#######################

# DOWNLOAD ONTOLOGY and MAPPINGS
cd $DIEFDIR/core;
../run download-ontology &> $LOGDIR/downloadOntology.log;
../run download-mappings &> $LOGDIR/downloadMappings.log;
cd $DIEFDIR/core
../run download-ontology &> $LOGDIR/downloadOntology.log
../run download-mappings &> $LOGDIR/downloadMappings.log

# DOWNLOAD WIKIDUMPS
cd $DIEFDIR/dump
../run download $CONFIGDIR/download.$GROUP.properties &> $LOGDIR/downloadWikidumps.log;
../run download $CONFIGDIR/download.$GROUP.properties &> $LOGDIR/downloadWikidumps.log

# EXTRACT
#extractDumps &> $LOGDIR/extraction.log;
Expand Down
17 changes: 9 additions & 8 deletions setup-dief.sh
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
#!/bin/bash

ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )/marvin-extraction"
CONFIGDIR="$ROOT/extractionConfiguration"
DIEFDIR="$ROOT/extraction-framework"
# get all variables and functions
# (looked up next to this script, not in the caller's working directory)
source "$(dirname "${BASH_SOURCE[0]}")/functions.sh"

cd marvin-extraction
git clone "https://github.com/dbpedia/extraction-framework.git" $DIEFDIR

cd $DIEFDIR
# todo add config
#cd $ROOT && cp $ROOT/config.d/universal.properties.template $EXTRACTIONFRAMEWORKDIR/core/src/main/resources/universal.properties;
#sed -i -e 's,$BASEDIR,'$EXTRACTIONBASEDIR',g' $EXTRACTIONFRAMEWORKDIR/core/src/main/resources/universal.properties;
#sed -i -e 's,$LOGDIR,'$LOGDIR',g' $EXTRACTIONFRAMEWORKDIR/core/src/main/resources/universal.properties;
# Build universal.properties: the two machine-specific settings first, followed
# by the shared template. Paths are quoted so a checkout directory containing
# spaces does not split into several words.
UNIVERSALPROPERTIES="$DIEFDIR/core/src/main/resources/universal.properties"
{
  echo "base-dir=$EXTRACTIONBASEDIR"
  echo "log-dir=$LOGDIR/extraction/"
  cat "$CONFIGDIR/universal.properties.template"
} > "$UNIVERSALPROPERTIES"

mvn clean install
mvn clean install &> $LOGDIR/installDIEF.log

0 comments on commit 76923bb

Please sign in to comment.