From e0afee33e0e4c0ba13aba18b9223996a23eebac5 Mon Sep 17 00:00:00 2001 From: holegar Date: Fri, 4 Oct 2024 10:29:43 +0100 Subject: [PATCH] Fixed seman motif issue of arabidopsis in brassica --- ...-cfg.sh => brassicaceae-premium-58-cfg.sh} | 10 +- ...premium-58-metadata-descriptor.properties} | 4 +- .../config/datasets/cereals-premium-58-cfg.sh | 2 +- ...58-cfg.sh => vegetables-premium-58-cfg.sh} | 8 +- ...premium-58-metadata-descriptor.properties} | 0 rres-endpoints/endpoint-steps/neo-index.sh | 92 +++++++++---------- rres-endpoints/utils/neo4j/neo-stats.sh | 2 +- 7 files changed, 58 insertions(+), 60 deletions(-) rename rres-endpoints/config/datasets/{brassica-58-cfg.sh => brassicaceae-premium-58-cfg.sh} (79%) rename rres-endpoints/config/datasets/{brassica-58-metadata-descriptor.properties => brassicaceae-premium-58-metadata-descriptor.properties} (88%) rename rres-endpoints/config/datasets/{solanaceae-58-cfg.sh => vegetables-premium-58-cfg.sh} (81%) rename rres-endpoints/config/datasets/{solanaceae-58-metadata-descriptor.properties => vegetables-premium-58-metadata-descriptor.properties} (100%) diff --git a/rres-endpoints/config/datasets/brassica-58-cfg.sh b/rres-endpoints/config/datasets/brassicaceae-premium-58-cfg.sh similarity index 79% rename from rres-endpoints/config/datasets/brassica-58-cfg.sh rename to rres-endpoints/config/datasets/brassicaceae-premium-58-cfg.sh index 4e3ccf3..372e586 100644 --- a/rres-endpoints/config/datasets/brassica-58-cfg.sh +++ b/rres-endpoints/config/datasets/brassicaceae-premium-58-cfg.sh @@ -1,23 +1,23 @@ # /home/data/knetminer/etl-test/brassica/58/generic/knowledge-network.oxl # Unfortunately, there isn't consistence, so we can use KETL_DATASET_ID here -oxl_home="$KNET_HOME/etl-test/brassica/$KETL_DATASET_VERSION" +oxl_home="$KNET_HOME/etl-test/brassicaceae-premium/$KETL_DATASET_VERSION" export KETL_SRC_OXL="$oxl_home/generic/knowledge-network.oxl" -export KETL_OUT="$KETL_OUT_HOME/$KETL_DATASET_ID/$KETL_DATASET_VERSION" +export KETL_OUT="$KETL_OUT_HOME/$KETL_DATASET_ID/v$KETL_DATASET_VERSION-RC2" ## Neo  # export KETL_HAS_NEO4J=true -export KETL_NEO_VERSION='5.20.0' +export KETL_NEO_VERSION='5.23.0' export NEO4J_HOME="$KNET_SOFTWARE/neo4j-community-$KETL_NEO_VERSION-etl" ## Knet Initialiser # # The name within the code base, which identifies the config dir to be # used for the KnetMiner initialiser -export KNET_INIT_DATASET_ID="brassica" +export KNET_INIT_DATASET_ID="brassicaceae-premium" ##### Values for server-sync.sh @@ -32,7 +32,7 @@ export KNET_NEO_DATA=/opt/data # # Test servers like babvs73 -export KNET_TESTINST_DATA_PATH=/opt/data/knetminer-datasets/brassica +export KNET_TESTINST_DATA_PATH=/opt/data/knetminer-datasets/brassicaceae-premium # babvs73: based on old Traverser, available at knetminer.com/ci-test # babvs72: based on Neo4j+OXL Traverser, available at knetminer.com/ci-test-cypher export KNET_TESTINST_SSH="brandizim@babvs73.rothamsted.ac.uk brandizim@babvs72.rothamsted.ac.uk" diff --git a/rres-endpoints/config/datasets/brassica-58-metadata-descriptor.properties b/rres-endpoints/config/datasets/brassicaceae-premium-58-metadata-descriptor.properties similarity index 88% rename from rres-endpoints/config/datasets/brassica-58-metadata-descriptor.properties rename to rres-endpoints/config/datasets/brassicaceae-premium-58-metadata-descriptor.properties index da621bb..2e4e50f 100644 --- a/rres-endpoints/config/datasets/brassica-58-metadata-descriptor.properties +++ b/rres-endpoints/config/datasets/brassicaceae-premium-58-metadata-descriptor.properties @@ -2,8 +2,8 @@ # https://github.com/Rothamsted/knetbuilder/tree/master/ondex-knet-builder/modules/rdf-export-2 # datasetId = brassica -datasetAccession = KnetMiner:Brassica -datasetTitle = Knetminer's knowledge graph about brassica +datasetAccession = KnetMiner:Brassicaceae-premium +datasetTitle = Knetminer's knowledge graph about brassicaceae datasetDescription = \ Knetminer is a gene discovery platform, which allows for exploring knwoledge graphs computed \ from common plant biology data, such as ENSEMBL, UniProt, TAIR, PUBMED and more.\n\ diff --git a/rres-endpoints/config/datasets/cereals-premium-58-cfg.sh b/rres-endpoints/config/datasets/cereals-premium-58-cfg.sh index 56932e8..bcd9de0 100644 --- a/rres-endpoints/config/datasets/cereals-premium-58-cfg.sh +++ b/rres-endpoints/config/datasets/cereals-premium-58-cfg.sh @@ -5,7 +5,7 @@ oxl_home="$KNET_HOME/etl-test/cereals-premium/$KETL_DATASET_VERSION" export KETL_SRC_OXL="$oxl_home/generic/knowledge-network.oxl" # Sam 20240909 - New versioning convention -export KETL_OUT="$KETL_OUT_HOME/$KETL_DATASET_ID/v$KETL_DATASET_VERSION-RC2" +export KETL_OUT="$KETL_OUT_HOME/$KETL_DATASET_ID/v$KETL_DATASET_VERSION-RC1" ## Neo  # diff --git a/rres-endpoints/config/datasets/solanaceae-58-cfg.sh b/rres-endpoints/config/datasets/vegetables-premium-58-cfg.sh similarity index 81% rename from rres-endpoints/config/datasets/solanaceae-58-cfg.sh rename to rres-endpoints/config/datasets/vegetables-premium-58-cfg.sh index 8d173fa..5a64f22 100644 --- a/rres-endpoints/config/datasets/solanaceae-58-cfg.sh +++ b/rres-endpoints/config/datasets/vegetables-premium-58-cfg.sh @@ -1,23 +1,23 @@ # /home/data/knetminer/etl-test/solanaceae/58/generic/knowledge-network.oxl # Unfortunately, there isn't consistence, so we can use KETL_DATASET_ID here -oxl_home="$KNET_HOME/etl-test/solanaceae/$KETL_DATASET_VERSION" +oxl_home="$KNET_HOME/etl-test/vegetables-premium/$KETL_DATASET_VERSION" export KETL_SRC_OXL="$oxl_home/generic/knowledge-network.oxl" -export KETL_OUT="$KETL_OUT_HOME/$KETL_DATASET_ID/$KETL_DATASET_VERSION" +export KETL_OUT="$KETL_OUT_HOME/$KETL_DATASET_ID/v$KETL_DATASET_VERSION-RC2" ## Neo  # export KETL_HAS_NEO4J=true -export KETL_NEO_VERSION='5.20.0' +export KETL_NEO_VERSION='5.23.0' export NEO4J_HOME="$KNET_SOFTWARE/neo4j-community-$KETL_NEO_VERSION-etl" ## Knet Initialiser # # The name within the code base, which identifies the config dir to be # used for the KnetMiner initialiser -export KNET_INIT_DATASET_ID="solanaceae" +export KNET_INIT_DATASET_ID="vegetables-premium" ##### Values for server-sync.sh diff --git a/rres-endpoints/config/datasets/solanaceae-58-metadata-descriptor.properties b/rres-endpoints/config/datasets/vegetables-premium-58-metadata-descriptor.properties similarity index 100% rename from rres-endpoints/config/datasets/solanaceae-58-metadata-descriptor.properties rename to rres-endpoints/config/datasets/vegetables-premium-58-metadata-descriptor.properties diff --git a/rres-endpoints/endpoint-steps/neo-index.sh b/rres-endpoints/endpoint-steps/neo-index.sh index a9a8db3..26499e7 100644 --- a/rres-endpoints/endpoint-steps/neo-index.sh +++ b/rres-endpoints/endpoint-steps/neo-index.sh @@ -1,47 +1,45 @@ -# Runs the KnetMiner initialising tools to create indexes and other data for the -# new KnetMiner API -set -e - -oxl_src="$1" # the OXL with URIs (mandatory) and metadata annotations (optional) -out_flag="$2" # Creates this file to signal that the step was successfully completed - -neo_url=`ketl_get_neo_url` - -export JAVA_TOOL_OPTIONS="$JAVA_TOOL_OPTIONS -Dneo4j.boltUrl='$neo_url'" -export JAVA_TOOL_OPTIONS="$JAVA_TOOL_OPTIONS -Dneo4j.user='$KETL_NEO_USR'" -export JAVA_TOOL_OPTIONS="$JAVA_TOOL_OPTIONS -Dneo4j.password='$KETL_NEO_PWD'" - - -printf "\n\n Creating KnetMiner initialisation files\n\n" -printf "\n ====> MAKE SURE $KNET_INITIALIZER_HOME IS UPDATED!!!\n\n" - -knet_cfg="$KETL_OUT/tmp/knet-init" -# Re-creating it all is the safest option, comment this at your own risk, and -# DO NOT push the commented version back to github (or leave it in the RRes file -# system). -rm -Rf "$knet_cfg" -mkdir -p "$knet_cfg" - -"$KNET_WEBAPP/docker/dataset-init.sh" --force "$knet_cfg" "$KNET_INIT_DATASET_ID" -cp -R -v "$KETL_HOME/config/knet-init"/* "$knet_cfg/config" - -# This does all of base indexing, --neo-index and --neo-motifs in one go (in this order). -# --neo-motifs is provisional, we need it until we can replace it with the new traverser. -# -printf "\n\n Creating Neo indexing (full-text and semantic motifs)\n\n" - -# Comment this to skip the traverser. BE CAREFUL -neo_motifs_flag='--neo-motifs' - -# Comment this to skip the traverser. BE CAREFUL -neo_motifs_flag='--neo-motifs' -"$KNET_INITIALIZER_HOME/knet-init.sh" \ --c "$knet_cfg/config/config-etl.yml" --neo-index=config:// $neo_motifs_flag --in "$oxl_src" - --c "$knet_cfg/config/config-etl.yml" --neo-index=config:// $neo_motifs_flag --in "$oxl_src" - -# Sam 2024/09/13: Run the Cypher query to generate stats node in Neo4j -source "$KETL_HOME/utils/neo4j/neo-stats.sh" - -echo -e "\nAll Neo4j indexing and stats generation done\n" -echo `date` >"$out_flag" +# Runs the KnetMiner initialising tools to create indexes and other data for the +# new KnetMiner API +set -e + +oxl_src="$1" # the OXL with URIs (mandatory) and metadata annotations (optional) +out_flag="$2" # Creates this file to signal that the step was successfully completed + +neo_url=`ketl_get_neo_url` + +export JAVA_TOOL_OPTIONS="$JAVA_TOOL_OPTIONS -Dneo4j.boltUrl='$neo_url'" +export JAVA_TOOL_OPTIONS="$JAVA_TOOL_OPTIONS -Dneo4j.user='$KETL_NEO_USR'" +export JAVA_TOOL_OPTIONS="$JAVA_TOOL_OPTIONS -Dneo4j.password='$KETL_NEO_PWD'" + + +printf "\n\n Creating KnetMiner initialisation files\n\n" +printf "\n ====> MAKE SURE $KNET_INITIALIZER_HOME IS UPDATED!!!\n\n" + +knet_cfg="$KETL_OUT/tmp/knet-init" +# Re-creating it all is the safest option, comment this at your own risk, and +# DO NOT push the commented version back to github (or leave it in the RRes file +# system). +rm -Rf "$knet_cfg" +mkdir -p "$knet_cfg" + +"$KNET_WEBAPP/docker/dataset-init.sh" --force "$knet_cfg" "$KNET_INIT_DATASET_ID" +cp -R -v "$KETL_HOME/config/knet-init"/* "$knet_cfg/config" + +# This does all of base indexing, --neo-index and --neo-motifs in one go (in this order). +# --neo-motifs is provisional, we need it until we can replace it with the new traverser. +# +printf "\n\n Creating Neo indexing (full-text and semantic motifs)\n\n" + +# Comment this to skip the traverser. BE CAREFUL +neo_motifs_flag='--neo-motifs' + +# Comment this to skip the traverser. BE CAREFUL +neo_motifs_flag='--neo-motifs' +"$KNET_INITIALIZER_HOME/knet-init.sh" \ +-c "$knet_cfg/config/config-etl.yml" --neo-index=config:// $neo_motifs_flag --in "$oxl_src" + +# Sam 2024/09/13: Run the Cypher query to generate stats node in Neo4j +source "$KETL_HOME/utils/neo4j/neo-stats.sh" + +echo -e "\nAll Neo4j indexing and stats generation done\n" +echo `date` >"$out_flag" diff --git a/rres-endpoints/utils/neo4j/neo-stats.sh b/rres-endpoints/utils/neo4j/neo-stats.sh index b5fc0a7..0f82fc6 100644 --- a/rres-endpoints/utils/neo4j/neo-stats.sh +++ b/rres-endpoints/utils/neo4j/neo-stats.sh @@ -8,7 +8,7 @@ CREATE (s:Metadata { nodeCount: nodeCount, edgeCount: edgeCount, version: \"${KETL_DATASET_VERSION}\", - fileLocation: \"s3://knet-data-store/${KETL_DATASET_ID}/v${KETL_DATASET_VERSION}-RC1\", + fileLocation: \"s3://knet-data-store/${KETL_DATASET_ID}/v${KETL_DATASET_VERSION}-RC2\", date: \"${current_date}\" })"