diff --git a/.docker-compose/docker-compose.h2.dev.yaml b/.docker-compose/docker-compose.h2.dev.yaml new file mode 100644 index 0000000..7051730 --- /dev/null +++ b/.docker-compose/docker-compose.h2.dev.yaml @@ -0,0 +1,61 @@ +version: '3' + +services: + + fits: + image: artourkin/fits-web:main + container_name: fits + env_file: ../.env + networks: + - web + restart: unless-stopped + ports: + - 8081:8080 + + + rest: + container_name: rest + build: + context: .. + dockerfile: ../Dockerfile + env_file: ../.env + networks: + - web + restart: unless-stopped + deploy: + replicas: 1 + ports: + - 8082:8080 + depends_on: + - fits + - db-docker + + + web: + build: + context: .. + dockerfile: ../web/Dockerfile + container_name: web + env_file: ../.env + networks: + - web + restart: unless-stopped + ports: + - 8080:3000 + + + db-docker: + image: oscarfonts/h2 + container_name: db-docker + env_file: ../.env + environment: + - H2_OPTIONS=-ifNotExists + networks: + - web + restart: unless-stopped + ports: + - 1521:1521 + - 81:81 + +networks: + web: \ No newline at end of file diff --git a/.docker-compose/docker-compose.mysql.cluster.yaml b/.docker-compose/docker-compose.mysql.cluster.yaml new file mode 100644 index 0000000..6aae85a --- /dev/null +++ b/.docker-compose/docker-compose.mysql.cluster.yaml @@ -0,0 +1,179 @@ +version: '3' + +services: + + fits: + build: + context: .. + dockerfile: ../fits/Dockerfile + container_name: fits + env_file: ../.env + networks: + - web + restart: unless-stopped + ports: + - 8081:8080 + + rest: + build: + context: .. + dockerfile: ../Dockerfile + env_file: ../.env + environment: + - SPRING_DATASOURCE_URL=jdbc:mysql://mysql-router:6446/fitsinn + - DB_SELECTOR=mysql + networks: + - web + restart: unless-stopped + deploy: + replicas: 3 + depends_on: + - fits + - mysql-router + + web: + build: + context: .. 
+ dockerfile: ../web/Dockerfile.dev + container_name: web + env_file: ../.env + networks: + - web + restart: unless-stopped + depends_on: + - rest + ports: + - 8080:3000 + + + adminer: + image: adminer + container_name: adminer + env_file: ../.env + restart: unless-stopped + networks: + - web + ports: + - 8090:8080 + + mysql-server-1: + container_name: mysql-server-1 + env_file: + - ../config/mysql-cluster/mysql-server.env + image: mysql/mysql-server:8.0.12 + networks: + - web + command: + [ + "mysqld", + "--server_id=1", + "--binlog_checksum=NONE", + "--gtid_mode=ON", + "--enforce_gtid_consistency=ON", + "--log_bin", + "--log_slave_updates=ON", + "--master_info_repository=TABLE", + "--relay_log_info_repository=TABLE", + "--transaction_write_set_extraction=XXHASH64", + "--user=mysql", + "--skip-host-cache", + "--skip-name-resolve", + "--default_authentication_plugin=mysql_native_password", + ] + + mysql-server-2: + container_name: mysql-server-2 + env_file: + - ../config/mysql-cluster/mysql-server.env + image: mysql/mysql-server:8.0.12 + networks: + - web + command: + [ + "mysqld", + "--server_id=2", + "--binlog_checksum=NONE", + "--gtid_mode=ON", + "--enforce_gtid_consistency=ON", + "--log_bin", + "--log_slave_updates=ON", + "--master_info_repository=TABLE", + "--relay_log_info_repository=TABLE", + "--transaction_write_set_extraction=XXHASH64", + "--user=mysql", + "--skip-host-cache", + "--skip-name-resolve", + "--default_authentication_plugin=mysql_native_password", + ] + + + mysql-server-3: + container_name: mysql-server-3 + env_file: + - ../config/mysql-cluster/mysql-server.env + image: mysql/mysql-server:8.0.12 + networks: + - web + command: + [ + "mysqld", + "--server_id=3", + "--binlog_checksum=NONE", + "--gtid_mode=ON", + "--enforce_gtid_consistency=ON", + "--log_bin", + "--log_slave_updates=ON", + "--master_info_repository=TABLE", + "--relay_log_info_repository=TABLE", + "--transaction_write_set_extraction=XXHASH64", + "--user=mysql", + "--skip-host-cache", + "--skip-name-resolve", + "--default_authentication_plugin=mysql_native_password", + ] + + mysql-shell: + container_name: mysql-shell + env_file: + - ../config/mysql-cluster/mysql-shell.env + image: neumayer/mysql-shell-batch + networks: + - web + volumes: + - ./mysql-cluster/scripts/:/scripts/ + depends_on: + - mysql-server-1 + - mysql-server-2 + - mysql-server-3 + + mysql-router: + container_name: mysql-router + env_file: + - ../config/mysql-cluster/mysql-router.env + image: mysql/mysql-router:8.0 + networks: + - web + ports: + - 3306:6446 + depends_on: + - mysql-server-1 + - mysql-server-2 + - mysql-server-3 + - mysql-shell + restart: on-failure + + nginx: + image: nginx + container_name: nginx + env_file: ../.env + volumes: + - ./config/nginx/nginx.conf:/etc/nginx/conf.d/default.conf + ports: + - 8082:80 + networks: + - web + depends_on: + - rest + +networks: + web: \ No newline at end of file diff --git a/.docker-compose/docker-compose.mysql.dev.yaml b/.docker-compose/docker-compose.mysql.dev.yaml new file mode 100644 index 0000000..dda1c5a --- /dev/null +++ b/.docker-compose/docker-compose.mysql.dev.yaml @@ -0,0 +1,89 @@ +version: '3' + +services: + + fits: + build: + context: .. + dockerfile: ../fits/Dockerfile + container_name: fits + env_file: ../.env + networks: + - web + restart: unless-stopped + ports: + - 8081:8080 + + rest: + build: + context: .. 
+ dockerfile: ../Dockerfile + env_file: ../.env + networks: + - web + restart: unless-stopped + environment: + - LOGGING_LEVEL_ORG_HIBERNATE_SQL=DEBUG + - SPRING_JPA_SHOW_SQL=true + - DB_SELECTOR=mysql + deploy: + replicas: 1 + ports: + - 8092:8080 + depends_on: + - fits + - db-docker + + web: + build: + context: .. + dockerfile: ../web/Dockerfile + container_name: web + env_file: ../.env + networks: + - web + restart: unless-stopped + ports: + - 8080:3000 + + db-docker: + image: mysql:8.0 + container_name: db-docker + env_file: ../.env + environment: + MYSQL_DATABASE: fitsinn + MYSQL_USER: user + MYSQL_PASSWORD: pass + MYSQL_ROOT_PASSWORD: pass + networks: + - web + restart: unless-stopped + ports: + - 3306:3306 + + + adminer: + image: adminer + container_name: adminer + env_file: ../.env + restart: unless-stopped + networks: + - web + ports: + - 8090:8080 + + nginx: + image: nginx + container_name: nginx + env_file: ../.env + volumes: + - ./config/nginx/nginx.conf:/etc/nginx/conf.d/default.conf + ports: + - 8082:80 + networks: + - web + depends_on: + - rest + +networks: + web: \ No newline at end of file diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index 374a790..7366d98 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -3,6 +3,8 @@ name: Docker Image CI on: push: branches: [main, dev, release/*] + tags: + - 'v*' pull_request: branches: [main] @@ -29,13 +31,33 @@ jobs: key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-maven- + + - name: Extract Git metadata + id: vars + run: | + echo "GIT_SHA_SHORT=$(git rev-parse --short HEAD)" >> "$GITHUB_ENV" + echo "BRANCH_NAME=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> "$GITHUB_ENV" + echo "GIT_TAG=${GITHUB_REF#refs/tags/}" >> "$GITHUB_ENV" + + - name: Print GitHub ref and other variables + run: | + echo "GitHub ref: $GITHUB_REF" + echo "Git tag: $GIT_TAG" + echo "Branch name: $BRANCH_NAME" + echo "Commit SHA: $GIT_SHA_SHORT" + - name: Build and push FITSInn REST - uses: docker/build-push-action@v5 - with: - context: . - platforms: linux/amd64,linux/arm64 - push: true - tags: artourkin/fitsinn-rest:${{ github.ref_name }} + run: | + IMAGE_NAME=artourkin/fitsinn-rest + CONTEXT=. 
+ if [ "${{ github.ref }}" == "refs/heads/main" ]; then + docker buildx build --push --tag $IMAGE_NAME:latest $CONTEXT + elif [[ "${{ github.ref }}" == refs/tags/* ]]; then + docker buildx build --push --tag $IMAGE_NAME:$GIT_TAG $CONTEXT + else + docker buildx build --push --tag $IMAGE_NAME:$BRANCH_NAME-$GIT_SHA_SHORT $CONTEXT + fi + - name: Cache node modules id: cache-npm uses: actions/cache@v3 @@ -53,17 +75,29 @@ jobs: name: List the state of node modules continue-on-error: true run: npm list + - name: Build and push FITSInn WEB - uses: docker/build-push-action@v5 - with: - file: ./web/Dockerfile - platforms: linux/amd64,linux/arm64 - push: true - tags: artourkin/fitsinn-web:${{ github.ref_name }} + run: | + IMAGE_NAME=artourkin/fitsinn-web + CONTEXT=./web + if [ "${{ github.ref }}" == "refs/heads/main" ]; then + docker buildx build --push --tag $IMAGE_NAME:latest $CONTEXT + elif [[ "${{ github.ref }}" == refs/tags/* ]]; then + docker buildx build --push --tag $IMAGE_NAME:$GIT_TAG $CONTEXT + else + docker buildx build --push --tag $IMAGE_NAME:$BRANCH_NAME-$GIT_SHA_SHORT $CONTEXT + fi + - name: Build and push FITS WEB - uses: docker/build-push-action@v5 - with: - file: ./fits/Dockerfile - platforms: linux/amd64,linux/arm64/v8 - push: true - tags: artourkin/fits-web:${{ github.ref_name }} + run: | + IMAGE_NAME=artourkin/fits-web + CONTEXT=./fits + if [ "${{ github.ref }}" == "refs/heads/main" ]; then + docker buildx build --push --tag $IMAGE_NAME:latest $CONTEXT + elif [[ "${{ github.ref }}" == refs/tags/* ]]; then + docker buildx build --push --tag $IMAGE_NAME:$GIT_TAG $CONTEXT + else + docker buildx build --push --tag $IMAGE_NAME:$BRANCH_NAME-$GIT_SHA_SHORT $CONTEXT + fi + + diff --git a/.mvn/wrapper/maven-wrapper.jar b/.mvn/wrapper/maven-wrapper.jar new file mode 100644 index 0000000..cb28b0e Binary files /dev/null and b/.mvn/wrapper/maven-wrapper.jar differ diff --git a/.mvn/wrapper/maven-wrapper.properties b/.mvn/wrapper/maven-wrapper.properties new file mode 100644 index 0000000..7d02699 --- /dev/null +++ b/.mvn/wrapper/maven-wrapper.properties @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.8.7/apache-maven-3.8.7-bin.zip +wrapperUrl=https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar diff --git a/Dockerfile b/Dockerfile index 4bde26b..7270632 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ FROM maven:3.9.0 as builder COPY . 
/app WORKDIR /app -RUN mvn -pl !web clean install -Pprod +RUN --mount=type=cache,target=/root/.m2 mvn -pl -web clean install FROM openjdk:21-jdk-slim WORKDIR /app diff --git a/Dockerfile.dev b/Dockerfile.dev new file mode 100644 index 0000000..d8d13ab --- /dev/null +++ b/Dockerfile.dev @@ -0,0 +1,16 @@ +FROM maven:3.9.0 as builder +COPY . /app +WORKDIR /app +RUN --mount=type=cache,target=/root/.m2 mvn -pl -web clean install -DskipTests + +FROM openjdk:21-jdk-slim +WORKDIR /app +RUN printenv +COPY --from=builder /app/main/target/fitsinn-main-*.jar ./app.jar + +RUN chown 1001 ./app.jar \ + && chmod "g+rwX" ./app.jar + +USER 1001 +EXPOSE 8080 +ENTRYPOINT ["java", "-jar", "app.jar"] diff --git a/README.md b/README.md index b653042..6eb7969 100644 --- a/README.md +++ b/README.md @@ -34,15 +34,20 @@ Installation of FITSInn to Docker Swarm or K8S is possible, but is not currently ### Local build -Building the Docker images from scratch and starting FITSInn is executed via: - +Building the Docker images from scratch and starting FITSInn is executed via: ``` docker-compose -f docker-compose.dev.yaml up --build ``` File uploading using bash: +``` +bash ./utils/fileupload.sh http://localhost:8082 ~/rnd/data/govdocs_fits/govdocs1/000/ +``` -bash fileupload.sh http://localhost:8082 ~/rnd/data/govdocs_fits/govdocs1/000/ +File uploading using python (pip package requests in necessary): +``` +python ./utils/fileupload.py http://localhost:8082/multipleupload ~/rnd/data/govdocs_fits/govdocs1/000/ 100 3 +``` ## Issues diff --git a/config/clickhouse/config.xml b/config/clickhouse/config.xml new file mode 100644 index 0000000..6c4a403 --- /dev/null +++ b/config/clickhouse/config.xml @@ -0,0 +1,1038 @@ + + + + + + trace + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + + 1000M + 10 + + + + + + + + + + + + + + 8123 + + + 9000 + + + 9004 + + + + + + + + + + + + + + + 9009 + + + + + + + + + + + + + + + + + + + + + + + + + + + + 4096 + + + 3 + + + + + false + + + /path/to/ssl_cert_file + /path/to/ssl_key_file + + + false + + + /path/to/ssl_ca_cert_file + + + deflate + + + medium + + + -1 + -1 + + + false + + + + + + + /etc/clickhouse-server/server.crt + /etc/clickhouse-server/server.key + + /etc/clickhouse-server/dhparam.pem + none + true + true + sslv2,sslv3 + true + + + + true + true + sslv2,sslv3 + true + + + + RejectCertificateHandler + + + + + + + + + 100 + + + 0 + + + + 10000 + + + 0.9 + + + 4194304 + + + 0 + + + + + + 8589934592 + + + 5368709120 + + + + /var/lib/clickhouse/ + + + /var/lib/clickhouse/tmp/ + + + + + + /var/lib/clickhouse/user_files/ + + + + + + + + + + + users.xml + + + + /var/lib/clickhouse/access/ + + + + + + + default + + + + + + + + + + + + default + + + + + + + + + true + + + false + + + + + + + + + + + + + + + localhost + 9000 + + + + + + + + + localhost + 9000 + + + + + localhost + 9000 + + + + + + + 127.0.0.1 + 9000 + + + + + 127.0.0.2 + 9000 + + + + + + true + + 127.0.0.1 + 9000 + + + + true + + 127.0.0.2 + 9000 + + + + + + + localhost + 9440 + 1 + + + + + + + localhost + 9000 + + + + + localhost + 1 + + + + + + + + + + + + + + + + + + + + + + + + 3600 + + + + 3600 + + + 60 + + + + + + + + + + + + + system + query_log
+ <partition_by>toYYYYMM(event_date)</partition_by>
+ <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+ </query_log>
+
+ <trace_log>
+ <database>system</database>
+ <table>trace_log</table>
+ <partition_by>toYYYYMM(event_date)</partition_by>
+ <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+ </trace_log>
+
+ <query_thread_log>
+ <database>system</database>
+ <table>query_thread_log</table>
+ <partition_by>toYYYYMM(event_date)</partition_by>
+ <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+ </query_thread_log>
+
+ <metric_log>
+ <database>system</database>
+ <table>metric_log</table>
+ <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+ <collect_interval_milliseconds>1000</collect_interval_milliseconds>
+ </metric_log>
+
+ <asynchronous_metric_log>
+ <database>system</database>
+ <table>asynchronous_metric_log</table>
+ <flush_interval_milliseconds>60000</flush_interval_milliseconds>
+ </asynchronous_metric_log>
+
+ <opentelemetry_span_log>
+ <engine>
+ engine MergeTree
+ partition by toYYYYMM(finish_date)
+ order by (finish_date, finish_time_us, trace_id)
+ </engine>
+ <database>system</database>
+ <table>opentelemetry_span_log</table>
+ <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+ </opentelemetry_span_log>
+
+ <crash_log>
+ <database>system</database>
+ <table>crash_log</table>
+ <partition_by />
+ <flush_interval_milliseconds>1000</flush_interval_milliseconds>
+ </crash_log>
+ + + + + + + + + + + + + + + + + + *_dictionary.xml + + + + + + + + /clickhouse/task_queue/ddl + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + click_cost + any + + 0 + 3600 + + + 86400 + 60 + + + + max + + 0 + 60 + + + 3600 + 300 + + + 86400 + 3600 + + + + + + /var/lib/clickhouse/format_schemas/ + + + + + hide encrypt/decrypt arguments + ((?:aes_)?(?:encrypt|decrypt)(?:_mysql)?)\s*\(\s*(?:'(?:\\'|.)+'|.*?)\s*\) + + \1(???) + + + + + + + + + + false + + false + + + https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277 + + + + +
\ No newline at end of file diff --git a/config/clickhouse/initdb.sql b/config/clickhouse/initdb.sql new file mode 100644 index 0000000..d97dce4 --- /dev/null +++ b/config/clickhouse/initdb.sql @@ -0,0 +1,10 @@ +CREATE TABLE characterisationresult +( + file_path String, + property String, + source String, + property_value String, + value_type String +) ENGINE = ReplacingMergeTree + PRIMARY KEY (source, property, file_path) + ORDER BY (source, property, file_path); diff --git a/config/clickhouse/users.xml b/config/clickhouse/users.xml new file mode 100644 index 0000000..2c5b9f7 --- /dev/null +++ b/config/clickhouse/users.xml @@ -0,0 +1,58 @@ + + + + + + + + 10000000000 + + + random + + + + + 1 + + + + + + + 4acfe3202a5ff5cf467898fc58aab1d615029441 + + ::/0 + + default + default + 1 + + + + + + + + + + + 3600 + + + 0 + 0 + 0 + 0 + 0 + + + + \ No newline at end of file diff --git a/config/mysql-cluster/mysql-router.env b/config/mysql-cluster/mysql-router.env new file mode 100644 index 0000000..7118d81 --- /dev/null +++ b/config/mysql-cluster/mysql-router.env @@ -0,0 +1,7 @@ +MYSQL_HOST=mysql-server-1 +MYSQL_PORT=3306 + +MYSQL_USER=root +MYSQL_PASSWORD=mysql + +MYSQL_INNODB_NUM_MEMBERS=3 diff --git a/config/mysql-cluster/mysql-server.env b/config/mysql-cluster/mysql-server.env new file mode 100644 index 0000000..a3691cd --- /dev/null +++ b/config/mysql-cluster/mysql-server.env @@ -0,0 +1,2 @@ +MYSQL_ROOT_PASSWORD=mysql +MYSQL_ROOT_HOST=% \ No newline at end of file diff --git a/config/mysql-cluster/mysql-shell.env b/config/mysql-cluster/mysql-shell.env new file mode 100644 index 0000000..992ec3c --- /dev/null +++ b/config/mysql-cluster/mysql-shell.env @@ -0,0 +1,6 @@ +MYSQL_USER=root +MYSQL_HOST=mysql-server-1 +MYSQL_PORT=3306 +MYSQL_PASSWORD=mysql +MYSQLSH_SCRIPT=/scripts/setupCluster.js +MYSQL_SCRIPT=/scripts/db.sql diff --git a/config/mysql-cluster/scripts/db.sql b/config/mysql-cluster/scripts/db.sql new file mode 100644 index 0000000..4ae6754 --- /dev/null +++ b/config/mysql-cluster/scripts/db.sql @@ -0,0 +1,3 @@ +CREATE DATABASE IF NOT EXISTS fitsinn; +CREATE USER IF NOT EXISTS 'user'@'%' IDENTIFIED BY 'pass'; +GRANT ALL PRIVILEGES ON fitsinn.* TO 'user'@'%'; diff --git a/config/mysql-cluster/scripts/setupCluster.js b/config/mysql-cluster/scripts/setupCluster.js new file mode 100644 index 0000000..23f5559 --- /dev/null +++ b/config/mysql-cluster/scripts/setupCluster.js @@ -0,0 +1,16 @@ +var dbPass = "mysql" +var clusterName = "devCluster" + +try { + print('Setting up InnoDB cluster...\n'); + shell.connect('root@mysql-server-1:3306', dbPass) + var cluster = dba.createCluster(clusterName); + print('Adding instances to the cluster.'); + cluster.addInstance({user: "root", host: "mysql-server-2", password: dbPass}) + print('.'); + cluster.addInstance({user: "root", host: "mysql-server-3", password: dbPass}) + print('.\nInstances successfully added to the cluster.'); + print('\nInnoDB cluster deployed successfully.\n'); +} catch(e) { + print('\nThe InnoDB cluster could not be created.\n\nError: ' + e.message + '\n'); +} diff --git a/config/nginx/nginx.conf b/config/nginx/nginx.conf new file mode 100644 index 0000000..9222f1c --- /dev/null +++ b/config/nginx/nginx.conf @@ -0,0 +1,14 @@ +upstream backend { + server rest:8080; +} + +server { + listen 80; + + location / { + proxy_pass http://backend/; + proxy_request_buffering off; + proxy_http_version 1.1; + client_max_body_size 0; + } +} \ No newline at end of file diff --git a/core/src/main/java/rocks/artur/api/AnalyzePersistFile.java 
b/core/src/main/java/rocks/artur/api/AnalyzePersistFile.java index 86f9988..2bd9ea4 100644 --- a/core/src/main/java/rocks/artur/api/AnalyzePersistFile.java +++ b/core/src/main/java/rocks/artur/api/AnalyzePersistFile.java @@ -1,6 +1,8 @@ package rocks.artur.api; -import java.io.File; +import rocks.artur.api_impl.utils.ByteFile; + +import java.util.List; /** * This interface enables the following actions: @@ -8,6 +10,9 @@ * - to persist a characterisation result in a db. */ public interface AnalyzePersistFile { - Long uploadCharacterisationResults(File file); - Long uploadCharacterisationResults(byte[] file, String filename); + + Long uploadCharacterisationResults(ByteFile file, String datasetName); + + Long uploadCharacterisationResults(List files, String datasetName); + } diff --git a/core/src/main/java/rocks/artur/api/CharacterisationResultProducer.java b/core/src/main/java/rocks/artur/api/CharacterisationResultProducer.java index 4a0353b..d998ba4 100644 --- a/core/src/main/java/rocks/artur/api/CharacterisationResultProducer.java +++ b/core/src/main/java/rocks/artur/api/CharacterisationResultProducer.java @@ -1,5 +1,6 @@ package rocks.artur.api; +import rocks.artur.api_impl.utils.ByteFile; import rocks.artur.domain.CharacterisationResult; import java.io.File; @@ -15,7 +16,7 @@ public interface CharacterisationResultProducer { * @return A version of the tool * @throws IOException */ - String getVersion() throws IOException; + String getVersion(); /*** * @@ -25,7 +26,7 @@ public interface CharacterisationResultProducer { * @return A list of @CharacterisationResult * @throws IOException */ - List processFile(File file) throws IOException; + List processFile(File file); /*** @@ -33,9 +34,8 @@ public interface CharacterisationResultProducer { * This method extracts metadata properties from a given digital object passed as a byte array. * * @param file Input File - * @param filename * @return A list of @CharacterisationResult * @throws IOException */ - List processFile(byte[] file, String filename) throws IOException; + List processFile(ByteFile file); } \ No newline at end of file diff --git a/core/src/main/java/rocks/artur/api/GetCollectionStatistics.java b/core/src/main/java/rocks/artur/api/GetCollectionStatistics.java index 4ee5ae3..67719cd 100644 --- a/core/src/main/java/rocks/artur/api/GetCollectionStatistics.java +++ b/core/src/main/java/rocks/artur/api/GetCollectionStatistics.java @@ -5,6 +5,6 @@ import java.util.Map; public interface GetCollectionStatistics { - Map getStatistics(FilterCriteria filterCriteria); + Map getStatistics(FilterCriteria filterCriteria, String datasetName); } diff --git a/core/src/main/java/rocks/artur/api/GetDatasetInfo.java b/core/src/main/java/rocks/artur/api/GetDatasetInfo.java new file mode 100644 index 0000000..be38089 --- /dev/null +++ b/core/src/main/java/rocks/artur/api/GetDatasetInfo.java @@ -0,0 +1,7 @@ +package rocks.artur.api; + +import java.util.List; + +public interface GetDatasetInfo { + List listDatasets(); +} diff --git a/core/src/main/java/rocks/artur/api/GetObjects.java b/core/src/main/java/rocks/artur/api/GetObjects.java index a6732f8..0adef3b 100644 --- a/core/src/main/java/rocks/artur/api/GetObjects.java +++ b/core/src/main/java/rocks/artur/api/GetObjects.java @@ -10,8 +10,8 @@ * This interface enables getting characterisation results. 
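* All methods below are additionally scoped by a datasetName, so results from different collections can be queried independently.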
*/ public interface GetObjects { - List getObjects(FilterCriteria filterCriteria); - Iterable getObject(String filePath); + List getObjects(FilterCriteria filterCriteria, String datasetName); + Iterable getObject(String filePath, String datasetName); - List getConflictsFromObject(String filePath); + List getConflictsFromObject(String filePath, String datasetName); } diff --git a/core/src/main/java/rocks/artur/api/GetProperties.java b/core/src/main/java/rocks/artur/api/GetProperties.java index b371c5e..2c6239c 100644 --- a/core/src/main/java/rocks/artur/api/GetProperties.java +++ b/core/src/main/java/rocks/artur/api/GetProperties.java @@ -9,6 +9,6 @@ * This interface enables getting a property distribution. */ public interface GetProperties { - List getProperties(); - List getProperties(FilterCriteria filter); + List getProperties(String datasetName); + List getProperties(FilterCriteria filter, String datasetName); } diff --git a/core/src/main/java/rocks/artur/api/GetPropertyValueDistribution.java b/core/src/main/java/rocks/artur/api/GetPropertyValueDistribution.java index 54733bb..01566ba 100644 --- a/core/src/main/java/rocks/artur/api/GetPropertyValueDistribution.java +++ b/core/src/main/java/rocks/artur/api/GetPropertyValueDistribution.java @@ -11,5 +11,5 @@ * This interface enables getting a property value distribution given a property name. */ public interface GetPropertyValueDistribution { - List getPropertyValueDistribution(Property propertyName, FilterCriteria filterCriteria); + List getPropertyValueDistribution(Property propertyName, FilterCriteria filterCriteria, String datasetName); } diff --git a/core/src/main/java/rocks/artur/api/GetPropertyValueDistributionWithFilter.java b/core/src/main/java/rocks/artur/api/GetPropertyValueDistributionWithFilter.java index 1b1ebac..17b5173 100644 --- a/core/src/main/java/rocks/artur/api/GetPropertyValueDistributionWithFilter.java +++ b/core/src/main/java/rocks/artur/api/GetPropertyValueDistributionWithFilter.java @@ -8,5 +8,5 @@ * This interface enables getting a property value distribution given a filter. */ public interface GetPropertyValueDistributionWithFilter { - List getPropertyValueDistributionWithFilter(String propertyName, String filter); + List getPropertyValueDistributionWithFilter(String propertyName, String filter, String datasetName); } diff --git a/core/src/main/java/rocks/artur/api/GetSamples.java b/core/src/main/java/rocks/artur/api/GetSamples.java index 4cc9506..96a80c2 100644 --- a/core/src/main/java/rocks/artur/api/GetSamples.java +++ b/core/src/main/java/rocks/artur/api/GetSamples.java @@ -15,7 +15,7 @@ public interface GetSamples { void setProperties(List properties); - Iterable getObjects(FilterCriteria filterCriteria); + Iterable getObjects(FilterCriteria filterCriteria, String datasetName); - List getSamplingInfo(FilterCriteria filterCriteria); + List getSamplingInfo(FilterCriteria filterCriteria, String datasetName); } diff --git a/core/src/main/java/rocks/artur/api/GetSources.java b/core/src/main/java/rocks/artur/api/GetSources.java index b4b18c6..bd1e5c5 100644 --- a/core/src/main/java/rocks/artur/api/GetSources.java +++ b/core/src/main/java/rocks/artur/api/GetSources.java @@ -6,5 +6,5 @@ * This interface enables getting a property distribution. 
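* More precisely, implementations return the distinct sources of characterisation results for a dataset, i.e. toolname:toolversion pairs such as Jhove:1.20.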
*/ public interface GetSources { - List getSources(); + List getSources(String datasetName); } diff --git a/core/src/main/java/rocks/artur/api/ResolveConflicts.java b/core/src/main/java/rocks/artur/api/ResolveConflicts.java index a08ea5c..11126b7 100644 --- a/core/src/main/java/rocks/artur/api/ResolveConflicts.java +++ b/core/src/main/java/rocks/artur/api/ResolveConflicts.java @@ -1,5 +1,5 @@ package rocks.artur.api; public interface ResolveConflicts { - void run(); + void run(String datasetName); } diff --git a/core/src/main/java/rocks/artur/api_impl/AnalyzePersistFileImpl.java b/core/src/main/java/rocks/artur/api_impl/AnalyzePersistFileImpl.java index 2399727..36f471a 100644 --- a/core/src/main/java/rocks/artur/api_impl/AnalyzePersistFileImpl.java +++ b/core/src/main/java/rocks/artur/api_impl/AnalyzePersistFileImpl.java @@ -2,11 +2,12 @@ import rocks.artur.api.AnalyzePersistFile; import rocks.artur.api.CharacterisationResultProducer; +import rocks.artur.api_impl.utils.ByteFile; import rocks.artur.domain.CharacterisationResult; import rocks.artur.domain.CharacterisationResultGateway; -import java.io.File; -import java.io.IOException; + +import java.util.ArrayList; import java.util.List; public class AnalyzePersistFileImpl implements AnalyzePersistFile { @@ -20,24 +21,20 @@ public AnalyzePersistFileImpl(CharacterisationResultProducer characterisationRes } @Override - public Long uploadCharacterisationResults(File file) { - try { - List characterisationResults = characterisationResultProducer.processFile(file); - characterisationResults.forEach(item -> characterisationResultGateway.addCharacterisationResult(item)); - return Long.valueOf(characterisationResults.size()); - } catch (IOException e) { - throw new RuntimeException(e); - } + public Long uploadCharacterisationResults(ByteFile file, String datasetName) { + List characterisationResults = characterisationResultProducer.processFile(file); + characterisationResultGateway.addCharacterisationResults(characterisationResults, datasetName); + return Long.valueOf(characterisationResults.size()); } @Override - public Long uploadCharacterisationResults(byte[] file, String filename) { - try { - List characterisationResults = characterisationResultProducer.processFile(file, filename); - characterisationResultGateway.addCharacterisationResults(characterisationResults); - return Long.valueOf(characterisationResults.size()); - } catch (IOException e) { - throw new RuntimeException(e); - } + public Long uploadCharacterisationResults(List files, String datasetName) { + List characterisationResults = new ArrayList<>(); + files.stream().forEach(file -> { + List tmp = characterisationResultProducer.processFile(file); + characterisationResults.addAll(tmp); + }); + characterisationResultGateway.addCharacterisationResults(characterisationResults, datasetName); + return Long.valueOf(characterisationResults.size()); } } diff --git a/core/src/main/java/rocks/artur/api_impl/CRH_ResolveConflictsImpl.java b/core/src/main/java/rocks/artur/api_impl/CRH_ResolveConflictsImpl.java new file mode 100644 index 0000000..ec76608 --- /dev/null +++ b/core/src/main/java/rocks/artur/api_impl/CRH_ResolveConflictsImpl.java @@ -0,0 +1,153 @@ +package rocks.artur.api_impl; + +import rocks.artur.api.ResolveConflicts; +import rocks.artur.domain.CharacterisationResult; +import rocks.artur.domain.CharacterisationResultGateway; +import rocks.artur.domain.Entry; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import 
java.util.function.Function; +import java.util.stream.Collectors; + +public class CRH_ResolveConflictsImpl {//implements ResolveConflicts { + + + private CharacterisationResultGateway characterisationResultGateway; + + public CRH_ResolveConflictsImpl(CharacterisationResultGateway characterisationResultGateway) { + this.characterisationResultGateway = characterisationResultGateway; + } + + + public void run(String datasetName) { + init(datasetName); + System.out.println(sourceWeights); + //System.out.println("sum of weights: " + sourceWeights.values().stream().reduce(0d, Double::sum)); + updateTruth(datasetName); + System.out.println("sum of weights: " + sourceWeights.values().stream().reduce(0d, Double::sum)); + //System.out.println(truth); + for (int i = 0; i < 3; i++) { + updateWeights(datasetName); + System.out.println(sourceWeights); + System.out.println("sum of weights: " + sourceWeights.values().stream().reduce(0d, Double::sum)); + updateTruth(datasetName); + //System.out.println(truth); + } + + resolveConflicts(datasetName); + } + + private void resolveConflicts(String datasetName) { + truth.entrySet().stream().forEach( entry -> { + Entry key = entry.getKey(); + String value = entry.getValue(); + + List characterisationResultsByEntry = characterisationResultGateway.getCharacterisationResultsByEntry(key, datasetName); + for (CharacterisationResult characterisationResult : characterisationResultsByEntry) { + if (!characterisationResult.getValue().equals(value)) { + characterisationResultGateway.delete(characterisationResult, datasetName); + } + } + + + }); + } + + private void updateWeights(String datasetName) { + Map score = sources.stream().collect(Collectors.toMap( + Function.identity(), + s -> 0.0)); + + Map count = sources.stream().collect(Collectors.toMap( + Function.identity(), + s -> 0.0)); + + + List entries = characterisationResultGateway.getEntries(datasetName); + + for (Entry entry : entries) { + List characterisationResults = characterisationResultGateway.getCharacterisationResultsByEntry(entry, datasetName); + + for (CharacterisationResult characterisationResult : characterisationResults) { + + String trueValue = truth.get(entry); + + String value = characterisationResult.getValue(); + String source = characterisationResult.getSource(); + if (value.equals(trueValue)) { + score.put(source, score.getOrDefault(source, 0.0) + 0); + } else { + score.put(source, score.getOrDefault(source, 0.0) + 1); + } + count.put(source, count.getOrDefault(source, 0.0) + 1); + } + } + for (String source : score.keySet()) { + Double countSource = count.getOrDefault(source, 1.0); + if (countSource == 0 ) { + score.put(source, 0d); + } else { + score.put(source, score.get(source) / countSource); + } + } + Double sum = score.values().stream().reduce(0.0, (a, b) -> a + b); + + score.replaceAll((s, v) -> score.get(s) / sum); + + Optional> max = score.entrySet().stream().max(Map.Entry.comparingByValue()); + if (max.isPresent()) { + Double norm_score = max.get().getValue(); + for (String source : score.keySet()) { + double w = score.get(source) / norm_score; + Double weig = score.get(source); + if (w == 0d) { + sourceWeights.put(source,0.00001); + } else { + sourceWeights.put(source, -Math.log(w)); + } + } + } + } + + private void updateTruth(String datasetName) { + List entries = characterisationResultGateway.getEntries(datasetName); + for (Entry entry : entries) { + List characterisationResults = characterisationResultGateway.getCharacterisationResultsByEntry(entry, datasetName); + + if 
(characterisationResults.size() > 0) { + CharacterisationResult firstResult = characterisationResults.get(0); + Map votingScores = new HashMap<>(); + for (CharacterisationResult characterisationResult : characterisationResults) { + String source = characterisationResult.getSource(); + Double sourceWeight = sourceWeights.get(source); + String value = characterisationResult.getValue(); + + votingScores.put(value, votingScores.getOrDefault(value, 0.0) + sourceWeight); + } + Optional> first = votingScores.entrySet().stream().max(Map.Entry.comparingByValue()); + if (first.isPresent()) { + String trueValue = first.get().getKey(); + truth.put(entry, trueValue); + } + } + } + } + + List sources; + Map sourceWeights; + Map truth; + + void init(String datasetName) { + + sources = characterisationResultGateway.getSources(datasetName); + sourceWeights = sources.stream().collect(Collectors.toMap( + Function.identity(), + s -> 1.0 / sources.size())); + truth = new HashMap<>(); + + + } +} diff --git a/core/src/main/java/rocks/artur/api_impl/GetCollectionStatisticsImpl.java b/core/src/main/java/rocks/artur/api_impl/GetCollectionStatisticsImpl.java index ea97869..10bcd1c 100644 --- a/core/src/main/java/rocks/artur/api_impl/GetCollectionStatisticsImpl.java +++ b/core/src/main/java/rocks/artur/api_impl/GetCollectionStatisticsImpl.java @@ -15,8 +15,8 @@ public GetCollectionStatisticsImpl(CharacterisationResultGateway characterisatio } @Override - public Map getStatistics(FilterCriteria filterCriteria) { - Map sizeStatistics = characterisationResultGateway.getCollectionStatistics(filterCriteria); + public Map getStatistics(FilterCriteria filterCriteria, String datasetName) { + Map sizeStatistics = characterisationResultGateway.getCollectionStatistics(filterCriteria, datasetName); return sizeStatistics; } diff --git a/core/src/main/java/rocks/artur/api_impl/GetDatasetInfoImpl.java b/core/src/main/java/rocks/artur/api_impl/GetDatasetInfoImpl.java new file mode 100644 index 0000000..a45f302 --- /dev/null +++ b/core/src/main/java/rocks/artur/api_impl/GetDatasetInfoImpl.java @@ -0,0 +1,19 @@ +package rocks.artur.api_impl; + +import rocks.artur.api.GetDatasetInfo; +import rocks.artur.domain.CharacterisationResultGateway; + +import java.util.List; + +public class GetDatasetInfoImpl implements GetDatasetInfo { + private CharacterisationResultGateway characterisationResultGateway; + + public GetDatasetInfoImpl(CharacterisationResultGateway characterisationResultGateway) { + this.characterisationResultGateway = characterisationResultGateway; + } + + @Override + public List listDatasets() { + return this.characterisationResultGateway.listDatasets(); + } +} diff --git a/core/src/main/java/rocks/artur/api_impl/GetObjectsImpl.java b/core/src/main/java/rocks/artur/api_impl/GetObjectsImpl.java index 61d2585..bffbbb2 100644 --- a/core/src/main/java/rocks/artur/api_impl/GetObjectsImpl.java +++ b/core/src/main/java/rocks/artur/api_impl/GetObjectsImpl.java @@ -16,20 +16,20 @@ public GetObjectsImpl(CharacterisationResultGateway characterisationResultGatewa } @Override - public List getObjects(FilterCriteria filterCriteria) { - List objects = characterisationResultGateway.getObjects(filterCriteria); + public List getObjects(FilterCriteria filterCriteria, String datasetName) { + List objects = characterisationResultGateway.getObjects(filterCriteria, datasetName); return objects; } @Override - public Iterable getObject(String filePath) { - Iterable characterisationResultsByFilepath = 
characterisationResultGateway.getCharacterisationResultsByFilepath(filePath); + public Iterable getObject(String filePath, String datasetName) { + Iterable characterisationResultsByFilepath = characterisationResultGateway.getCharacterisationResultsByFilepath(filePath, datasetName); return characterisationResultsByFilepath; } @Override - public List getConflictsFromObject(String filePath) { - List characterisationResultsByFilepath = characterisationResultGateway.getConflictsByFilepath(filePath); + public List getConflictsFromObject(String filePath, String datasetName) { + List characterisationResultsByFilepath = characterisationResultGateway.getConflictsByFilepath(filePath, datasetName); return characterisationResultsByFilepath; } diff --git a/core/src/main/java/rocks/artur/api_impl/GetPropertiesImpl.java b/core/src/main/java/rocks/artur/api_impl/GetPropertiesImpl.java index f1839d9..4c6d0a3 100644 --- a/core/src/main/java/rocks/artur/api_impl/GetPropertiesImpl.java +++ b/core/src/main/java/rocks/artur/api_impl/GetPropertiesImpl.java @@ -15,14 +15,14 @@ public GetPropertiesImpl(CharacterisationResultGateway characterisationResultGat } @Override - public List getProperties() { - List propertyDistribution = characterisationResultGateway.getPropertyDistribution(null); + public List getProperties(String datasetName) { + List propertyDistribution = characterisationResultGateway.getPropertyDistribution(null, datasetName); return propertyDistribution; } @Override - public List getProperties(FilterCriteria filter) { - List propertyDistribution = characterisationResultGateway.getPropertyDistribution(filter); + public List getProperties(FilterCriteria filter, String datasetName) { + List propertyDistribution = characterisationResultGateway.getPropertyDistribution(filter, datasetName); return propertyDistribution; } } diff --git a/core/src/main/java/rocks/artur/api_impl/GetPropertyValueDistributionImpl.java b/core/src/main/java/rocks/artur/api_impl/GetPropertyValueDistributionImpl.java index 792c79e..86f7ce9 100644 --- a/core/src/main/java/rocks/artur/api_impl/GetPropertyValueDistributionImpl.java +++ b/core/src/main/java/rocks/artur/api_impl/GetPropertyValueDistributionImpl.java @@ -18,8 +18,8 @@ public GetPropertyValueDistributionImpl(CharacterisationResultGateway characteri @Override - public List getPropertyValueDistribution(Property property, FilterCriteria filterCriteria) { - List valueDistributionByProperty = characterisationResultGateway.getPropertyValueDistribution(property, filterCriteria); + public List getPropertyValueDistribution(Property property, FilterCriteria filterCriteria, String datasetName) { + List valueDistributionByProperty = characterisationResultGateway.getPropertyValueDistribution(property, filterCriteria, datasetName); return valueDistributionByProperty; } } diff --git a/core/src/main/java/rocks/artur/api_impl/GetSamplesImpl.java b/core/src/main/java/rocks/artur/api_impl/GetSamplesImpl.java index 9df1966..0e3b98e 100644 --- a/core/src/main/java/rocks/artur/api_impl/GetSamplesImpl.java +++ b/core/src/main/java/rocks/artur/api_impl/GetSamplesImpl.java @@ -34,8 +34,8 @@ public void setProperties(List properties) { } @Override - public List getObjects(FilterCriteria filterCriteria) { - List samplingResults = characterisationResultGateway.getSamples(filterCriteria, algorithm, properties); + public List getObjects(FilterCriteria filterCriteria, String datasetName) { + List samplingResults = characterisationResultGateway.getSamples(filterCriteria, algorithm, properties, 
datasetName); List results = new ArrayList<>(); switch (algorithm) { @@ -50,8 +50,8 @@ public List getObjects(FilterCriteria filterCriteria) { } @Override - public List getSamplingInfo(FilterCriteria filterCriteria) { - List samplingResults = characterisationResultGateway.getSamples(filterCriteria, algorithm, properties); + public List getSamplingInfo(FilterCriteria filterCriteria, String datasetName) { + List samplingResults = characterisationResultGateway.getSamples(filterCriteria, algorithm, properties, datasetName); return samplingResults; } diff --git a/core/src/main/java/rocks/artur/api_impl/GetSourcesImpl.java b/core/src/main/java/rocks/artur/api_impl/GetSourcesImpl.java index 977ccee..c1631b6 100644 --- a/core/src/main/java/rocks/artur/api_impl/GetSourcesImpl.java +++ b/core/src/main/java/rocks/artur/api_impl/GetSourcesImpl.java @@ -13,8 +13,8 @@ public GetSourcesImpl(CharacterisationResultGateway characterisationResultGatewa } @Override - public List getSources() { - List sources = characterisationResultGateway.getSources(); + public List getSources(String datasetName) { + List sources = characterisationResultGateway.getSources(datasetName); return sources; } } diff --git a/core/src/main/java/rocks/artur/api_impl/Native_ResolveConflictsImpl.java b/core/src/main/java/rocks/artur/api_impl/Native_ResolveConflictsImpl.java new file mode 100644 index 0000000..2a9581c --- /dev/null +++ b/core/src/main/java/rocks/artur/api_impl/Native_ResolveConflictsImpl.java @@ -0,0 +1,16 @@ +package rocks.artur.api_impl; + +import rocks.artur.api.ResolveConflicts; +import rocks.artur.domain.CharacterisationResultGateway; + +public class Native_ResolveConflictsImpl implements ResolveConflicts { + private CharacterisationResultGateway characterisationResultGateway; + + public Native_ResolveConflictsImpl(CharacterisationResultGateway characterisationResultGateway) { + this.characterisationResultGateway = characterisationResultGateway; + } + @Override + public void run(String datasetName) { + characterisationResultGateway.resolveConflictsNative(datasetName); + } +} diff --git a/core/src/main/java/rocks/artur/api_impl/filter/FilterOperation.java b/core/src/main/java/rocks/artur/api_impl/filter/FilterOperation.java index d510842..058bd2d 100644 --- a/core/src/main/java/rocks/artur/api_impl/filter/FilterOperation.java +++ b/core/src/main/java/rocks/artur/api_impl/filter/FilterOperation.java @@ -1,6 +1,5 @@ package rocks.artur.api_impl.filter; -import rocks.artur.domain.ValueType; public enum FilterOperation { LESS("<"), LESS_OR_EQUAL ("<="), diff --git a/core/src/main/java/rocks/artur/api_impl/utils/ByteFile.java b/core/src/main/java/rocks/artur/api_impl/utils/ByteFile.java new file mode 100644 index 0000000..748a148 --- /dev/null +++ b/core/src/main/java/rocks/artur/api_impl/utils/ByteFile.java @@ -0,0 +1,27 @@ +package rocks.artur.api_impl.utils; + +public class ByteFile { + byte[] file; + String filename; + + public ByteFile(byte[] file, String filename) { + this.file = file; + this.filename = filename; + } + + public byte[] getFile() { + return file; + } + + public String getFilename() { + return filename; + } + + public void setFile(byte[] file) { + this.file = file; + } + + public void setFilename(String filename) { + this.filename = filename; + } +} diff --git a/core/src/main/java/rocks/artur/domain/CharacterisationResultGateway.java b/core/src/main/java/rocks/artur/domain/CharacterisationResultGateway.java index f9a86bc..ba2c7fc 100644 --- 
a/core/src/main/java/rocks/artur/domain/CharacterisationResultGateway.java +++ b/core/src/main/java/rocks/artur/domain/CharacterisationResultGateway.java @@ -18,14 +18,14 @@ public interface CharacterisationResultGateway { * * @param characterisationResult */ - void addCharacterisationResult(CharacterisationResult characterisationResult); + void addCharacterisationResult(CharacterisationResult characterisationResult, String datasetName); /** * gets all characterisation results * * @return an iterable of all results stored in the DB. */ - List getCharacterisationResults(FilterCriteria filter); + List getCharacterisationResults(FilterCriteria filter, String datasetName); /** * gets a distribution of all properties that match the given filter criteria. @@ -33,56 +33,60 @@ public interface CharacterisationResultGateway { * @param filter a filter criteria * @return a list of property statistics */ - List getPropertyDistribution(FilterCriteria filter); + List getPropertyDistribution(FilterCriteria filter, String datasetName); /** * gets characterisation results describing a digital object identified by the given file path. * * @return an iterable of characterisation results. */ - List getCharacterisationResultsByFilepath(String filePath); + List getCharacterisationResultsByFilepath(String filePath, String datasetName); - List getCharacterisationResultsByEntry(Entry entry); + List getCharacterisationResultsByEntry(Entry entry, String datasetName); - List getConflictEntries(); + List getConflictEntries(String datasetName); - List getEntries(); + List getEntries(String datasetName); /** * gets a list of characterisation results with conflicts for a given digital object. * * @return an iterable of characterisation results. */ - List getConflictsByFilepath(String filepath); + List getConflictsByFilepath(String filepath, String datasetName); - Map getCollectionStatistics(FilterCriteria filterCriteria); + Map getCollectionStatistics(FilterCriteria filterCriteria, String datasetName); - List getPropertyValueDistribution(Property property, FilterCriteria filter); + List getPropertyValueDistribution(Property property, FilterCriteria filter, String datasetName); /** * gets a list of sources of characterisation results. * * @return an iterable of characterisation result sources. */ - List getSources(); + List getSources(String datasetName); /** * gets a list of objects. * * @return an iterable of PropertiesPerObjectStatistic. */ - List getObjects(FilterCriteria filterCriteria); + List getObjects(FilterCriteria filterCriteria, String datasetName); /** * gets a list of samples. * * @return an iterable of PropertiesPerObjectStatistic. 
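* The datasetName argument selects the dataset from which the samples are drawn.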
*/ - List getSamples(FilterCriteria filterCriteria, SamplingAlgorithms algorithm, List properties); + List getSamples(FilterCriteria filterCriteria, SamplingAlgorithms algorithm, List properties, String datasetName); - void addCharacterisationResults(List characterisationResults); + void addCharacterisationResults(List characterisationResults, String datasetName); - double getConflictRate(); + double getConflictRate(String datasetName); - void delete(CharacterisationResult characterisationResult); + void delete(CharacterisationResult characterisationResult, String datasetName); + + void resolveConflictsNative(String datasetName); + + List listDatasets(); } diff --git a/core/src/main/java/rocks/artur/domain/Property.java b/core/src/main/java/rocks/artur/domain/Property.java index e607ce1..bb3b730 100644 --- a/core/src/main/java/rocks/artur/domain/Property.java +++ b/core/src/main/java/rocks/artur/domain/Property.java @@ -7,13 +7,9 @@ public enum Property { FORMAT(ValueType.STRING), FORMAT_VERSION(ValueType.STRING), MIMETYPE(ValueType.STRING), - FILENAME(ValueType.STRING), - AUTHOR(ValueType.STRING), EXTERNALIDENTIFIER(ValueType.STRING), SIZE(ValueType.INTEGER), - MD5CHECKSUM(ValueType.STRING), FSLASTMODIFIED(ValueType.TIMESTAMP), - FILEPATH(ValueType.STRING), CREATED(ValueType.TIMESTAMP), LASTMODIFIED(ValueType.TIMESTAMP), CREATINGAPPLICATIONVERSION(ValueType.STRING), @@ -26,15 +22,12 @@ public enum Property { WELLFORMED(ValueType.STRING), - MESSAGE(ValueType.STRING), - LINEBREAK(ValueType.STRING), CHARSET(ValueType.STRING), PAGECOUNT(ValueType.INTEGER), WORDCOUNT(ValueType.INTEGER), CHARACTERCOUNT(ValueType.INTEGER), HASANNOTATIONS(ValueType.STRING), - TITLE(ValueType.STRING), ISTAGGED(ValueType.STRING), HASFORMS(ValueType.STRING), HASOUTLINE(ValueType.STRING), diff --git a/docker-compose.clickhouse.dev.yaml b/docker-compose.clickhouse.dev.yaml new file mode 100644 index 0000000..835e06e --- /dev/null +++ b/docker-compose.clickhouse.dev.yaml @@ -0,0 +1,105 @@ +version: '3' + +services: + + fits: + build: + context: ./fits + dockerfile: ./Dockerfile + container_name: fits + env_file: .env + networks: + - web + restart: unless-stopped + ports: + - 8081:8080 + + rest: + build: + context: . + dockerfile: ./Dockerfile + env_file: .env + networks: + - web + restart: unless-stopped + environment: + - DB_SELECTOR=clickhouse + deploy: + replicas: 1 + ports: + - 8092:8080 + depends_on: + - fits + - db-docker + + web: + build: + context: ./web + dockerfile: ./Dockerfile + container_name: web + env_file: .env + networks: + - web + restart: unless-stopped + ports: + - 8080:3000 + + db-docker: + image: yandex/clickhouse-server + container_name: db-docker + networks: + - web + ports: + - 8123:8123 + - 9000:9000 + - 9004:9004 + + + db-docker-init: + image: yandex/clickhouse-server + container_name: db-docker-init + volumes: + - ./config/clickhouse:/var/clickhouse + depends_on: + - db-docker + networks: + - web + entrypoint: [ '/bin/sh', '-c' ] + command: | + " + while ! 
clickhouse-client --host db-docker -q \"SHOW databases;\"; do + echo waiting for clickhouse up + sleep 1 + done + + clickhouse-client --host db-docker --queries-file /var/clickhouse/initdb.sql + + tail -f /dev/null + " + + + adminer: + image: adminer + container_name: adminer + env_file: .env + restart: unless-stopped + networks: + - web + ports: + - 8090:8080 + + nginx: + image: nginx + container_name: nginx + env_file: .env + volumes: + - ./config/nginx/nginx.conf:/etc/nginx/conf.d/default.conf + ports: + - 8082:80 + networks: + - web + depends_on: + - rest + +networks: + web: \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml index 09c02ce..c464759 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -3,7 +3,7 @@ version: '3' services: fits: - image: artourkin/fits-web:release-0.8 + image: artourkin/fits-web:main container_name: fits env_file: .env networks: @@ -13,7 +13,7 @@ services: - 8081:8080 rest: - image: artourkin/fitsinn-rest:release-0.8 + image: artourkin/fitsinn-rest:main container_name: rest env_file: .env networks: @@ -26,7 +26,7 @@ services: - db-docker web: - image: artourkin/fitsinn-web:release-0.8 + image: artourkin/fitsinn-web:main container_name: web env_file: .env networks: diff --git a/fits-client/src/main/java/rocks/artur/FITSClient/FITSClient.java b/fits-client/src/main/java/rocks/artur/FITSClient/FITSClient.java index e0be439..fa0a57f 100644 --- a/fits-client/src/main/java/rocks/artur/FITSClient/FITSClient.java +++ b/fits-client/src/main/java/rocks/artur/FITSClient/FITSClient.java @@ -16,11 +16,15 @@ import org.xml.sax.SAXException; import rocks.artur.FITSObjects.FITSPropertyJsonPath; import rocks.artur.api.CharacterisationResultProducer; +import rocks.artur.api_impl.utils.ByteFile; import rocks.artur.domain.CharacterisationResult; import rocks.artur.domain.Property; +import rocks.artur.domain.ValueType; import rocks.artur.utils.JSONToolkit; +import rocks.artur.utils.STAXToolkit; import javax.xml.XMLConstants; +import javax.xml.stream.XMLStreamException; import javax.xml.transform.Source; import javax.xml.transform.stream.StreamSource; import javax.xml.validation.Schema; @@ -29,12 +33,13 @@ import java.io.ByteArrayInputStream; import java.io.File; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.nio.file.Files; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Set; +import java.time.Instant; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeFormatterBuilder; +import java.time.format.DateTimeParseException; +import java.util.*; import java.util.stream.Collectors; //@ApplicationScoped @@ -43,12 +48,25 @@ public class FITSClient implements CharacterisationResultProducer { List knownProperties = Arrays.stream(FITSPropertyJsonPath.values()).map(Enum::name).collect(Collectors.toList()); private String FITS_URL = "http://localhost:8888"; + + static DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); + + static DateTimeFormatter inputFormatter = new DateTimeFormatterBuilder() + .appendPattern("[yyyy:MM:dd HH:mm:ssXXX][yyyy:MM:dd HH:mm:ss][yyyy:MM:dd HH:mmXXX][yyyy-MM-dd HH:mm:ss][yyyy/MM/dd HH:mm:ss]") + .toFormatter(); + @Override - public String getVersion() throws IOException { + public String getVersion(){ CloseableHttpClient httpclient = HttpClients.createDefault(); HttpGet httpGet = new HttpGet(getFITS_URL() + "/version"); - return 
getString(httpclient.execute(httpGet)); + try { + return getString(httpclient.execute(httpGet)); + } catch (IOException e) { + LOG.error("Exception occurred when querying the FITS version"); + e.printStackTrace(); + } + return ""; } @@ -74,46 +92,88 @@ public boolean isValid(byte[] file) { } } - public List processFile(byte[] file, String filename) throws IOException { - - if (isValid(file)) { - try { - String fitsSTRING = new String(file, StandardCharsets.UTF_8); - return extractCharacterisationResults(fitsSTRING); - } catch (JSONException e) { - throw new RuntimeException(e); - } - } else { + public boolean isValid(String content) { + return content.contains("xmlns=\"http://hul.harvard.edu/ois/xml/ns/fits/fits_output\""); + } - CloseableHttpClient httpclient = HttpClients.createDefault(); - HttpPost httppost = new HttpPost(getFITS_URL() + "/fits/examine"); + @Override + public List processFile(File file) { + String fileName = file.getName(); + byte[] fileContent = new byte[0]; + try { + fileContent = Files.readAllBytes(file.toPath()); + } catch (IOException e) { + LOG.error("Exception occurred during file processing"); + e.printStackTrace(); + } + ByteFile byteFile = new ByteFile(fileContent, fileName); + return processFile(byteFile); + } - ByteArrayBody body = new ByteArrayBody(file, filename); + @Override + public List processFile(ByteFile file) { + ArrayList result = new ArrayList<>(); + if (file.getFile().length == 0) { + return result; + } + try { + String content = new String(file.getFile()); + if (!isValid(content)) { + CloseableHttpClient httpclient = HttpClients.createDefault(); + HttpPost httppost = new HttpPost(getFITS_URL() + "/fits/examine"); + ByteArrayBody body = new ByteArrayBody(file.getFile(), file.getFilename()); + MultipartEntityBuilder builder = MultipartEntityBuilder.create(); + builder.addPart("datafile", body); + HttpEntity reqEntity = builder.build(); + httppost.setEntity(reqEntity); + CloseableHttpResponse response = httpclient.execute(httppost); + + content = getString(response); + LOG.debug(content); + } + result.addAll(extractCharacterisationResultsStax(content)); + } catch (Exception e) { + LOG.error("Exception occurred during FITS file parsing"); + e.printStackTrace(); + } - MultipartEntityBuilder builder = MultipartEntityBuilder.create(); - builder.addPart("datafile", body); - HttpEntity reqEntity = builder.build(); - httppost.setEntity(reqEntity); + result=this.fixDateTypes(result); + return result; + } - CloseableHttpResponse response = httpclient.execute(httppost); - String fitsResultXML = getString(response); - LOG.debug(fitsResultXML); - try { - return extractCharacterisationResults(fitsResultXML); - } catch (JSONException e) { - throw new RuntimeException(e); + private ArrayList fixDateTypes(ArrayList result) { + result.stream().forEach(item -> { + if (item.getValueType().equals(ValueType.TIMESTAMP)){ + String value = item.getValue(); + LOG.debug(String.format("Parsing Object: %s", item)); + if (item.getSource().startsWith("OIS File Information")) { + LocalDateTime parsed = + LocalDateTime.ofInstant(Instant.ofEpochMilli(Long.parseLong(value)), + TimeZone.getDefault().toZoneId()); + item.setValue(parsed.format(outputFormatter)); + } else { + LocalDateTime parsed = tryParseLocalDateTime(value, inputFormatter); + if (parsed != null) { + item.setValue(parsed.format(outputFormatter)); + } else { + item.setValue(null); + } + } + LOG.debug(String.format("Parsed Result: %s", item)); } - } + }); + return result; } - @Override - public List processFile(File 
file) throws IOException { - String fileName = file.getName(); - byte[] fileContent = Files.readAllBytes(file.toPath()); - return processFile(fileContent, fileName); + LocalDateTime tryParseLocalDateTime(String datetimeString, DateTimeFormatter formatter) { + try { + return LocalDateTime.parse(datetimeString, formatter); + } catch (DateTimeParseException e) { + return null; + } } - private List extractCharacterisationResults(String fitsResultXML) throws JSONException { + List extractCharacterisationResults(String fitsResultXML) throws JSONException { List results = new ArrayList<>(); String fitsResultJSON = JSONToolkit.translateXML(fitsResultXML); Set availableFitsProperties = JSONToolkit.getAvailableFitsProperties(fitsResultJSON); @@ -133,14 +193,20 @@ private List extractCharacterisationResults(String fitsR JSONToolkit.getCharacterisationResults(FITSPropertyJsonPath.IDENTIFICATION, fitsResultJSON); results.addAll(characterisationResults); - String filepath = results.stream().filter(result -> - result.getProperty().equals(Property.FILEPATH)).findFirst().get().getValue().toString(); - addFilepathLabel(results, filepath); + //String filepath = results.stream().filter(result -> + // result.getProperty().equals(Property.FILEPATH)).findFirst().get().getValue().toString(); + //addFilepathLabel(results, filepath); return results; } + List extractCharacterisationResultsStax(String fitsResultXML) throws XMLStreamException { + STAXToolkit staxToolkit = new STAXToolkit(); + return staxToolkit.getCharacterisationResults(fitsResultXML); + + } + private void addFilepathLabel(List characterisationResults, String filepath) { characterisationResults.stream().forEach(result -> result.setFilePath(filepath)); diff --git a/fits-client/src/main/java/rocks/artur/utils/JSONToolkit.java b/fits-client/src/main/java/rocks/artur/utils/JSONToolkit.java index f4de29d..7f84d32 100644 --- a/fits-client/src/main/java/rocks/artur/utils/JSONToolkit.java +++ b/fits-client/src/main/java/rocks/artur/utils/JSONToolkit.java @@ -15,12 +15,11 @@ import rocks.artur.domain.CharacterisationResult; import rocks.artur.domain.Property; -import java.text.ParseException; -import java.text.SimpleDateFormat; import java.time.Instant; import java.time.LocalDateTime; -import java.time.ZoneId; import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeFormatterBuilder; +import java.time.format.DateTimeParseException; import java.util.*; import java.util.stream.Collectors; @@ -28,7 +27,12 @@ public class JSONToolkit { private static final Logger LOG = LoggerFactory.getLogger(JSONToolkit.class); public static int PRETTY_PRINT_INDENT_FACTOR = 4; - static DateTimeFormatter outputFormat = DateTimeFormatter.ofPattern("dd-MM-yyyy HH:mm:ss"); + static DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); + + static DateTimeFormatter inputFormatter = new DateTimeFormatterBuilder() + .appendPattern("[yyyy:MM:dd HH:mm:ssXXX][yyyy:MM:dd HH:mm:ss][yyyy:MM:dd HH:mmXXX][yyyy-MM-dd HH:mm:ss][yyyy/MM/dd HH:mm:ss]") + .toFormatter(); + public static String translateXML(String xmlString) throws JSONException { JSONObject xmlJSONObj = XML.toJSONObject(xmlString); @@ -120,19 +124,25 @@ private static Collection parseGenericProperty } CharacterisationResult tmpResult = new CharacterisationResult(); - setValues(tmpResult, Property.valueOf(jsonPath.name().toUpperCase()), gp.getContent()); - tmpResult.setSource(gp.getToolname() + ":" + gp.getToolversion()); - tmpResult = convertDataTypes(tmpResult); - 
result.add(tmpResult); + boolean isPresent = Arrays.stream(Property.values()).anyMatch(item -> jsonPath.name().equalsIgnoreCase(item.name())); + if (isPresent) { + setValues(tmpResult, Property.valueOf(jsonPath.name().toUpperCase()), gp.getContent()); + tmpResult.setSource(gp.getToolname() + ":" + gp.getToolversion()); + tmpResult = convertDataTypes(tmpResult); + result.add(tmpResult); + } } } else { GenericProperty read = document.read(jsonPath.getFitsProperty(), GenericProperty.class); CharacterisationResult tmpResult = new CharacterisationResult(); - setValues(tmpResult, Property.valueOf(jsonPath.name().toUpperCase()), read.getContent()); - tmpResult.setSource(read.getToolname() + ":" + read.getToolversion()); - tmpResult = convertDataTypes(tmpResult); - result.add(tmpResult); + boolean isPresent = Arrays.stream(Property.values()).anyMatch(item -> jsonPath.name().equalsIgnoreCase(item.name())); + if (isPresent) { + setValues(tmpResult, Property.valueOf(jsonPath.name().toUpperCase()), read.getContent()); + tmpResult.setSource(read.getToolname() + ":" + read.getToolversion()); + tmpResult = convertDataTypes(tmpResult); + result.add(tmpResult); + } } } catch (Exception e) { e.printStackTrace(); @@ -140,57 +150,27 @@ private static Collection parseGenericProperty return result; } - private static CharacterisationResult convertDataTypes(CharacterisationResult tmpResult) { - + private static CharacterisationResult convertDataTypes(CharacterisationResult tmpResult) throws DateTimeParseException { switch (tmpResult.getProperty()) { case CREATED: case FSLASTMODIFIED: case LASTMODIFIED: LOG.debug(String.format("Parsing Object: %s", tmpResult)); - if (tmpResult.getSource().startsWith("Exiftool")) { - try { - SimpleDateFormat sdf = new SimpleDateFormat("yyyy:MM:dd HH:mm:ssXXX"); - LocalDateTime parse = sdf.parse(tmpResult.getValue()).toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime(); - tmpResult.setValue(parse.format(outputFormat)); - } catch (ParseException e) { - try { - SimpleDateFormat sdf = new SimpleDateFormat("yyyy:MM:dd HH:mm:ss"); - LocalDateTime parse = sdf.parse(tmpResult.getValue()).toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime(); - tmpResult.setValue(parse.format(outputFormat)); - } catch (ParseException ex) { - try { - SimpleDateFormat sdf = new SimpleDateFormat("yyyy:MM:dd HH:mmXXX"); - LocalDateTime parse = sdf.parse(tmpResult.getValue()).toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime(); - tmpResult.setValue(parse.format(outputFormat)); - } catch (ParseException ex2) { - throw new RuntimeException(ex2); - } - } - } - } else if (tmpResult.getSource().startsWith("NLNZ Metadata Extractor")) { - try { - SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); - LocalDateTime parse = sdf.parse(tmpResult.getValue()).toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime(); - tmpResult.setValue(parse.format(outputFormat)); - } catch (ParseException ex) { - throw new RuntimeException(ex); - } - } else if (tmpResult.getSource().startsWith("OIS File Information")) { - LocalDateTime triggerTime = + + if (tmpResult.getSource().startsWith("OIS File Information")) { + LocalDateTime parsed = LocalDateTime.ofInstant(Instant.ofEpochMilli(Long.parseLong(tmpResult.getValue())), TimeZone.getDefault().toZoneId()); - tmpResult.setValue(triggerTime.format(outputFormat)); - } else if (tmpResult.getSource().startsWith("Tika")) { - DateTimeFormatter tikaFormatter = DateTimeFormatter.ISO_INSTANT; - Instant dateInstant = 
Instant.from(tikaFormatter.parse(tmpResult.getValue())); - LocalDateTime date = LocalDateTime.ofInstant(dateInstant, ZoneId.systemDefault()); - tmpResult.setValue(date.format(outputFormat)); + tmpResult.setValue(parsed.format(outputFormatter)); + } else { + LocalDateTime parsed = LocalDateTime.parse(tmpResult.getValue(), inputFormatter); + tmpResult.setValue(parsed.format(outputFormatter)); } + LOG.debug(String.format("Parsed Result: %s", tmpResult)); break; } - return tmpResult; } diff --git a/fits-client/src/main/java/rocks/artur/utils/STAXToolkit.java b/fits-client/src/main/java/rocks/artur/utils/STAXToolkit.java new file mode 100644 index 0000000..a064f39 --- /dev/null +++ b/fits-client/src/main/java/rocks/artur/utils/STAXToolkit.java @@ -0,0 +1,209 @@ +package rocks.artur.utils; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import rocks.artur.domain.CharacterisationResult; +import rocks.artur.domain.Property; + +import javax.xml.namespace.QName; +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLStreamConstants; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; +import java.io.StringReader; +import java.util.*; + +public class STAXToolkit { + + private final Logger LOG = LoggerFactory.getLogger(STAXToolkit.class); + Map<String, String> identities = new HashMap<>(); // identity attributes per identity element: format -> value, mimetype -> value + List<String> sources = new ArrayList<>(); // reporting tools, each entry stored as "toolname:toolversion" + String filepath; + String propertyValue; + List<CharacterisationResult> results = new ArrayList<>(); + + public List<CharacterisationResult> getCharacterisationResults(String fitsResultXML) throws XMLStreamException { + + XMLInputFactory factory = XMLInputFactory.newInstance(); + XMLStreamReader reader = factory.createXMLStreamReader(new StringReader(fitsResultXML)); + + while (reader.hasNext()) { + int event = reader.next(); + switch (event) { + case XMLStreamConstants.START_ELEMENT: + QName elementName = reader.getName(); + handleStartElement(elementName, reader); + break; + case XMLStreamConstants.END_ELEMENT: + QName endElementName = reader.getName(); + handleEndElement(endElementName, reader); + break; + case XMLStreamConstants.CHARACTERS: + String text = reader.getText().trim(); + handleText(text); + break; + } + } + results.forEach(item -> item.setFilePath(filepath)); + + return results; + } + + private void handleStartElement(QName elementName, XMLStreamReader reader) throws XMLStreamException { + // Collect identity, tool and property data from the opening element + String elementNameLocalPart = elementName.getLocalPart(); + LOG.debug("Start Element: " + elementNameLocalPart); + + switch (elementNameLocalPart) { + case "identity": + for (int i = 0; i < reader.getAttributeCount(); i++) { + String attributeName = reader.getAttributeName(i).getLocalPart(); + String attributeValue = reader.getAttributeValue(i); + LOG.debug(" - Attribute: " + attributeName + "=" + attributeValue); + if (attributeName.equals("format") || attributeName.equals("mimetype")) { + identities.put(attributeName, attributeValue); + } + } + break; + case "tool": + for (int i = 0; i < reader.getAttributeCount(); i++) { + String attributeName = reader.getAttributeName(i).getLocalPart(); + String attributeValue = reader.getAttributeValue(i); + LOG.debug(" - Attribute: " + attributeName + "=" + attributeValue); + + if ("toolname".equals(attributeName)) { + sources.add(attributeValue); + } + if ("toolversion".equals(attributeName)) { + String toolname = sources.get(sources.size() - 1); + toolname += ":" + attributeValue; + sources.set(sources.size() - 1, toolname); + } + } + 
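+ // Each tool element contributes one sources entry of the form "toolname:toolversion" (e.g. "Jhove:1.20.1", an illustrative value); appending the version to the most recently added entry assumes FITS emits the toolname attribute before toolversion.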
break; + case "version": + String version_source = ""; + for (int i = 0; i < reader.getAttributeCount(); i++) { + String attributeName = reader.getAttributeName(i).getLocalPart(); + String attributeValue = reader.getAttributeValue(i); + LOG.debug(" - Attribute: " + attributeName + "=" + attributeValue); + if ("toolname".equals(attributeName)) { + version_source = attributeValue; + } + if ("toolversion".equals(attributeName)) { + version_source += ":" + attributeValue; + } + } + CharacterisationResult resVersion = new CharacterisationResult(); + resVersion.setProperty(Property.FORMAT_VERSION); + resVersion.setSource(version_source); + resVersion.setValueType(Property.FORMAT_VERSION.getValueType()); + results.add(resVersion); + break; + + case "well-formed": + String sourceWellformed = ""; + for (int i = 0; i < reader.getAttributeCount(); i++) { + String attributeName = reader.getAttributeName(i).getLocalPart(); + String attributeValue = reader.getAttributeValue(i); + LOG.debug(" - Attribute: " + attributeName + "=" + attributeValue); + if ("toolname".equals(attributeName)) { + sourceWellformed = attributeValue; + } + if ("toolversion".equals(attributeName)) { + sourceWellformed += ":" + attributeValue; + } + } + CharacterisationResult resWellformed = new CharacterisationResult(); + resWellformed.setProperty(Property.WELLFORMED); + resWellformed.setSource(sourceWellformed); + resWellformed.setValueType(Property.WELLFORMED.getValueType()); + results.add(resWellformed); + break; + default: + String property = elementNameLocalPart; + boolean isPresent = Arrays.stream(Property.values()).anyMatch(item -> property.equalsIgnoreCase(item.name())); + if (isPresent) { + String source = ""; + for (int i = 0; i < reader.getAttributeCount(); i++) { + String attributeName = reader.getAttributeName(i).getLocalPart(); + String attributeValue = reader.getAttributeValue(i); + LOG.debug(" - Attribute: " + attributeName + "=" + attributeValue); + if ("toolname".equals(attributeName)) { + source = attributeValue; + } + if ("toolversion".equals(attributeName)) { + source += ":" + attributeValue; + } + } + CharacterisationResult new_res = new CharacterisationResult(); + Property propertyEnum = Property.valueOf(property.toUpperCase()); + new_res.setProperty(propertyEnum); + new_res.setSource(source); + new_res.setValueType(propertyEnum.getValueType()); + results.add(new_res); + } + + + } + + } + + private void handleEndElement(QName endElementName, XMLStreamReader reader) { + // Flush buffered identity/tool data and attach the captured text to the pending result + String elementNameLocalPart = endElementName.getLocalPart(); + LOG.debug("End Element: " + elementNameLocalPart); + + + switch (elementNameLocalPart) { + + case "identity": + for (Map.Entry<String, String> identity : identities.entrySet()) { + String property = identity.getKey(); + String value = identity.getValue(); + + for (String source : sources) { + CharacterisationResult new_res = new CharacterisationResult(); + Property propertyEnum = Property.valueOf(property.toUpperCase()); + new_res.setProperty(propertyEnum); + new_res.setValue(value); + new_res.setSource(source); + new_res.setValueType(propertyEnum.getValueType()); + results.add(new_res); + } + } + identities.clear(); + sources.clear(); + break; + case "version": + CharacterisationResult characterisationResultVersion = results.get(results.size() - 1); + String format = identities.get("format"); + characterisationResultVersion.setValue(String.format("%s %s", format, propertyValue)); + break; + case "fits": + results.forEach(res -> 
res.setFilePath(filepath)); + break; + case "well-formed": + CharacterisationResult characterisationResultWellformed = results.get(results.size() - 1); + characterisationResultWellformed.setValue(propertyValue); + break; + case "filepath": + filepath = propertyValue; + default: + String property = elementNameLocalPart; + boolean isPresent = Arrays.stream(Property.values()).anyMatch(item -> property.equalsIgnoreCase(item.name())); + if (isPresent) { + CharacterisationResult characterisationResult = results.get(results.size() - 1); + characterisationResult.setValue(propertyValue); + } + + } + + } + + private void handleText(String text) { + LOG.debug("Text: " + text); + propertyValue = text; + } +} diff --git a/fits-client/src/test/java/rocks/artur/FITSClient/FITSClientTest.java b/fits-client/src/test/java/rocks/artur/FITSClient/FITSClientTest.java new file mode 100644 index 0000000..86b942e --- /dev/null +++ b/fits-client/src/test/java/rocks/artur/FITSClient/FITSClientTest.java @@ -0,0 +1,425 @@ +package rocks.artur.FITSClient; + + +import org.junit.Assert; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.mockserver.integration.ClientAndServer; +import org.mockserver.model.HttpRequest; +import org.mockserver.model.HttpResponse; +import rocks.artur.api_impl.utils.ByteFile; +import rocks.artur.domain.CharacterisationResult; + +import javax.xml.stream.XMLStreamException; +import java.io.File; +import java.io.IOException; +import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.List; + +import static org.mockserver.model.HttpRequest.request; +import static org.mockserver.model.HttpResponse.response; + +public class FITSClientTest { + private ClientAndServer mockServer; + + private int MOCK_SERVER_PORT = 8888; + + public static String VALID_FITS_RESULT = "\r\n" + + "\n" + + " \n" + + " \n" + + " \n" + + " fmt/1149\n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 903\n" + + " /usr/local/tomcat/webapps/fits/upload/1582118786085/README.md\n" + + " README.md\n" + + " 133c6cf05a139fa2e472ce6fa11bb5d2\n" + + " 1582118786000\n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + "\n" + + "\n" + + "\n"; + + + public static String VALID_FITS_RESULT2 = "\r\n" + + "\n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 1.0\n" + + " fmt/11\n" + + " \n" + + " \n" + + " \n" + + " /usr/local/tomcat/webapps/fits/upload/1596052237783/main\n" + + " main\n" + + " 1875256\n" + + " 926a7c8c079e4ccb837410746b2919e2\n" + + " 1596052237000\n" + + " \n" + + " \n" + + " \n" + + " \n" + + " Deflate/Inflate\n" + + " Deflate\n" + + " 2400\n" + + " 1531\n" + + " normal*\n" + + " \n" + + " \n" + + " \n" + + " \n" + + " Deflate/Inflate\n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 2400\n" + + " 1531\n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " normal*\n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + "\n"; + + + public static String VALID_FITS_RESULT3 = "\r\n" + + "\n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " 1.4\n" + + " fmt/18\n" + + " 
\n" + + " \n" + + " \n" + + " 39586\n" + + " /XPP\n" + + " 2011:12:27 10:44:28+01:00\n" + + " 2002:04:25 13:02:24Z\n" + + " /home/petrov/taverna/tmp/000/000009.pdf\n" + + " /home/petrov/taverna/tmp/000/000009.pdf\n" + + " 92ddc75b3b59872e6656b54b8f236764\n" + + " 1324979068000\n" + + " \n" + + " \n" + + " true\n" + + " true\n" + + " \n" + + " \n" + + " \n" + + " Table DP-1. Profile of General Demographic Characteristics: 2000\n" + + " Census 2000 Profiles\n" + + " US Census Bureau\n" + + " 4\n" + + " no\n" + + " yes\n" + + " no\n" + + " no\n" + + " no\n" + + " no\n" + + " \n" + + " \n" + + ""; + + @BeforeEach + void setUp() { + mockServer = mockServer.startClientAndServer(MOCK_SERVER_PORT); + } + + @AfterEach + public void stopServer() { + mockServer.stop(); + } + + @Test + void getVersionTest() throws IOException { + mockServer.when( + HttpRequest.request() + .withMethod("GET") + .withPath("/version") + .withHeader("\"Content-type\", \"application/json\"")) + .respond( + HttpResponse.response() + .withStatusCode(200) + .withBody("1.5.0") + ); + + + FITSClient fitsClient = new FITSClient(); + fitsClient.setFITS_URL(String.format("http://localhost:%d/", MOCK_SERVER_PORT)); + + String s = fitsClient.getVersion(); + Assert.assertEquals("1.5.0", s); + } + + @Test + void processFileAsByteArrayTest() throws IOException { + mockServer.when( + HttpRequest.request() + .withMethod("POST") + .withPath("/fits/examine") + .withHeader("\"Content-type\", \"application/json\"")) + .respond( + HttpResponse.response() + .withStatusCode(200) + .withBody(VALID_FITS_RESULT) + ); + + + URL resource = getClass().getClassLoader().getResource("README.md"); + byte[] array = Files.readAllBytes(Paths.get(resource.getPath())); + + FITSClient fitsClient = new FITSClient(); + fitsClient.setFITS_URL(String.format("http://localhost:%d", MOCK_SERVER_PORT)); + ByteFile byteFile = new ByteFile(array, "testFileName" ); + List output = fitsClient.processFile(byteFile); + + Assert.assertEquals(9, output.size()); + } + + + @Test + void processFileTest() throws IOException { + + mockServer.when( + HttpRequest.request() + .withMethod("POST") + .withPath("/fits/examine") + .withHeader("\"Content-type\", \"application/json\"")) + .respond( + HttpResponse.response() + .withStatusCode(200) + .withBody(VALID_FITS_RESULT) + ); + + FITSClient fitsClient = new FITSClient(); + fitsClient.setFITS_URL(String.format("http://localhost:%d", MOCK_SERVER_PORT)); + + URL resource = getClass().getClassLoader().getResource("README.md"); + List output = fitsClient.processFile(new File(resource.getPath())); + + Assert.assertEquals(9, output.size()); + } + + @Test + void processFITSFileTest() throws IOException { + + mockServer.when( + HttpRequest.request() + .withMethod("POST") + .withPath("/fits/examine") + .withHeader("\"Content-type\", \"application/json\"")) + .respond( + HttpResponse.response() + .withStatusCode(200) + .withBody(VALID_FITS_RESULT) + ); + + FITSClient fitsClient = new FITSClient(); + fitsClient.setFITS_URL(String.format("http://localhost:%d", MOCK_SERVER_PORT)); + + URL resource = getClass().getClassLoader().getResource("998003.csv.fits.xml"); + List output = fitsClient.processFile(new File(resource.getPath())); + + Assert.assertEquals(14, output.size()); + } + + + //The test can be run against running FITS service, i.e. 
fits-docker + @Disabled + @Test + void processFileTestWithoutMock() throws IOException { + FITSClient fitsClient = new FITSClient(); + fitsClient.setFITS_URL(String.format("http://localhost:%d", 8081)); + + URL resource = getClass().getClassLoader().getResource("README.md"); + List output = fitsClient.processFile(new File(resource.getPath())); + + Assert.assertEquals(12, output.size()); + } + + + //The test can be run against running FITS service, i.e. fits-docker + @Disabled + @Test + void processByteArrayTestWithoutMock() throws IOException { + FITSClient fitsClient = new FITSClient(); + fitsClient.setFITS_URL(String.format("http://localhost:%d", 8081)); + + URL resource = getClass().getClassLoader().getResource("README.md"); + File file = new File(resource.getPath()); + List output = fitsClient.processFile(file); + + Assert.assertEquals(9, output.size()); + } + + + @Test + void processFITSFileCSVTest() throws IOException { + + FITSClient fitsClient = new FITSClient(); + + URL resource = getClass().getClassLoader().getResource("998003.csv.fits.xml"); + List output = fitsClient.processFile(new File(resource.getPath())); + + Assert.assertEquals(14, output.size()); + } + + @Test + void processFITSFileHTMLTest() throws IOException { + + FITSClient fitsClient = new FITSClient(); + + URL resource = getClass().getClassLoader().getResource("002526.html.fits.xml"); + List output = fitsClient.processFile(new File(resource.getPath())); + + Assert.assertEquals(21, output.size()); + } + + @Test + void processFITSFilePDFTest() throws IOException { + + FITSClient fitsClient = new FITSClient(); + + URL resource = getClass().getClassLoader().getResource("000009.pdf.fits.xml"); + List output = fitsClient.processFile(new File(resource.getPath())); + + Assert.assertEquals(28, output.size()); + } + + @Test + void processFITSFileDocTest() throws IOException { + + FITSClient fitsClient = new FITSClient(); + + URL resource = getClass().getClassLoader().getResource("002392.doc.fits.xml"); + List output = fitsClient.processFile(new File(resource.getPath())); + + Assert.assertEquals(22, output.size()); + } + + @Test + void processFITSFileGZTest() throws IOException { + + FITSClient fitsClient = new FITSClient(); + + URL resource = getClass().getClassLoader().getResource("002451.gz.fits.xml"); + List output = fitsClient.processFile(new File(resource.getPath())); + + Assert.assertEquals(10, output.size()); + } + + + @Test + void processFITSFilPDF2Test() throws IOException { + + FITSClient fitsClient = new FITSClient(); + + URL resource = getClass().getClassLoader().getResource("002838.pdf.fits.xml"); + List output = fitsClient.processFile(new File(resource.getPath())); + + Assert.assertEquals(30, output.size()); + } + + @Test + void processFITSFileTEXTest() throws IOException { + + FITSClient fitsClient = new FITSClient(); + + URL resource = getClass().getClassLoader().getResource("002283.tex.fits.xml"); + List output = fitsClient.processFile(new File(resource.getPath())); + + Assert.assertEquals(8, output.size()); + } + + + @Test + void processFITSFilPDF3Test() throws IOException { + + FITSClient fitsClient = new FITSClient(); + + URL resource = getClass().getClassLoader().getResource("002729.pdf.fits.xml"); + List output = fitsClient.processFile(new File(resource.getPath())); + + Assert.assertEquals(11, output.size()); + } + + @Test + void extractCharResultsStax() throws XMLStreamException { + FITSClient fitsClient = new FITSClient(); + List characterisationResults = 
fitsClient.extractCharacterisationResultsStax(VALID_FITS_RESULT2); + System.out.println(characterisationResults); + } + + @Test + void compareExtractionStaxVsJson() throws XMLStreamException { + FITSClient fitsClient = new FITSClient(); + List characterisationResultsStax = fitsClient.extractCharacterisationResultsStax(VALID_FITS_RESULT3); + List characterisationResultsJSON = fitsClient.extractCharacterisationResults(VALID_FITS_RESULT3); + Assert.assertEquals(characterisationResultsJSON.size(), characterisationResultsStax.size()); + } + +} \ No newline at end of file diff --git a/fits-client/src/test/java/rocks/artur/FITSClient/JSONToolkitTest.java b/fits-client/src/test/java/rocks/artur/FITSClient/JSONToolkitTest.java new file mode 100644 index 0000000..1e8bd6d --- /dev/null +++ b/fits-client/src/test/java/rocks/artur/FITSClient/JSONToolkitTest.java @@ -0,0 +1,75 @@ +package rocks.artur.FITSClient; + +import org.json.JSONException; +import org.junit.Assert; +import org.junit.jupiter.api.Test; +import org.springframework.test.context.ActiveProfiles; +import rocks.artur.FITSObjects.FITSPropertyJsonPath; +import rocks.artur.domain.CharacterisationResult; +import rocks.artur.utils.JSONToolkit; + +import java.time.Instant; +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; +import java.util.Arrays; +import java.util.List; +import java.util.Set; + +class JSONToolkitTest { + + @Test + void translateXMLTest() throws JSONException { + String s = JSONToolkit.translateXML(FITSClientTest.VALID_FITS_RESULT); + System.out.println(s); + } + + @Test + void getCharacterisationResult2Test() throws JSONException { + String jsonString = JSONToolkit.translateXML(FITSClientTest.VALID_FITS_RESULT2); + List results = JSONToolkit.getCharacterisationResults(FITSPropertyJsonPath.IDENTIFICATION, + jsonString); + System.out.println(results); + } + + @Test + void getCharacterisationResultTest() throws JSONException { + String jsonString = JSONToolkit.translateXML(FITSClientTest.VALID_FITS_RESULT); + List results = JSONToolkit.getCharacterisationResults(FITSPropertyJsonPath.FILENAME, + jsonString); + + } + + + @Test + void getCharacterisationResultIdentificationTest() throws JSONException { + String jsonString = JSONToolkit.translateXML(FITSClientTest.VALID_FITS_RESULT); + List results = JSONToolkit.getCharacterisationResults(FITSPropertyJsonPath.IDENTIFICATION, + jsonString); + + System.out.println(results); + Assert.assertEquals(7, results.size()); + } + + @Test + void getAvailableFitsPropertiesTest() throws JSONException { + String jsonString = JSONToolkit.translateXML(FITSClientTest.VALID_FITS_RESULT3); + Set availableFitsProperties = JSONToolkit.getAvailableFitsProperties(jsonString); + List objects = Arrays.asList(availableFitsProperties.toArray()); + + Assert.assertEquals("[pageCount, isRightsManaged, created, author, hasAnnotations, title, fslastmodified, valid, isTagged, wellformed, filename, isProtected, size, creatingApplicationName, filepath, lastmodified, md5checksum, hasForms, hasOutline]", objects.toString()); + } + + @Test + void dateTimeFormatterTest() throws JSONException { + //SELECT PROPERTY_VALUE, PARSEDATETIME(PROPERTY_VALUE,'dd-MM-yyyy HH:mm:ss') FROM CHARACTERISATIONRESULTVIEW + //where property='FSLASTMODIFIED' + // + DateTimeFormatter tikaFormatter = DateTimeFormatter.ISO_INSTANT; + Instant dateInstant = Instant.from(tikaFormatter.parse("2008-06-04T22:47:36Z")); + LocalDateTime date = 
LocalDateTime.ofInstant(dateInstant, ZoneId.of(ZoneOffset.UTC.getId())); + System.out.println(date);; + + } +} \ No newline at end of file diff --git a/infra-persistence/pom.xml b/infra-persistence/pom.xml index e0abc18..8077390 100644 --- a/infra-persistence/pom.xml +++ b/infra-persistence/pom.xml @@ -15,17 +15,30 @@ - + + + rocks.artur + fitsinn-core + 0.1.0 + compile + + + + org.springframework.boot spring-boot-starter-data-jpa - junit - junit - ${junit.version} - test + org.springframework.boot + spring-boot-starter-jdbc + + + org.springframework.boot + spring-boot-starter-validation + + org.springframework.boot spring-boot-starter-test @@ -37,6 +50,30 @@ + + + + + com.clickhouse + clickhouse-jdbc + 0.6.0 + all + + + + + com.h2database + h2 + ${h2.version} + + + + mysql + mysql-connector-java + 8.0.33 + runtime + + org.hibernate.orm hibernate-jpamodelgen @@ -49,29 +86,27 @@ 4.0.0 pom - - org.hibernate - hibernate-validator - 8.0.0.Final - - org.glassfish.jaxb jaxb-runtime 4.0.0 + - rocks.artur - fitsinn-core - 0.1.0 - compile + org.springdoc + springdoc-openapi-starter-webmvc-ui + 2.0.2 + + + - com.h2database - h2 - ${h2.version} + junit + junit + ${junit.version} + test - + diff --git a/infra-persistence/src/main/java/rocks/artur/clickhouse/CharacterisationResultClickhouseRepository.java b/infra-persistence/src/main/java/rocks/artur/clickhouse/CharacterisationResultClickhouseRepository.java new file mode 100644 index 0000000..a6d8353 --- /dev/null +++ b/infra-persistence/src/main/java/rocks/artur/clickhouse/CharacterisationResultClickhouseRepository.java @@ -0,0 +1,572 @@ +package rocks.artur.clickhouse; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.jdbc.core.JdbcTemplate; +import org.springframework.jdbc.core.ParameterizedPreparedStatementSetter; +import rocks.artur.api_impl.filter.AndFilterCriteria; +import rocks.artur.api_impl.filter.OrFilterCriteria; +import rocks.artur.api_impl.filter.SingleFilterCriteria; +import rocks.artur.domain.CharacterisationResult; +import rocks.artur.domain.FilterCriteria; +import rocks.artur.domain.Property; +import rocks.artur.domain.ValueType; +import rocks.artur.domain.statistics.PropertiesPerObjectStatistic; +import rocks.artur.domain.statistics.PropertyStatistic; + +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + + +public class CharacterisationResultClickhouseRepository { + + private static final Logger LOG = LoggerFactory.getLogger(CharacterisationResultClickhouseRepository.class); + private final JdbcTemplate template; + + /** + * Creates a new instance. + * + * @param template to use to perform JDBC queries to the ClickHouse database. 
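+ * + * <p>Minimal wiring sketch (hypothetical; assumes a DataSource backed by the + * com.clickhouse:clickhouse-jdbc driver declared in the pom, e.g. with a URL such as + * "jdbc:ch://localhost:8123"): + * {@code new CharacterisationResultClickhouseRepository(new JdbcTemplate(dataSource))}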
+ */ + public CharacterisationResultClickhouseRepository(JdbcTemplate template) { + this.template = template; + } + + public void save(CharacterisationResult characterisationResult, String datasetName) { + this.createDb(datasetName); + int rowsInserted = template.update(String.format("insert into %s.characterisationresult (file_path,property, source, property_value, value_type)" + + " values (?,?,?,?,?)", datasetName), + characterisationResult.getFilePath(), + characterisationResult.getProperty().name(), + characterisationResult.getSource(), + characterisationResult.getValue(), + characterisationResult.getValueType().name()); + + System.out.println("Number of rows updated = " + rowsInserted); + } + + public List getPropertyDistribution(String datasetName) { + String sql = String.format( + "select property, count(property_value) as number " + + "from %s.characterisationresultaggregated " + + "group by property ORDER BY number desc LIMIT 200", datasetName); + + List result = template.query(sql, (rs, rowNum) -> { + PropertyStatistic propstat = new PropertyStatistic(rs.getLong("number"), Property.valueOf(rs.getString("property"))); + return propstat; + }); + return result; + } + + public List getPropertyValueDistribution(String property, FilterCriteria filter, String datasetName) { + String subquery = ""; + if (filter != null) { + subquery = convert(filter, datasetName); + subquery = String.format(" file_path in (%s) and ", subquery); + } + + String sql = String.format( + "select property, property_value, count(property_value) as number " + + "from %s.characterisationresultaggregated " + + "where %s property = '%s' group by property, property_value ORDER BY number desc LIMIT 200", datasetName, subquery, property); + LOG.info(sql); + List result = template.query(sql, (rs, rowNum) -> { + Object[] item = new Object[3]; + item[0] = rs.getString("property"); + item[1] = rs.getString("property_value"); + item[2] = rs.getLong("number"); + return item; + }); + return result; + } + + + public List getPropertyValueTimeStampDistribution(String property, FilterCriteria filter, String datasetName) { + String subquery = ""; + if (filter != null) { + subquery = convert(filter, datasetName); + subquery = String.format(" file_path in (%s) and ", subquery); + } + + String sql = String.format( + "select property, CASE " + + "WHEN property_value = 'CONFLICT' THEN property_value " + + "ELSE SUBSTRING(property_value,1,4) " + + "END as value, count(property) as number " + + "from %s.characterisationresultaggregated " + + "where %s property = '%s' group by property, CASE " + + "WHEN property_value = 'CONFLICT' THEN property_value " + + "ELSE SUBSTRING(property_value,1,4) " + + "END ORDER BY number desc LIMIT 200", datasetName, subquery, property); + + List result = template.query(sql, (rs, rowNum) -> { + Object[] item = new Object[3]; + item[0] = rs.getString(1); + item[1] = rs.getString(2); + item[2] = rs.getLong(3); + return item; + }); + return result; + } + + + public String convert(FilterCriteria filter, String datasetName) { + if (filter instanceof SingleFilterCriteria) { + Property property = ((SingleFilterCriteria) filter).getSearchKey(); + String operator = ((SingleFilterCriteria) filter).getOperation().getValue(); + String value = ((SingleFilterCriteria) filter).getSearchValue(); + String result; + switch (property.getValueType()) { + case TIMESTAMP: + if (!value.equals("CONFLICT")) { + result = String.format("select file_path from %s.characterisationresult where property = '%s' and cast(property_value as 
DATETIME) %s cast('%s' as DATE)", datasetName, property, operator, value); + } else { + result = String.format("select file_path from %s.characterisationresultaggregated where property = '%s' and property_value %s '%s'", datasetName, property, operator, value); + } + break; + default: + result = String.format("select file_path from %s.characterisationresultaggregated where property = '%s' and property_value %s '%s'", datasetName, property, operator, value); + } + return result; + } else if (filter instanceof AndFilterCriteria) { + AndFilterCriteria andFilter = (AndFilterCriteria) filter; + + String whereStatement1 = convert(andFilter.getCriteria(), datasetName); + String whereStatement2 = convert(andFilter.getOtherCriteria(), datasetName); + + String result = String.format("( (%s) INTERSECT (%s) )", whereStatement1, whereStatement2); + return result; + + } else if (filter instanceof OrFilterCriteria) { + OrFilterCriteria orFilter = (OrFilterCriteria) filter; + + String whereStatement1 = convert(orFilter.getCriteria(), datasetName); + String whereStatement2 = convert(orFilter.getOtherCriteria(), datasetName); + + String result = String.format("( (%s) UNION ALL (%s) )", whereStatement1, whereStatement2); + return result; + } else { + throw new UnsupportedOperationException("this type of FilterCriteria is not supported"); + } + } + + public void saveAll(List characterisationResults, String datasetName) { + this.createDb(datasetName); + List filtered = characterisationResults.stream() + .filter(item -> item.getFilePath() != null) + .filter(item -> item.getValue() != null && item.getValue().length() < 300).collect(Collectors.toList()); + + template.batchUpdate(String.format("insert into %s.characterisationresult (file_path,property, source, property_value, value_type)" + + " values (?,?,?,?,?)", datasetName), + filtered, + 10000, + new ParameterizedPreparedStatementSetter() { + @Override + public void setValues(PreparedStatement ps, CharacterisationResult cResult) throws SQLException { + ps.setString(1, cResult.getFilePath()); + ps.setString(2, cResult.getProperty().name()); + ps.setString(3, cResult.getSource()); + ps.setString(4, cResult.getValue()); + ps.setString(5, cResult.getValueType().name()); + } + }); + + } + + public List getCharacterisationResults(FilterCriteria filter, String datasetName) { + String subquery = ""; + if (filter != null) { + subquery = convert(filter, datasetName); + subquery = String.format("where file_path in (%s) ", subquery); + } + + + String sql = String.format( + "select file_path,property, source, property_value, value_type " + + "from %s.characterisationresult " + + "%s", datasetName, subquery); + + List result = template.query(sql, (rs, rowNum) -> { + CharacterisationResult item = new CharacterisationResult(); + item.setFilePath(rs.getString(1)); + item.setProperty(Property.valueOf(rs.getString(2))); + item.setSource(rs.getString(3)); + item.setValue(rs.getString(4)); + item.setValueType(ValueType.valueOf(rs.getString(5))); + return item; + }); + return result; + } + + public Long getDigitalObjectCount(String datasetName) { + String query = String.format( + "select count(distinct file_path) from %s.characterisationresultaggregated ", datasetName); + + Long result = template.queryForObject(query, Long.class); + return result; + } + + public Long getConflictCount(String datasetName) { + String query = String.format( + "select count(distinct file_path) from %s.characterisationresultaggregated where property_value = 'CONFLICT' ", datasetName); + + Long result = 
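+ // A file_path is counted once any of its properties aggregated to the sentinel value 'CONFLICT' (at least two sources disagreed), so this returns the number of conflicted objects, not the number of conflicted (object, property) pairs.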
template.queryForObject(query, Long.class); + return result; + } + + public List getSources(String datasetName) { + String sql = String.format( + "select distinct source from %s.characterisationresult ", datasetName); + + List result = template.query(sql, (rs, rowNum) -> { + return rs.getString(1); + }); + return result; + } + + public List getCharacterisationResultsByFilepath(String filePath, String datasetName) { + String sql = String.format( + "select file_path, property, source, property_value, value_type " + + "from %s.characterisationresult " + + "where file_path='%s' ", datasetName, filePath); + + List result = template.query(sql, (rs, rowNum) -> { + CharacterisationResult item = new CharacterisationResult(); + item.setFilePath(rs.getString(1)); + item.setProperty(Property.valueOf(rs.getString(2))); + item.setSource(rs.getString(3)); + item.setValue(rs.getString(4)); + item.setValueType(ValueType.valueOf(rs.getString(5))); + return item; + }); + return result; + } + + public double[] getSizeStatistics(FilterCriteria filter, String datasetName) { + String subquery = ""; + if (filter != null) { + subquery = convert(filter, datasetName); + subquery = String.format(" file_path in (%s) and ", subquery); + } + + String sql = String.format( + "select sum(toInt32(property_value)) as totalsize, " + + "min(toInt32(property_value)) as minsize, " + + "max(toInt32(property_value)) as maxsize, " + + "avg(toInt32(property_value)) as avgsize, " + + "count(property_value) as count " + + "from %s.characterisationresultaggregated " + + "where %s property='SIZE'", datasetName, subquery); + + List result = template.query(sql, (rs, rowNum) -> { + double sum = rs.getDouble(1); + double min = rs.getDouble(2); + double max = rs.getDouble(3); + double avg = rs.getDouble(4); + double count = rs.getDouble(5); + + return new double[]{sum, min, max, avg, count}; + }); + return result.get(0); + + } + + public double[] getConflictStatistics(FilterCriteria filter, String datasetName) { + String subquery = ""; + if (filter != null) { + subquery = convert(filter, datasetName); + subquery = String.format(" file_path in (%s) and ", subquery); + } + + String sql = String.format( + "select count(distinct file_path) as count " + + "from %s.characterisationresultaggregated " + + "where %s property_value='CONFLICT'", datasetName, subquery); + + Long conflictsCount = template.queryForObject(sql, Long.class); + + + String subquery2 = ""; + if (filter != null) { + subquery2 = convert(filter, datasetName); + subquery2 = String.format("where file_path in (%s) ", subquery2); + } + + String sql2 = String.format( + "select count(distinct file_path) as count " + + "from %s.characterisationresultaggregated " + + "%s", datasetName, subquery2); + + Long totalCount = template.queryForObject(sql2, Long.class); + + double rate = 0d; + if (totalCount != 0) { + rate = (double) conflictsCount / totalCount; + } + double[] result = new double[]{conflictsCount, rate}; + return result; + } + + public List getObjects(FilterCriteria filter, String datasetName) { + String subquery = ""; + if (filter != null) { + subquery = convert(filter, datasetName); + subquery = String.format(" where file_path in (%s) ", subquery); + } + + String sql = String.format( + "select file_path, count(*) " + + "from %s.characterisationresultaggregated " + + " %s" + + "group by file_path", datasetName, subquery); + + List result = template.query(sql, (rs, rowNum) -> { + PropertiesPerObjectStatistic statistic = new PropertiesPerObjectStatistic(rs.getLong(2), 
rs.getString(1)); + return statistic; + + }); + + return result; + } + + public List getRandomSamples(FilterCriteria filterCriteria, int sampleSize, String datasetName) { + String subquery = ""; + if (filterCriteria != null) { + subquery = convert(filterCriteria, datasetName); + subquery = String.format(" where file_path in (%s) ", subquery); + } + + String sql = String.format( + "select file_path " + + "from %s.characterisationresultaggregated " + + " %s" + + "group by file_path ORDER BY RAND() LIMIT %d ", datasetName, subquery, sampleSize); + + List resultList = template.query(sql, (rs, rowNum) -> rs.getString(1)); + List collect = resultList.stream().map(item -> new String[]{"1", item}).collect(Collectors.toList()); + + return collect; + + } + + public List getSelectiveFeatureDistributionSamples(FilterCriteria filterCriteria, List properties, String datasetName) { + String subquery = ""; + if (filterCriteria != null) { + subquery = convert(filterCriteria, datasetName); + subquery = String.format(" where file_path in (%s) ", subquery); + } + + + + StringBuilder select = new StringBuilder("SELECT "); + + for (int i = 0; i < properties.size(); i++) { + String currProperty = properties.get(i).name(); + if (i == 0) { + select.append(String.format("count(%s.file_path) as size, min(%s.file_path) as example, %s.property_value ", currProperty, currProperty, currProperty)); + } else { + select.append(String.format(", %s.property_value ", currProperty)); + } + } + + StringBuilder from = new StringBuilder("FROM "); + + for (int i = 0; i < properties.size(); i++) { + String currProperty = properties.get(i).name(); + if (i == 0) { + + from.append(String.format(" (SELECT v.property_value, v.file_path FROM %s.characterisationresultaggregated v\n" + + "where %s v.property='%s' ) as %s ", datasetName, subquery, currProperty, currProperty)); + } else { + from.append(String.format(" join (SELECT v.property_value, v.file_path FROM %s.characterisationresultaggregated v\n" + + "where %s v.property='%s') as %s on %s.file_path=%s.file_path ", datasetName, subquery, currProperty, currProperty, properties.get(0).name(), currProperty)); + } //TODO: Probably, the join is not required. Check if it is true. 
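+ // Illustrative shape of the generated query for properties = [FORMAT, MIMETYPE]: + // SELECT count(FORMAT.file_path) as size, min(FORMAT.file_path) as example, FORMAT.property_value, MIMETYPE.property_value + // FROM (per-property subselect) as FORMAT JOIN (per-property subselect) as MIMETYPE ON FORMAT.file_path = MIMETYPE.file_path + // GROUP BY FORMAT.property_value, MIMETYPE.property_value ORDER BY size DESC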
+ } + + StringBuilder groupBy = new StringBuilder("GROUP BY "); + + for (int i = 0; i < properties.size(); i++) { + String currProperty = properties.get(i).name(); + if (i == 0) { + groupBy.append(String.format(" %s.property_value ", currProperty)); + } else { + groupBy.append(String.format(", %s.property_value ", currProperty)); + } + } + + + StringBuilder orderBy = new StringBuilder("ORDER BY size DESC"); + + String sql = String.format( + "%s %s %s %s", select, from, groupBy, orderBy); + System.out.println(sql); + + + List result = template.query(sql, (rs, rowNum) -> { + return new String[]{rs.getString(1), rs.getString(2)}; + }); + + return result; + } + + + public void resolveConflictsSimple(String datasetName){ + /* + DROP TABLE IF EXISTS to_delete; + + CREATE TABLE to_delete + ( + file_path String, + property String, + source String + ) ENGINE = Memory; + + insert into to_delete + with weights as ( + SELECT source, + property, + COUNT(property_value) as count, + COUNT(property_value) * 1.0/ (SELECT count(property_value) FROM characterisationresultaggregated + WHERE property_value != 'CONFLICT' ) as weight + FROM characterisationresult + WHERE file_path in (SELECT file_path FROM characterisationresultaggregated WHERE property_value != 'CONFLICT' ) + GROUP BY source, property + ), + tmp_table as ( + SELECT file_path, property, source, property_value, weight FROM characterisationresult + JOIN weights on characterisationresult.property == weights.property and characterisationresult.source == weights.source + WHERE (file_path, property) in (SELECT file_path, property from characterisationresultaggregated WHERE property_value == 'CONFLICT') + ) + SELECT file_path,property,source FROM tmp_table + WHERE (file_path, property, weight) not in (SELECT file_path, property, MAX(weight) FROM tmp_table GROUP BY file_path, property); + + delete from characterisationresult + where (file_path, property, source) in (select file_path,property,source from to_delete); + + drop table IF EXISTS characterisationresultaggregated; + */ + + + String sql = String.format("DROP TABLE IF EXISTS %s.to_delete;", datasetName); + int update = template.update(sql); + + + sql = String.format("" + + " CREATE TABLE %s.to_delete\n" + + " (\n" + + " file_path String,\n" + + " property String,\n" + + " source String\n" + + " ) ENGINE = Memory;", datasetName); + update = template.update(sql); + + sql = String.format("" + + " insert into %s.to_delete\n" + + " with weights as (\n" + + " SELECT source,\n" + + " property,\n" + + " COUNT(property_value) as count,\n" + + " COUNT(property_value) * 1.0/ (SELECT count(property_value) FROM %s.characterisationresultaggregated\n" + + " WHERE property_value != 'CONFLICT' ) as weight\n" + + " FROM %s.characterisationresult\n" + + " WHERE file_path in (SELECT file_path FROM %s.characterisationresultaggregated WHERE property_value != 'CONFLICT' )\n" + + " GROUP BY source, property\n" + + " ),\n" + + " tmp_table as (\n" + + " SELECT file_path, property, source, property_value, weight FROM %s.characterisationresult\n" + + " JOIN weights on %s.characterisationresult.property == weights.property and %s.characterisationresult.source == weights.source\n" + + " WHERE (file_path, property) in (SELECT file_path, property from %s.characterisationresultaggregated WHERE property_value == 'CONFLICT')\n" + + " )\n" + + " SELECT file_path,property,source FROM tmp_table\n" + + " WHERE (file_path, property, weight) not in (SELECT file_path, property, MAX(weight) FROM tmp_table GROUP BY file_path, property);", 
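+ // Majority-vote illustration (hypothetical numbers): if source "Droid:6.4" accounts for 9% of all conflict-free observations and "Tika:1.21" for 6%, the Droid value wins a conflict and the Tika row is queued in to_delete.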
datasetName, datasetName, datasetName, datasetName, datasetName, datasetName, datasetName, datasetName); + update = template.update(sql); + + sql = String.format("" + + " delete from %s.characterisationresult\n" + + " where (file_path, property, source) in (select file_path,property,source from %s.to_delete);", datasetName, datasetName); + update = template.update(sql); + + this.cleanAggregation(datasetName); + } + + + + void aggregateResults(String datasetName) { + /* + CREATE TABLE IF NOT EXISTS characterisationresultaggregated + ENGINE = AggregatingMergeTree + ORDER BY (property, file_path) AS + SELECT file_path, property, + CASE + WHEN COUNT(distinct property_value) = 1 THEN MIN(property_value) + ELSE 'CONFLICT' + END AS property_value + FROM characterisationresult + GROUP BY property, file_path; + */ + String sql = String.format("" + + "CREATE TABLE IF NOT EXISTS %s.characterisationresultaggregated\n" + + "ENGINE = AggregatingMergeTree\n" + + " ORDER BY (property, file_path) AS\n" + + "SELECT file_path, property,\n" + + " CASE\n" + + " WHEN COUNT(distinct property_value) = 1 THEN MIN(property_value)\n" + + " ELSE 'CONFLICT'\n" + + " END AS property_value\n" + + "FROM %s.characterisationresult\n" + + "GROUP BY property, file_path;", datasetName, datasetName + ); + template.update(sql); + } + + void cleanAggregation(String datasetName) { + String sql = String.format("drop table IF EXISTS %s.characterisationresultaggregated", datasetName); + int update = template.update(sql); + } + + + void createDb(String datasetName) { + String sql = String.format("create database if not exists %s", datasetName); + int update = template.update(sql); + + + + /* + + CREATE TABLE newdb.characterisationresult + ( + file_path String, + property String, + source String, + property_value String, + value_type String + ) ENGINE = ReplacingMergeTree + PRIMARY KEY (source, property, file_path) + ORDER BY (source, property, file_path); + + */ + sql = String.format("CREATE TABLE IF NOT EXISTS %s.characterisationresult\n" + + "(\n" + + " file_path String,\n" + + " property String,\n" + + " source String,\n" + + " property_value String,\n" + + " value_type String\n" + + ") ENGINE = ReplacingMergeTree\n" + + " PRIMARY KEY (source, property, file_path)\n" + + " ORDER BY (source, property, file_path);", datasetName); + update = template.update(sql); + } + + public List<String> listDatasets() { + String sql = "SELECT name FROM system.databases"; + + List<String> resultList = template.query(sql, (rs, rowNum) -> rs.getString(1)); + List<String> to_remove = Arrays.asList("system", "information_schema", "INFORMATION_SCHEMA"); + resultList.removeAll(to_remove); + return resultList; + } +} diff --git a/infra-persistence/src/main/java/rocks/artur/clickhouse/CharacterisationResultGatewayClickhouseImpl.java b/infra-persistence/src/main/java/rocks/artur/clickhouse/CharacterisationResultGatewayClickhouseImpl.java new file mode 100644 index 0000000..9f2559e --- /dev/null +++ b/infra-persistence/src/main/java/rocks/artur/clickhouse/CharacterisationResultGatewayClickhouseImpl.java @@ -0,0 +1,207 @@ +package rocks.artur.clickhouse; + +import rocks.artur.api_impl.filter.SingleFilterCriteria; +import rocks.artur.domain.*; +import rocks.artur.domain.statistics.BinningAlgorithms; +import rocks.artur.domain.statistics.PropertiesPerObjectStatistic; +import rocks.artur.domain.statistics.PropertyStatistic; +import rocks.artur.domain.statistics.PropertyValueStatistic; +import 
rocks.artur.jpa.view.CharacterisationResultViewJPA; + +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public class CharacterisationResultGatewayClickhouseImpl implements CharacterisationResultGateway { + + CharacterisationResultClickhouseRepository repository; + public CharacterisationResultGatewayClickhouseImpl(CharacterisationResultClickhouseRepository repository) { + this.repository = repository; + } + + + + @Override + public void addCharacterisationResult(CharacterisationResult characterisationResult, String datasetName) { + repository.save(characterisationResult, datasetName); + repository.cleanAggregation(datasetName); + } + + @Override + public List getCharacterisationResults(FilterCriteria filter, String datasetName) { + return repository.getCharacterisationResults(filter, datasetName); + } + + @Override + public List getPropertyDistribution(FilterCriteria filter, String datasetName) { + repository.aggregateResults(datasetName); + return repository.getPropertyDistribution(datasetName); + } + + @Override + public List getCharacterisationResultsByFilepath(String filePath, String datasetName) { + return repository.getCharacterisationResultsByFilepath(filePath,datasetName); + } + + @Override + public List getCharacterisationResultsByEntry(Entry entry, String datasetName) { + return null; + } + + @Override + public List getConflictEntries(String datasetName) { + return null; + } + + @Override + public List getEntries(String datasetName) { + return null; + } + + @Override + public List getConflictsByFilepath(String filepath, String datasetName) { + repository.aggregateResults(datasetName); + List results = new ArrayList<>(); + List allJPAByFilePath = getCharacterisationResultsByFilepath(filepath, datasetName); + List properties = allJPAByFilePath.stream().map(item -> item.getProperty()).collect(Collectors.toList()); + + for (Property property : properties) { + List collect = allJPAByFilePath.stream().filter(item -> item.getProperty().equals(property)).toList(); + if (collect.stream().map(CharacterisationResult::getValue).distinct().count() > 1) { + results.addAll(collect); + } + } + return results; + } + + @Override + public Map getCollectionStatistics(FilterCriteria filterCriteria, String datasetName) { + + repository.aggregateResults(datasetName); + Map result = new HashMap<>(); + + double[] sizeStatistics = repository.getSizeStatistics(filterCriteria, datasetName); + result.put("totalSize", sizeStatistics[0]); + result.put("minSize", sizeStatistics[1]); + result.put("maxSize", sizeStatistics[2]); + result.put("avgSize", sizeStatistics[3]); + result.put("totalCount", sizeStatistics[4]); + + double[] conflictStatistics = repository.getConflictStatistics(filterCriteria, datasetName); + result.put("conflictRate", conflictStatistics[1]); + result.put("conflictCount", conflictStatistics[0]); + return result; + } + + @Override + public List getPropertyValueDistribution(Property property, FilterCriteria filter, String datasetName) { + repository.aggregateResults(datasetName); + switch (property.getValueType()) { + case TIMESTAMP: { + List collect = null; + List propertyValueDistribution = + repository.getPropertyValueTimeStampDistribution(property.name(), filter, datasetName); + collect = propertyValueDistribution.stream().filter(stat -> property.name().equalsIgnoreCase((String) stat[0])) + .map(stat -> new PropertyValueStatistic((Long) stat[2], (String) stat[1])) + .collect(Collectors.toList()); + 
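+ // The repository pre-buckets TIMESTAMP values by year (SUBSTRING(property_value, 1, 4)), so each statistic here is a (year, count) pair, with 'CONFLICT' passed through as its own bucket.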
collect.sort(Comparator.comparingLong(PropertyValueStatistic::getCount).reversed()); + return collect; + } + case INTEGER: + case FLOAT: { + List propertyValueDistribution = + repository.getPropertyValueDistribution(property.name(), filter, datasetName); + + List floats = propertyValueDistribution.stream().filter(stat -> property.name().equalsIgnoreCase((String) stat[0]) && !(stat[1].equals("CONFLICT"))) + .map(stat -> { + Float val = Float.parseFloat(stat[1].toString()); + Long count = (Long) stat[2]; + + List result = new ArrayList<>(); + + for (long l = 0; l < count; l++) { + result.add(val); + } + return result; + } + ).flatMap(Collection::stream).sorted(Float::compare).collect(Collectors.toList()); + + List propertyValueStatistics = BinningAlgorithms.runBinning(floats); + + Optional conflicts = propertyValueDistribution.stream().filter(stat -> property.name().equalsIgnoreCase((String) stat[0]) && stat[1].equals("CONFLICT")) + .map(stat -> (Long) stat[2]).findAny(); + + conflicts.ifPresent(aLong -> propertyValueStatistics.add(new PropertyValueStatistic(aLong, "CONFLICT"))); + + return propertyValueStatistics; + } + default: + List collect = null; + List propertyValueDistribution = + repository.getPropertyValueDistribution(property.name(), filter, datasetName); + collect = propertyValueDistribution.stream().filter(stat -> property.name().equalsIgnoreCase((String) stat[0])) + .map(stat -> new PropertyValueStatistic((Long) stat[2], (String) stat[1])) + .collect(Collectors.toList()); + collect.sort(Comparator.comparingLong(PropertyValueStatistic::getCount).reversed()); + return collect; + } + } + + @Override + public List getSources(String datasetName) { + return repository.getSources(datasetName); + } + + @Override + public List getObjects(FilterCriteria filterCriteria, String datasetName) { + return repository.getObjects(filterCriteria, datasetName); + } + + @Override + public List getSamples(FilterCriteria filterCriteria, SamplingAlgorithms algorithm, List properties, String datasetName) { + repository.aggregateResults(datasetName); + switch (algorithm) { + case RANDOM -> { + List samples = repository.getRandomSamples(filterCriteria, 10, datasetName); + return samples; + } + case SELECTIVE_FEATURE_DISTRIBUTION -> { + List selectiveFeatureDistributionSamples = repository.getSelectiveFeatureDistributionSamples(filterCriteria, properties, datasetName); + //List examples = selectiveFeatureDistributionSamples.stream().map(arr -> arr[1]).collect(Collectors.toList()); + return selectiveFeatureDistributionSamples; + } + } + return null; + } + + @Override + public void addCharacterisationResults(List characterisationResults, String datasetName) { + repository.saveAll(characterisationResults, datasetName); + repository.cleanAggregation(datasetName); + } + + @Override + public double getConflictRate(String datasetName) { + repository.aggregateResults(datasetName); + Long totalCount = repository.getDigitalObjectCount(datasetName); + Long conflictCount = repository.getConflictCount(datasetName); + return conflictCount / (double) totalCount; + } + + @Override + public void delete(CharacterisationResult characterisationResult, String datasetName) { + + } + + @Override + public void resolveConflictsNative(String datasetName) { + repository.resolveConflictsSimple(datasetName); + repository.aggregateResults(datasetName); + } + + @Override + public List listDatasets() { + return repository.listDatasets(); + } +} diff --git 
a/infra-persistence/src/main/java/rocks/artur/jpa/CharacterisationResultGatewayJpaImpl.java b/infra-persistence/src/main/java/rocks/artur/jpa/CharacterisationResultGatewayJpaImpl.java index e53ee2e..dbc24a8 100644 --- a/infra-persistence/src/main/java/rocks/artur/jpa/CharacterisationResultGatewayJpaImpl.java +++ b/infra-persistence/src/main/java/rocks/artur/jpa/CharacterisationResultGatewayJpaImpl.java @@ -4,7 +4,6 @@ import jakarta.transaction.Transactional; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import rocks.artur.domain.Entry; import rocks.artur.domain.*; import rocks.artur.domain.statistics.BinningAlgorithms; import rocks.artur.domain.statistics.PropertiesPerObjectStatistic; @@ -28,18 +28,19 @@ public class CharacterisationResultGatewayJpaImpl implements CharacterisationRes CharacterisationResultViewRepository characterisationResultViewRepository) { this.characterisationResultRepository = characterisationResultRepository; this.characterisationResultViewRepository = characterisationResultViewRepository; + } @Override @Transactional - public void addCharacterisationResult(CharacterisationResult characterisationResult) { + public void addCharacterisationResult(CharacterisationResult characterisationResult, String datasetName) { CharacterisationResultJPA toSave = new CharacterisationResultJPA(characterisationResult); LOG.debug("saving " + toSave.toString()); characterisationResultRepository.save(toSave); } @Override - public List getCharacterisationResults(FilterCriteria filter) { + public List getCharacterisationResults(FilterCriteria filter, String datasetName) { List all = characterisationResultRepository.findAll(); List result = all.stream().map(item -> new CharacterisationResult(Property.valueOf(item.getProperty()), item.getValue(), ValueType.valueOf(item.getValueType()), item.getSource(), item.getFilePath())).collect(Collectors.toList()); @@ -47,7 +48,7 @@ public List getCharacterisationResults(FilterCriteria getPropertyDistribution(FilterCriteria filter) { + public List getPropertyDistribution(FilterCriteria filter, String datasetName) { //Specification convert = convert(filter); //final Map result = @@ -64,16 +65,16 @@ public List getPropertyDistribution(FilterCriteria filter) { return collect; } - - public List getPropertyValueDistribution(Property property, FilterCriteria filter) { + @Override + public List getPropertyValueDistribution(Property property, FilterCriteria filter, String datasetName) { switch (property.getValueType()) { case TIMESTAMP: { List collect = null; List propertyValueDistribution = - characterisationResultViewRepository.getPropertyValueTimeStampDistribution(property.toString(), filter); - collect = propertyValueDistribution.stream() - .map(stat -> new PropertyValueStatistic((Long) stat[1], (String) stat[0])) + characterisationResultViewRepository.getPropertyValueTimeStampDistribution(property.name(), filter); + collect = propertyValueDistribution.stream().filter(stat -> property.name().equalsIgnoreCase((String) stat[0])) + .map(stat -> new PropertyValueStatistic((Long) stat[2], (String) stat[1])) + .collect(Collectors.toList()); collect.sort(Comparator.comparingLong(PropertyValueStatistic::getCount).reversed()); return collect; @@ -81,26 +82,26 @@ public List getPropertyValueDistribution(Property proper case INTEGER: case FLOAT: { List propertyValueDistribution = - characterisationResultViewRepository.getPropertyValueDistribution(property.toString(), filter); + 
characterisationResultViewRepository.getPropertyValueDistribution(property.name(), filter); - List floats = propertyValueDistribution.stream().filter(stat -> !(stat[0].equals("CONFLICT"))) + List floats = propertyValueDistribution.stream().filter(stat -> property.name().equalsIgnoreCase((String) stat[0]) && !(stat[1].equals("CONFLICT"))) .map(stat -> { - Float val = Float.parseFloat(stat[0].toString()); - Long count = (Long) stat[1]; + Float val = Float.parseFloat(stat[1].toString()); + Long count = (Long) stat[2]; - List result = new ArrayList<>(); + List result = new ArrayList<>(); - for (long l=0; l < count; l++){ - result.add(val); - } - return result; - } + for (long l = 0; l < count; l++) { + result.add(val); + } + return result; + } ).flatMap(Collection::stream).sorted(Float::compare).collect(Collectors.toList()); List propertyValueStatistics = BinningAlgorithms.runBinning(floats); - Optional conflicts = propertyValueDistribution.stream().filter(stat -> stat[0].equals("CONFLICT")) - .map(stat -> (Long) stat[1]).findAny(); + Optional conflicts = propertyValueDistribution.stream().filter(stat -> property.name().equalsIgnoreCase((String) stat[0]) && stat[1].equals("CONFLICT")) + .map(stat -> (Long) stat[2]).findAny(); conflicts.ifPresent(aLong -> propertyValueStatistics.add(new PropertyValueStatistic(aLong, "CONFLICT"))); @@ -109,9 +110,9 @@ public List getPropertyValueDistribution(Property proper default: List collect = null; List propertyValueDistribution = - characterisationResultViewRepository.getPropertyValueDistribution(property.toString(), filter); - collect = propertyValueDistribution.stream() - .map(stat -> new PropertyValueStatistic((Long) stat[1], (String) stat[0])) + characterisationResultViewRepository.getPropertyValueDistribution(property.name(), filter); + collect = propertyValueDistribution.stream().filter(stat -> property.name().equalsIgnoreCase((String) stat[0])) + .map(stat -> new PropertyValueStatistic((Long) stat[2], (String) stat[1])) .collect(Collectors.toList()); collect.sort(Comparator.comparingLong(PropertyValueStatistic::getCount).reversed()); return collect; @@ -120,7 +121,7 @@ public List getPropertyValueDistribution(Property proper } @Override - public List getCharacterisationResultsByFilepath(String filepath) { + public List getCharacterisationResultsByFilepath(String filepath, String datasetName) { List allJPAByFilePath = characterisationResultRepository.findAllByFilePath(filepath); List result = allJPAByFilePath.stream().map(item -> new CharacterisationResult(Property.valueOf(item.getProperty()), item.getValue(), ValueType.valueOf(item.getValueType()), item.getSource(), item.getFilePath())).collect(Collectors.toList()); @@ -128,7 +129,7 @@ public List getCharacterisationResultsByFilepath(String } @Override - public List getCharacterisationResultsByEntry(Entry entry) { + public List getCharacterisationResultsByEntry(Entry entry, String datasetName) { List allJPAByFilePath = characterisationResultRepository.findAllByFilePath(entry.getFilepath()); List result = allJPAByFilePath.stream().filter(item -> item.getProperty().equals(entry.getProperty().toString())).map(item -> new CharacterisationResult(Property.valueOf(item.getProperty()), item.getValue(), ValueType.valueOf(item.getValueType()), item.getSource(), item.getFilePath())).collect(Collectors.toList()); @@ -136,21 +137,21 @@ public List getCharacterisationResultsByEntry(Entry entr } @Override - public List getConflictEntries() { + public List getConflictEntries(String datasetName) { List 
conflictEntries = characterisationResultViewRepository.getConflictEntries(); List result = conflictEntries.stream().map(item -> new Entry(item[0], item[1])).collect(Collectors.toList()); return result; } @Override - public List getEntries() { + public List getEntries(String datasetName) { List filepathProperty = characterisationResultRepository.getFilepathProperty(); List result = filepathProperty.stream().map(item -> new Entry(item[0].toString(), item[1].toString())).collect(Collectors.toList()); return result; } - - public List getConflictsByFilepath(String filepath) { + @Override + public List getConflictsByFilepath(String filepath, String datasetName) { List allJPAByFilePath = characterisationResultViewRepository.findAllByFilePath(filepath); List result = allJPAByFilePath.stream().filter(item -> item.getValue().equals("CONFLICT")).map(item -> new CharacterisationResult(Property.valueOf(item.getProperty()), item.getValue(), ValueType.valueOf(item.getValueType()), null, item.getFilePath())).collect(Collectors.toList()); @@ -158,7 +159,7 @@ public List getConflictsByFilepath(String filepath) { } @Override - public Map getCollectionStatistics(FilterCriteria filterCriteria) { + public Map getCollectionStatistics(FilterCriteria filterCriteria, String datasetName) { Map result = new HashMap<>(); double[] sizeStatistics = characterisationResultViewRepository.getSizeStatistics(filterCriteria); @@ -168,20 +169,20 @@ public Map getCollectionStatistics(FilterCriteria filterCriteria result.put("avgSize", sizeStatistics[3]); result.put("totalCount", sizeStatistics[4]); - double[] conflictStatistics = characterisationResultViewRepository.getConflictStatistics(filterCriteria); + double[] conflictStatistics = characterisationResultViewRepository.getConflictStatistics(filterCriteria); result.put("conflictRate", conflictStatistics[1]); result.put("conflictCount", conflictStatistics[0]); return result; } @Override - public List getSources() { + public List getSources(String datasetName) { List sources = characterisationResultViewRepository.getSources(); return sources; } @Override - public List getObjects(FilterCriteria filterCriteria) { + public List getObjects(FilterCriteria filterCriteria, String datasetName) { List propertyValueDistribution = characterisationResultViewRepository.getObjects(filterCriteria); @@ -194,7 +195,7 @@ public List getObjects(FilterCriteria filterCriter } @Override - public List getSamples(FilterCriteria filterCriteria, SamplingAlgorithms algorithm, List properties) { + public List getSamples(FilterCriteria filterCriteria, SamplingAlgorithms algorithm, List properties, String datasetName) { switch (algorithm) { case RANDOM -> { List samples = characterisationResultViewRepository.getRandomSamples(filterCriteria, 10); @@ -210,26 +211,54 @@ public List getSamples(FilterCriteria filterCriteria, SamplingAlgorith } @Override - public void addCharacterisationResults(List characterisationResults) { - - List collect = characterisationResults.parallelStream().map(res -> { - CharacterisationResultJPA toSave = new CharacterisationResultJPA(res); - return toSave; - }).collect(Collectors.toList()); + public void addCharacterisationResults(List characterisationResults, String datasetName) { + List tmp = new ArrayList<>(); + characterisationResults.stream().forEach(item -> { + if (null == item) { + LOG.error("Bad characterisation result: " + item); + } else { + CharacterisationResultJPA characterisationResultJPA = new CharacterisationResultJPA(item); + String value = 
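Both getConflictEntries and getEntries above follow the same pattern: a repository query returns raw Object[] projection rows, which are mapped into domain Entry objects. A small sketch of that mapping, with Entry reduced to a record for illustration:

    import java.util.List;
    import java.util.stream.Collectors;

    public class EntryMappingSketch {
        record Entry(String filepath, String property) {}

        public static void main(String[] args) {
            // Each row is (file path, property), as selected by the repository query.
            List<Object[]> rows = List.of(
                    new Object[]{"/home/artur", "MIMETYPE"},
                    new Object[]{"/home/artur2", "FORMAT"});

            List<Entry> entries = rows.stream()
                    .map(row -> new Entry(row[0].toString(), row[1].toString()))
                    .collect(Collectors.toList());

            entries.forEach(System.out::println);
        }
    }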
characterisationResultJPA.getValue(); + if (value != null) { + if (value.length() > 255) { + characterisationResultJPA.setValue(value.substring(0, 255)); + } + tmp.add(characterisationResultJPA); + } + } + }); + try { + characterisationResultRepository.saveAll(tmp); + } catch (RuntimeException e) { + LOG.error("Some characterisation results have already been persisted. Batch insert is not possible. Uploaded items with NULL values:" ); + List collect = tmp.stream().filter(item -> item.getSource() == null || item.getProperty() == null || item.getFilePath() == null).collect(Collectors.toList()); + LOG.error(collect.toString()); + e.printStackTrace(); + throw new IllegalArgumentException("Some characterisation results have already been persisted. Batch insert is not possible."); - LOG.debug("saving " + collect); - characterisationResultRepository.saveAll(collect); + } } @Override - public double getConflictRate() { + public double getConflictRate(String datasetName) { Long totalCount = characterisationResultViewRepository.getTotalCount(); Long conflictCount = characterisationResultViewRepository.getConflictCount(); - return conflictCount/(double)totalCount; + return conflictCount / (double) totalCount; } @Override - public void delete(CharacterisationResult characterisationResult) { + public void delete(CharacterisationResult characterisationResult, String datasetName) { characterisationResultRepository.delete(new CharacterisationResultJPA(characterisationResult)); } + + @Override + public void resolveConflictsNative(String datasetName) { + + } + + @Override + public List listDatasets() { + return List.of("default"); + } + } diff --git a/infra-persistence/src/main/java/rocks/artur/jpa/table/CharacterisationResultJPA.java b/infra-persistence/src/main/java/rocks/artur/jpa/table/CharacterisationResultJPA.java index 2306fb2..e5ec917 100644 --- a/infra-persistence/src/main/java/rocks/artur/jpa/table/CharacterisationResultJPA.java +++ b/infra-persistence/src/main/java/rocks/artur/jpa/table/CharacterisationResultJPA.java @@ -2,37 +2,41 @@ import jakarta.persistence.Column; import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; import jakarta.persistence.Id; -import jakarta.persistence.IdClass; import jakarta.persistence.Table; +import org.hibernate.annotations.GenericGenerator; import rocks.artur.domain.CharacterisationResult; import rocks.artur.domain.Property; import rocks.artur.domain.ValueType; @Entity -@IdClass(CharacterisationResultID.class) @Table(name = "characterisationresult") public class CharacterisationResultJPA { + @Id - @Column(nullable = false, name = "filepath") + @GeneratedValue(generator = "uuid") + @GenericGenerator(name = "uuid", strategy = "org.hibernate.id.UUIDGenerator") + @Column(name = "id") + private String id; + @Column(nullable = false, name = "file_path") private String filePath; - @Id @Column(nullable = false) private String property; - @Id @Column(nullable = false) private String source; @Column(name = "property_value", nullable = false) private String value; - @Column(name = "valuetype", nullable = false) + @Column(name = "value_type", nullable = false) private String valueType; public CharacterisationResultJPA(CharacterisationResult characterisationResult) { + //this.id = UUID.randomUUID().toString(); this.filePath = characterisationResult.getFilePath(); this.source = characterisationResult.getSource(); this.value = characterisationResult.getValue(); @@ -40,7 +44,9 @@ public CharacterisationResultJPA(CharacterisationResult 
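The batch-save path above filters out null results and truncates property_value to 255 characters before calling saveAll, which keeps every row within the varchar(255) columns defined in the reworked schema.sql further down. The pre-insert pass, reduced to a runnable sketch with plain strings standing in for the JPA entities:

    import java.util.ArrayList;
    import java.util.List;

    public class PreInsertSketch {
        public static void main(String[] args) {
            List<String> rawValues = new ArrayList<>();
            rawValues.add("application/doc");
            rawValues.add(null);            // logged as a bad result and skipped
            rawValues.add("y".repeat(300)); // longer than the varchar(255) column

            List<String> toSave = new ArrayList<>();
            for (String value : rawValues) {
                if (value == null) {
                    continue;
                }
                toSave.add(value.length() > 255 ? value.substring(0, 255) : value);
            }

            // toSave is what goes to repository.saveAll(...); prints 15 and 255.
            toSave.forEach(v -> System.out.println(v.length()));
        }
    }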
characterisationResult) this.property = characterisationResult.getProperty().toString(); } - public CharacterisationResultJPA(){} + public CharacterisationResultJPA() { + //this.id = UUID.randomUUID().toString(); + } public static CharacterisationResultJPA deepCopy(CharacterisationResultJPA characterisationResult) { CharacterisationResultJPA result = new CharacterisationResultJPA(); @@ -87,6 +93,7 @@ public void setSource(String source) { @Override public String toString() { return "CharacterisationResultJPA{" + + ", id=" + id + ", property=" + property + ", value='" + value + '\'' + ", valueType=" + valueType + @@ -104,7 +111,7 @@ public void setFilePath(String filePath) { } - public CharacterisationResult toCharacterisationResult(){ + public CharacterisationResult toCharacterisationResult() { CharacterisationResult result = new CharacterisationResult(); result.setValue(this.value); diff --git a/infra-persistence/src/main/java/rocks/artur/jpa/table/CharacterisationResultRepository.java b/infra-persistence/src/main/java/rocks/artur/jpa/table/CharacterisationResultRepository.java index f423360..fa80002 100644 --- a/infra-persistence/src/main/java/rocks/artur/jpa/table/CharacterisationResultRepository.java +++ b/infra-persistence/src/main/java/rocks/artur/jpa/table/CharacterisationResultRepository.java @@ -7,7 +7,7 @@ import java.util.List; @Repository -public interface CharacterisationResultRepository extends JpaRepository, CustomCharacterisationResultRepository{ +public interface CharacterisationResultRepository extends JpaRepository, CustomCharacterisationResultRepository { @Query("select property, count(*) as count from CharacterisationResultJPA group by property") List getPropertyDistribution(); diff --git a/infra-persistence/src/main/java/rocks/artur/jpa/table/CustomCharacterisationResultRepository.java b/infra-persistence/src/main/java/rocks/artur/jpa/table/CustomCharacterisationResultRepository.java index 50b6407..2d9aa70 100644 --- a/infra-persistence/src/main/java/rocks/artur/jpa/table/CustomCharacterisationResultRepository.java +++ b/infra-persistence/src/main/java/rocks/artur/jpa/table/CustomCharacterisationResultRepository.java @@ -1,10 +1,10 @@ package rocks.artur.jpa.table; -import java.util.List; +import java.util.Collection; public interface CustomCharacterisationResultRepository { - void saveFast(List results); + void saveFast(Collection results); } diff --git a/infra-persistence/src/main/java/rocks/artur/jpa/table/CustomCharacterisationResultRepositoryImpl.java b/infra-persistence/src/main/java/rocks/artur/jpa/table/CustomCharacterisationResultRepositoryImpl.java index 52aa588..f4ef423 100644 --- a/infra-persistence/src/main/java/rocks/artur/jpa/table/CustomCharacterisationResultRepositoryImpl.java +++ b/infra-persistence/src/main/java/rocks/artur/jpa/table/CustomCharacterisationResultRepositoryImpl.java @@ -5,15 +5,13 @@ import org.slf4j.LoggerFactory; import org.springframework.stereotype.Repository; import org.springframework.transaction.annotation.Transactional; -import rocks.artur.jpa.view.FilterJPA; -import java.util.List; +import java.util.Collection; @Repository public class CustomCharacterisationResultRepositoryImpl implements CustomCharacterisationResultRepository { private static final Logger LOG = LoggerFactory.getLogger(CustomCharacterisationResultRepositoryImpl.class); private final EntityManager entityManager; - private final FilterJPA filterJPA = new FilterJPA(); public CustomCharacterisationResultRepositoryImpl(EntityManager entityManager) { 
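The entity change above replaces the old composite (filePath, property, source) @IdClass key with a Hibernate-generated UUID string, mirrored by the new id column in schema.sql. Reduced to its essentials the mapping looks as follows; the class name is shortened for the sketch:

    import jakarta.persistence.Column;
    import jakarta.persistence.Entity;
    import jakarta.persistence.GeneratedValue;
    import jakarta.persistence.Id;
    import jakarta.persistence.Table;
    import org.hibernate.annotations.GenericGenerator;

    @Entity
    @Table(name = "characterisationresult")
    public class ResultEntitySketch {
        @Id
        @GeneratedValue(generator = "uuid")
        @GenericGenerator(name = "uuid", strategy = "org.hibernate.id.UUIDGenerator")
        @Column(name = "id")
        private String id;                // assigned by Hibernate on persist

        @Column(nullable = false, name = "file_path")
        private String filePath;

        protected ResultEntitySketch() {} // JPA requires a no-arg constructor

        public String getId() { return id; }
    }

A surrogate key means two sources reporting the same (filePath, property) pair no longer collide on insert, which is what allows the blind persist loop in saveFast below.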
this.entityManager = entityManager; @@ -22,20 +20,10 @@ public CustomCharacterisationResultRepositoryImpl(EntityManager entityManager) { @Override @Transactional - public void saveFast(List results) { + public void saveFast(Collection results) { for (CharacterisationResultJPA result : results) { - CharacterisationResultID id = new CharacterisationResultID(); - id.setProperty(result.getProperty()); - id.setSource(result.getSource()); - id.setFilePath(result.getFilePath()); - CharacterisationResultJPA found = entityManager.find(CharacterisationResultJPA.class, id); - if (found == null) { - entityManager.persist(result); - } else { - entityManager.merge(result); - } - + entityManager.persist(result); } } } diff --git a/infra-persistence/src/main/java/rocks/artur/jpa/view/CharacterisationResultViewJPA.java b/infra-persistence/src/main/java/rocks/artur/jpa/view/CharacterisationResultViewJPA.java index 274fa14..29fca49 100644 --- a/infra-persistence/src/main/java/rocks/artur/jpa/view/CharacterisationResultViewJPA.java +++ b/infra-persistence/src/main/java/rocks/artur/jpa/view/CharacterisationResultViewJPA.java @@ -14,7 +14,7 @@ public class CharacterisationResultViewJPA { @Id - @Column(nullable = false, name = "filepath") + @Column(nullable = false, name = "file_path") private String filePath; @Id @Column(nullable = false) @@ -22,7 +22,7 @@ public class CharacterisationResultViewJPA { @Column(nullable = false, name = "property_value") private String value; - @Column(nullable = false, name = "valuetype") + @Column(nullable = false, name = "value_type") private String valueType; diff --git a/infra-persistence/src/main/java/rocks/artur/jpa/view/CharacterisationResultViewRepository.java b/infra-persistence/src/main/java/rocks/artur/jpa/view/CharacterisationResultViewRepository.java index 43370ad..71d48e3 100644 --- a/infra-persistence/src/main/java/rocks/artur/jpa/view/CharacterisationResultViewRepository.java +++ b/infra-persistence/src/main/java/rocks/artur/jpa/view/CharacterisationResultViewRepository.java @@ -2,7 +2,6 @@ import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.Query; import org.springframework.stereotype.Repository; -import rocks.artur.jpa.table.CharacterisationResultJPA; import java.util.List; @@ -11,7 +10,7 @@ public interface CharacterisationResultViewRepository extends JpaRepository, CustomCharacterisationResultViewRepository { List findByFilePath(String filePath); - @Query("select property, count(*) as count from CharacterisationResultViewJPA group by property") + @Query("select property, count(*) as count from CharacterisationResultViewJPA group by property" ) List getPropertyDistribution(); @Query("select count(*) as count from CharacterisationResultViewJPA where value='CONFLICT'") @@ -41,22 +40,6 @@ public interface CharacterisationResultViewRepository extends JpaRepository getSizeDistribution(); - - - @Query("select distinct source from CharacterisationResultJPA") List getSources(); diff --git a/infra-persistence/src/main/java/rocks/artur/jpa/view/CustomCharacterisationResultViewRepositoryImpl.java b/infra-persistence/src/main/java/rocks/artur/jpa/view/CustomCharacterisationResultViewRepositoryImpl.java index fda89e7..7958d9b 100644 --- a/infra-persistence/src/main/java/rocks/artur/jpa/view/CustomCharacterisationResultViewRepositoryImpl.java +++ b/infra-persistence/src/main/java/rocks/artur/jpa/view/CustomCharacterisationResultViewRepositoryImpl.java @@ -7,6 +7,7 @@ import rocks.artur.domain.CharacterisationResult; 
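With generated ids, saveFast no longer needs the find-then-persist-or-merge round trip it replaced: every incoming entity is new by construction. The resulting method, sketched generically over the entity type:

    import jakarta.persistence.EntityManager;
    import java.util.Collection;

    public class SaveFastSketch<T> {
        private final EntityManager entityManager;

        public SaveFastSketch(EntityManager entityManager) {
            this.entityManager = entityManager;
        }

        // The real method is @Transactional; the caller supplies the transaction.
        public void saveFast(Collection<T> results) {
            for (T result : results) {
                entityManager.persist(result); // no lookup, no merge
            }
        }
    }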
import rocks.artur.domain.FilterCriteria; import rocks.artur.domain.Property; +import org.springframework.cache.annotation.Cacheable; import java.util.List; import java.util.stream.Collectors; @@ -22,42 +23,45 @@ public CustomCharacterisationResultViewRepositoryImpl(EntityManager entityManage } @Override - public List getPropertyValueDistribution(String property, FilterCriteria filter) { + @Cacheable("distributions") + public List getPropertyValueDistribution( String property,FilterCriteria filter) { - String subquery = "select distinct FILEPATH from CHARACTERISATIONRESULTVIEW "; + String subquery = ""; if (filter != null) { subquery = filterJPA.convert(filter); + subquery = String.format(" file_path in (%s) and ", subquery); } String query = String.format( - "select PROPERTY_VALUE, count(*) " + - "from CHARACTERISATIONRESULTVIEW t " + - "join (%s) c on t.FILEPATH=c.FILEPATH " + - "where PROPERTY= '%s' group by PROPERTY_VALUE", subquery, property); + "select property, property_value, count(property_value) as number " + + "from characterisationresultview " + + "where %s property = '%s' group by property, property_value ORDER BY number desc LIMIT 200", subquery, property); + List resultList = entityManager.createNativeQuery(query).getResultList(); return resultList; } @Override + @Cacheable("timedistributions") public List getPropertyValueTimeStampDistribution(String property, FilterCriteria filter) { - String subquery = "select distinct FILEPATH from CHARACTERISATIONRESULTVIEW "; + String subquery = ""; if (filter != null) { subquery = filterJPA.convert(filter); + subquery = String.format(" file_path in (%s) and ", subquery); } //THIS IS H2-SPECIFIC SQL, BECAUSE OF PARSEDATETIME String query = String.format( - "select CASE " + - "WHEN PROPERTY_VALUE = 'CONFLICT' THEN PROPERTY_VALUE " + - "ELSE SUBSTRING(PROPERTY_VALUE,7,4) " + - "END, count(*) " + - "from CHARACTERISATIONRESULTVIEW t " + - "join (%s) c on t.FILEPATH=c.FILEPATH " + - "where PROPERTY= '%s' group by CASE " + - "WHEN PROPERTY_VALUE = 'CONFLICT' THEN PROPERTY_VALUE " + - "ELSE SUBSTRING(PROPERTY_VALUE,7,4) " + - "END", subquery, property); + "select property, CASE " + + "WHEN property_value = 'CONFLICT' THEN property_value " + + "ELSE SUBSTRING(property_value,1,4) " + + "END, count(property) as number " + + "from characterisationresultview " + + "where %s property = '%s' group by property, CASE " + + "WHEN property_value = 'CONFLICT' THEN property_value " + + "ELSE SUBSTRING(property_value,1,4) " + + "END ORDER BY number desc LIMIT 200", subquery, property); List resultList = entityManager.createNativeQuery(query).getResultList(); return resultList; @@ -65,37 +69,40 @@ public List getPropertyValueTimeStampDistribution(String property, FilterCriteri @Override public List getObjects(FilterCriteria filterCriteria) { - String subquery = "select distinct FILEPATH from CHARACTERISATIONRESULTVIEW "; + String subquery = ""; if (filterCriteria != null) { subquery = filterJPA.convert(filterCriteria); + subquery = String.format("where file_path in (%s)", subquery); } String query = String.format( - "select t.FILEPATH, count(*) " + - "from CHARACTERISATIONRESULTVIEW t " + - "join (%s) c on t.FILEPATH=c.FILEPATH " + - "group by t.FILEPATH", subquery); + "select t.FILE_PATH, count(*) " + + "from characterisationresultview t " + + " %s" + + "group by t.FILE_PATH", subquery); List resultList = entityManager.createNativeQuery(query).getResultList(); return resultList; } @Override + @Cacheable("sizedistributions") public double[] 
getSizeStatistics(FilterCriteria filterCriteria) { - String subquery = "select distinct FILEPATH from CHARACTERISATIONRESULTVIEW "; + String subquery = ""; if (filterCriteria != null) { subquery = filterJPA.convert(filterCriteria); + subquery = String.format(" file_path in (%s) and ", subquery); } + String query = String.format( - "select sum(cast (t.property_value as int)) as totalsize, " + - "min(cast (t.property_value as int)) as minsize, " + - "max(cast (t.property_value as int)) as maxsize, " + - "avg(cast (t.property_value as int)) as avgsize, " + + "select IFNULL(sum(cast(t.property_value as SIGNED)),0) as totalsize, " + + "IFNULL(min(cast(t.property_value as SIGNED)),0) as minsize, " + + "IFNULL(max(cast(t.property_value as SIGNED)),0) as maxsize, " + + "IFNULL(avg(cast(t.property_value as SIGNED)),0) as avgsize, " + "count(t.property_value) as count " + - "from CHARACTERISATIONRESULTVIEW t " + - "join (%s) c on t.FILEPATH=c.FILEPATH " + - "where t.PROPERTY='SIZE'", subquery); + "from characterisationresultview t " + + "where %s t.PROPERTY='SIZE'", subquery); Object[] singleResult = (Object[]) entityManager.createNativeQuery(query).getSingleResult(); Double sum = Double.valueOf(singleResult[0].toString()); @@ -110,24 +117,32 @@ public double[] getSizeStatistics(FilterCriteria filterCriteria) { @Override public double[] getConflictStatistics(FilterCriteria filterCriteria) { - String subquery = "select distinct FILEPATH from CHARACTERISATIONRESULTVIEW "; + String subquery = ""; if (filterCriteria != null) { subquery = filterJPA.convert(filterCriteria); + subquery = String.format(" file_path in (%s) and ", subquery); } + String query = String.format( - "select count(distinct t.FILEPATH) as count " + - "from CHARACTERISATIONRESULTVIEW t " + - "join (%s) c on t.FILEPATH=c.FILEPATH " + - "where t.PROPERTY_VALUE='CONFLICT'", subquery); + "select count(distinct file_path) as count " + + "from characterisationresultview " + + "where %s property_value='CONFLICT'", subquery); Long conflictsCount = (Long) entityManager.createNativeQuery(query).getSingleResult(); + String subquery2 = ""; + if (filterCriteria != null) { + subquery2 = filterJPA.convert(filterCriteria); + subquery2 = String.format("where file_path in (%s) ", subquery2); + } + String query2 = String.format( - "select count(distinct t.FILEPATH) as count " + - "from CHARACTERISATIONRESULTVIEW t " + - "join (%s) c on t.FILEPATH=c.FILEPATH ", subquery); + "select count(distinct file_path) as count " + + "from characterisationresultview " + + "%s", subquery2); + Long totalCount = (Long) entityManager.createNativeQuery(query2).getSingleResult(); @@ -141,15 +156,15 @@ public double[] getConflictStatistics(FilterCriteria filterCriteria) { @Override public List getRandomSamples(FilterCriteria filterCriteria, int sampleSize) { - String subquery = "select distinct FILEPATH from CHARACTERISATIONRESULTVIEW "; + String subquery = ""; if (filterCriteria != null) { subquery = filterJPA.convert(filterCriteria); + subquery = String.format(" where file_path in (%s) ", subquery); } - String query = String.format( - "select t.FILEPATH " + - "from CHARACTERISATIONRESULTVIEW t " + - "join (%s) c on t.FILEPATH=c.FILEPATH group by t.FILEPATH " + + "select t.FILE_PATH " + + "from characterisationresultview t " + + "%s group by t.FILE_PATH " + "ORDER BY RAND() LIMIT %d ", subquery, sampleSize); List resultList = entityManager.createNativeQuery(query).getResultList(); @@ -159,9 +174,10 @@ public List getRandomSamples(FilterCriteria filterCriteria, int sample 
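The IFNULL wrappers in getSizeStatistics above are load-bearing: when the filter matches no rows, MySQL returns NULL for SUM/MIN/MAX/AVG, and the Double conversions on the result array would otherwise throw. A condensed sketch of the query-and-unpack step, with the WHERE fragment supplied by the caller as in the original:

    import jakarta.persistence.EntityManager;

    public class SizeStatsSketch {
        // Returns {total, min, max, avg, count} for the SIZE property.
        public double[] sizeStatistics(EntityManager em, String filterFragment) {
            String query =
                    "select IFNULL(sum(cast(t.property_value as SIGNED)),0) as totalsize, " +
                    "IFNULL(min(cast(t.property_value as SIGNED)),0) as minsize, " +
                    "IFNULL(max(cast(t.property_value as SIGNED)),0) as maxsize, " +
                    "IFNULL(avg(cast(t.property_value as SIGNED)),0) as avgsize, " +
                    "count(t.property_value) as count " +
                    "from characterisationresultview t " +
                    "where " + filterFragment + " t.PROPERTY='SIZE'";

            Object[] row = (Object[]) em.createNativeQuery(query).getSingleResult();
            double[] stats = new double[row.length];
            for (int i = 0; i < row.length; i++) {
                stats[i] = Double.parseDouble(row[i].toString());
            }
            return stats;
        }
    }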
public List getSelectiveFeatureDistributionSamples(FilterCriteria filterCriteria, List properties) { - String subquery = "select distinct FILEPATH from CHARACTERISATIONRESULTVIEW "; + String subquery = ""; if (filterCriteria != null) { subquery = filterJPA.convert(filterCriteria); + subquery = String.format(" file_path in (%s) and ", subquery); } @@ -170,7 +186,7 @@ public List getSelectiveFeatureDistributionSamples(FilterCriteria filt for (int i = 0; i < properties.size(); i++) { String currProperty = properties.get(i).name(); if (i == 0) { - select.append(String.format("count(%s.filepath) as size, min(%s.filepath) as example, %s.property_value ", currProperty, currProperty, currProperty)); + select.append(String.format("count(%s.file_path) as size, min(%s.file_path) as example, %s.property_value ", currProperty, currProperty, currProperty)); } else { select.append(String.format(", %s.property_value ", currProperty)); } @@ -182,11 +198,11 @@ public List getSelectiveFeatureDistributionSamples(FilterCriteria filt String currProperty = properties.get(i).name(); if (i == 0) { - from.append(String.format(" (SELECT v.property_value, v.filepath FROM CHARACTERISATIONRESULTVIEW v\n" + - "join (%s) c on v.FILEPATH=c.FILEPATH where v.property='%s' ) %s ", subquery, currProperty, currProperty)); + from.append(String.format(" (SELECT v.property_value, v.file_path FROM characterisationresultview v\n" + + "where %s v.property='%s' ) %s ", subquery, currProperty, currProperty)); } else { - from.append(String.format(" join (SELECT v.property_value, v.filepath FROM CHARACTERISATIONRESULTVIEW v\n" + - "join (%s) c on v.FILEPATH=c.FILEPATH where v.property='%s') %s on %s.filepath=%s.filepath ", subquery, currProperty, currProperty, properties.get(0), currProperty)); + from.append(String.format(" join (SELECT v.property_value, v.file_path FROM characterisationresultview v\n" + + "where %s v.property='%s') %s on %s.file_path=%s.file_path ", subquery, currProperty, currProperty, properties.get(0), currProperty)); } //TODO: Probably, the join is not required. Check if it is true. 
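The builder above assembles one filtered subquery per requested property and joins them all on file_path, so each result row carries a count, an example file and one value per property. A standalone sketch of that assembly for two properties, printing the generated SQL; the grouping tail that follows in the real method is omitted because it is not part of the excerpt:

    import java.util.List;

    public class SelectiveQuerySketch {
        public static void main(String[] args) {
            List<String> properties = List.of("FORMAT", "MIMETYPE");
            String filter = ""; // e.g. " file_path in (<subquery>) and "

            StringBuilder select = new StringBuilder("select ");
            StringBuilder from = new StringBuilder("from ");
            for (int i = 0; i < properties.size(); i++) {
                String p = properties.get(i);
                if (i == 0) {
                    select.append(String.format(
                            "count(%s.file_path) as size, min(%s.file_path) as example, %s.property_value ",
                            p, p, p));
                    from.append(String.format(
                            "(SELECT v.property_value, v.file_path FROM characterisationresultview v " +
                                    "where %s v.property='%s') %s ", filter, p, p));
                } else {
                    select.append(String.format(", %s.property_value ", p));
                    from.append(String.format(
                            "join (SELECT v.property_value, v.file_path FROM characterisationresultview v " +
                                    "where %s v.property='%s') %s on %s.file_path=%s.file_path ",
                            filter, p, p, properties.get(0), p));
                }
            }
            System.out.println(select.append(from));
        }
    }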
} diff --git a/infra-persistence/src/main/java/rocks/artur/jpa/view/FilterJPA.java b/infra-persistence/src/main/java/rocks/artur/jpa/view/FilterJPA.java index 7f1d64f..854c275 100644 --- a/infra-persistence/src/main/java/rocks/artur/jpa/view/FilterJPA.java +++ b/infra-persistence/src/main/java/rocks/artur/jpa/view/FilterJPA.java @@ -23,13 +23,13 @@ public String convert(FilterCriteria filter) { switch (property.getValueType()) { case TIMESTAMP: if (!value.equals("CONFLICT")) { - result = String.format("select distinct FILEPATH from CHARACTERISATIONRESULT where property = '%s' and PARSEDATETIME(PROPERTY_VALUE,'dd-MM-yyyy HH:mm:ss') %s PARSEDATETIME('%s','yyyy-MM-dd')", property, operator, value); + result = String.format("select distinct FILE_PATH from characterisationresult where property = '%s' and cast(PROPERTY_VALUE as DATETIME) %s cast('%s' as DATE)", property, operator, value); } else { - result = String.format("select distinct FILEPATH from CHARACTERISATIONRESULTVIEW where property = '%s' and property_value %s '%s'", property, operator, value); + result = String.format("select distinct FILE_PATH from characterisationresultview where property = '%s' and property_value %s '%s'", property, operator, value); } break; default: - result = String.format("select distinct FILEPATH from CHARACTERISATIONRESULTVIEW where property = '%s' and property_value %s '%s'", property, operator, value); + result = String.format("select distinct FILE_PATH from characterisationresultview where property = '%s' and property_value %s '%s'", property, operator, value); } return result; } else if (filter instanceof AndFilterCriteria) { diff --git a/infra-rest/src/main/java/rocks/artur/endpoints/CriteriaParser.java b/infra-rest/src/main/java/rocks/artur/endpoints/CriteriaParser.java index 32732df..ff6f5e8 100644 --- a/infra-rest/src/main/java/rocks/artur/endpoints/CriteriaParser.java +++ b/infra-rest/src/main/java/rocks/artur/endpoints/CriteriaParser.java @@ -161,7 +161,9 @@ private String extractStringTokens(String searchString, Map toke String group1 = matcher.group(1); String tokenKey="quote__"+index++; tokens.put(tokenKey, group1); - searchString = searchString.replace(group1,tokenKey); + String regexPattern = String.format("%s%s%s",quoteSign, group1, quoteSign); + searchString = searchString.replaceAll(regexPattern, String.format("%s%s%s",quoteSign, tokenKey, quoteSign)); + //searchString = searchString.replace(group1,tokenKey); // Don't use this, as it replaces all matching strings. 
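The CriteriaParser change above substitutes only quote-delimited occurrences of a captured value, so an identical substring elsewhere in the filter survives (the commented-out plain replace clobbered both). A runnable sketch of one substitution round; note that the captured value is fed to replaceAll as a regular expression, so values containing regex metacharacters would additionally need Pattern.quote:

    import java.util.HashMap;
    import java.util.Map;

    public class QuoteTokenSketch {
        public static void main(String[] args) {
            String quoteSign = "\"";
            String searchString = "FORMAT=\"PDF\" AND COMMENT=PDF";
            Map<String, String> tokens = new HashMap<>();

            String group1 = "PDF";       // value captured between the quote signs
            String tokenKey = "quote__0";
            tokens.put(tokenKey, group1);

            String regexPattern = String.format("%s%s%s", quoteSign, group1, quoteSign);
            searchString = searchString.replaceAll(
                    regexPattern, String.format("%s%s%s", quoteSign, tokenKey, quoteSign));

            // FORMAT="quote__0" AND COMMENT=PDF -- the unquoted PDF is untouched
            System.out.println(searchString);
        }
    }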
} return searchString; } diff --git a/infra-rest/src/main/java/rocks/artur/endpoints/RestService.java b/infra-rest/src/main/java/rocks/artur/endpoints/RestService.java index 268af6c..28d0cec 100644 --- a/infra-rest/src/main/java/rocks/artur/endpoints/RestService.java +++ b/infra-rest/src/main/java/rocks/artur/endpoints/RestService.java @@ -7,6 +7,7 @@ import org.springframework.web.bind.annotation.*; import org.springframework.web.multipart.MultipartFile; import rocks.artur.api.*; +import rocks.artur.api_impl.utils.ByteFile; import rocks.artur.domain.CharacterisationResult; import rocks.artur.domain.FilterCriteria; import rocks.artur.domain.Property; @@ -20,6 +21,7 @@ import javax.ws.rs.core.Response; import java.io.IOException; import java.text.ParseException; +import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -36,6 +38,7 @@ public class RestService { GetPropertyValueDistribution getPropertyValueDistribution; AnalyzePersistFile analyzePersistFile; GetCollectionStatistics getCollectionStatistics; + GetDatasetInfo getDatasetInfo; ResolveConflicts resolveConflicts; @@ -43,7 +46,7 @@ public RestService(GetProperties getProperties, GetPropertyValueDistribution getPropertyValueDistribution, AnalyzePersistFile analyzePersistFile, GetObjects getObjects, GetCollectionStatistics getCollectionStatistics, - GetSources getSources, GetSamples getSamples, ResolveConflicts resolveConflicts) { + GetSources getSources, GetSamples getSamples, ResolveConflicts resolveConflicts, GetDatasetInfo getDatasetInfo) { this.getProperties = getProperties; this.getObjects = getObjects; this.getPropertyValueDistribution = getPropertyValueDistribution; @@ -52,6 +55,7 @@ public RestService(GetProperties getProperties, this.getSources = getSources; this.getSamples = getSamples; this.resolveConflicts = resolveConflicts; + this.getDatasetInfo = getDatasetInfo; } @RequestMapping(method = RequestMethod.GET, value = "/health") @@ -60,8 +64,9 @@ public String getHealth() { } @RequestMapping(method = RequestMethod.GET, value = "/sources") - public List getSources() { - List sources = getSources.getSources(); + public List getSources( + @RequestParam(name = "datasetName", required = true, defaultValue = "default") @Parameter(name = "datasetName", description = "dataset name", example = "default") String datasetName) { + List sources = getSources.getSources(datasetName); return sources; } @@ -72,10 +77,11 @@ public String[] getOperators() { } @RequestMapping(method = RequestMethod.GET, value = "/properties") - public List getProperties(@RequestParam(name = "filter", required = false) @Parameter(name = "filter", description = "Filter", example = "FORMAT=\"Portable Document Format\"") String filter) throws ParseException { + public List getProperties(@RequestParam(name = "filter", required = false) @Parameter(name = "filter", description = "Filter", example = "FORMAT=\"Portable Document Format\"") String filter, + @RequestParam(name = "datasetName", required = true, defaultValue = "default") @Parameter(name = "datasetName", description = "dataset name", example = "default") String datasetName) throws ParseException { CriteriaParser parser = new CriteriaParser(); FilterCriteria filterCriteria = parser.parse(filter); - List propertyDistribution = getProperties.getProperties(filterCriteria); + List propertyDistribution = getProperties.getProperties(filterCriteria, datasetName); return propertyDistribution; } @@ -83,38 +89,41 @@ public List getProperties(@RequestParam(name = 
"filter", requ @RequestMapping(method = RequestMethod.POST, value = "/object") @Consumes(MediaType.APPLICATION_JSON) public Iterable getObject( - @RequestParam(name = "filepath", required = true) @Parameter(name = "filepath", description = "Filepath of a digital object", example = "/home/user/file1") String filepath) { - Iterable objects = getObjects.getObject(filepath); + @RequestParam(name = "filepath", required = true) @Parameter(name = "filepath", description = "Filepath of a digital object", example = "/home/user/file1") String filepath, + @RequestParam(name = "datasetName", required = true, defaultValue = "default") @Parameter(name = "datasetName", description = "dataset name", example = "default") String datasetName) { + Iterable objects = getObjects.getObject(filepath, datasetName); return objects; } @RequestMapping(method = RequestMethod.POST, value = "/objectconflicts") @Consumes(MediaType.APPLICATION_JSON) public List getConflictsPerObject( - @RequestParam(name = "filepath", required = true) @Parameter(name = "filepath", description = "Filepath of a digital object", example = "/home/user/file1") String filepath) { - List objects = getObjects.getConflictsFromObject(filepath); + @RequestParam(name = "filepath", required = true) @Parameter(name = "filepath", description = "Filepath of a digital object", example = "/home/user/file1") String filepath, + @RequestParam(name = "datasetName", required = true, defaultValue = "default") @Parameter(name = "datasetName", description = "dataset name", example = "default") String datasetName) { + List objects = getObjects.getConflictsFromObject(filepath, datasetName); List collect = objects.stream().map(item -> item.getProperty()).collect(Collectors.toList()); return collect; } - - @RequestMapping(method = RequestMethod.POST, value = "/statistics") - public Map getCollectionStatistics(@RequestParam(name = "filter", required = false) @Parameter(name = "filter", description = "Filter", example = "FORMAT=\"Portable Document Format\"") String filter) throws ParseException { + @Consumes(MediaType.APPLICATION_JSON) + public Map getCollectionStatistics(@RequestParam(name = "filter", required = false) @Parameter(name = "filter", description = "Filter", example = "FORMAT=\"Portable Document Format\"") String filter, + @RequestParam(name = "datasetName", required = true, defaultValue = "default") @Parameter(name = "datasetName", description = "dataset name", example = "default") String datasetName) throws ParseException { CriteriaParser parser = new CriteriaParser(); FilterCriteria filterCriteria = parser.parse(filter); - Map sizeStatistics = getCollectionStatistics.getStatistics(filterCriteria); + Map sizeStatistics = getCollectionStatistics.getStatistics(filterCriteria, datasetName); return sizeStatistics; } @RequestMapping(method = RequestMethod.POST, value = "/objects") @Consumes(MediaType.APPLICATION_JSON) - public List getObjects(@RequestParam(name = "filter", required = false) @Parameter(name = "filter", description = "Filter", example = "FORMAT=\"Portable Document Format\"") String filter) throws ParseException { + public List getObjects(@RequestParam(name = "filter", required = false) @Parameter(name = "filter", description = "Filter", example = "FORMAT=\"Portable Document Format\"") String filter, + @RequestParam(name = "datasetName", required = true, defaultValue = "default") @Parameter(name = "datasetName", description = "dataset name", example = "default") String datasetName) throws ParseException { CriteriaParser parser = new 
CriteriaParser(); FilterCriteria filterCriteria = parser.parse(filter); - List objects = getObjects.getObjects(filterCriteria); + List objects = getObjects.getObjects(filterCriteria, datasetName); return objects; } @@ -123,7 +132,8 @@ public List getObjects(@RequestParam(name = "filte @Consumes(MediaType.APPLICATION_JSON) public List getPropertyValueDistribution( @RequestParam(name = "property", required = true) @Parameter(name = "property", description = "Property of a digital object", example = "FORMAT") Property property, - @RequestParam(name = "filter", required = false) @Parameter(name = "filter", description = "Filter", example = "FORMAT=\"Portable Document Format\"") String filter) throws ParseException { + @RequestParam(name = "filter", required = false) @Parameter(name = "filter", description = "Filter", example = "FORMAT=\"Portable Document Format\"") String filter, + @RequestParam(name = "datasetName", required = true, defaultValue = "default") @Parameter(name = "datasetName", description = "dataset name", example = "default") String datasetName) throws ParseException { LOG.debug("filter: " + filter); @@ -131,7 +141,7 @@ public List getPropertyValueDistribution( FilterCriteria filterCriteria = parser.parse(filter); List valueDistributionByProperty = - getPropertyValueDistribution.getPropertyValueDistribution(property, filterCriteria); + getPropertyValueDistribution.getPropertyValueDistribution(property, filterCriteria, datasetName); return valueDistributionByProperty; @@ -143,7 +153,8 @@ public List getPropertyValueDistribution( public Iterable getSamples( @RequestParam(name = "algorithm", required = true) @Parameter(name = "algorithm", description = "Sampling algorithm", example = "RANDOM") SamplingAlgorithms algorithm, @RequestParam(name = "properties", required = false) @Parameter(name = "properties", description = "A list of properties") List properties, - @RequestParam(name = "filter", required = false) @Parameter(name = "filter", description = "Filter", example = "FORMAT=\"Portable Document Format\"") String filter) throws ParseException { + @RequestParam(name = "filter", required = false) @Parameter(name = "filter", description = "Filter", example = "FORMAT=\"Portable Document Format\"") String filter, + @RequestParam(name = "datasetName", required = true, defaultValue = "default") @Parameter(name = "datasetName", description = "dataset name", example = "default") String datasetName) throws ParseException { CriteriaParser parser = new CriteriaParser(); FilterCriteria filterCriteria = parser.parse(filter); @@ -151,7 +162,7 @@ public Iterable getSamples( getSamples.setAlgorithm(algorithm); getSamples.setProperties(properties); - Iterable objects = getSamples.getObjects(filterCriteria); + Iterable objects = getSamples.getObjects(filterCriteria, datasetName); return objects; } @@ -160,7 +171,8 @@ public Iterable getSamples( public List getSamplingInfo( @RequestParam(name = "algorithm", required = true) @Parameter(name = "algorithm", description = "Sampling algorithm", example = "RANDOM") SamplingAlgorithms algorithm, @RequestParam(name = "properties", required = false) @Parameter(name = "properties", description = "A list of properties") List properties, - @RequestParam(name = "filter", required = false) @Parameter(name = "filter", description = "Filter", example = "FORMAT=\"Portable Document Format\"") String filter) throws ParseException { + @RequestParam(name = "filter", required = false) @Parameter(name = "filter", description = "Filter", example = "FORMAT=\"Portable 
Document Format\"") String filter, + @RequestParam(name = "datasetName", required = true, defaultValue = "default") @Parameter(name = "datasetName", description = "dataset name", example = "default") String datasetName) throws ParseException { CriteriaParser parser = new CriteriaParser(); FilterCriteria filterCriteria = parser.parse(filter); @@ -168,7 +180,7 @@ public List getSamplingInfo( getSamples.setAlgorithm(algorithm); getSamples.setProperties(properties); - List samplingInfo = getSamples.getSamplingInfo(filterCriteria); + List samplingInfo = getSamples.getSamplingInfo(filterCriteria, datasetName); return samplingInfo; } @@ -176,11 +188,13 @@ public List getSamplingInfo( @RequestMapping(method = RequestMethod.POST, value = "/upload", consumes = { "multipart/form-data"}) public Response ProcessFile( - @RequestParam(name = "file", required = true) @Parameter(name = "file", description = "Please select a digital object to upload") MultipartFile file) throws IOException { + @RequestParam(name = "file", required = true) @Parameter(name = "file", description = "Please select a digital object to upload") MultipartFile file, + @RequestParam(name = "datasetName", required = true, defaultValue = "default") @Parameter(name = "datasetName", description = "dataset name", example = "default") String datasetName) throws IOException { String filename = file.getOriginalFilename(); byte[] bytes = file.getBytes(); LOG.debug(String.format("Processing file { %s }", file.getOriginalFilename())); - Long totalCount = analyzePersistFile.uploadCharacterisationResults(bytes, filename); + ByteFile byteFile = new ByteFile(bytes, filename); + Long totalCount = analyzePersistFile.uploadCharacterisationResults(byteFile, datasetName); Response response = Response.ok(totalCount).build(); @@ -191,25 +205,30 @@ public Response ProcessFile( @RequestMapping(method = RequestMethod.POST, value = "/multipleupload", consumes = { "multipart/form-data"}) - public Response ProcessFiles(@RequestPart(name = "files", required = true) @Parameter(name = "files", description = "A list of digital objects to upload") MultipartFile[] files) throws IOException { + public Response ProcessFiles(@RequestPart(name = "files", required = true) @Parameter(name = "files", description = "A list of digital objects to upload") MultipartFile[] files, + @RequestParam(name = "datasetName", required = true, defaultValue = "default") @Parameter(name = "datasetName", description = "dataset name", example = "default") String datasetName) throws IOException { Long totalCount = 0L; - for (MultipartFile uploadfile : files) { - LOG.debug(String.format("Processing file { %s }", uploadfile.getOriginalFilename())); - byte[] bytes = uploadfile.getBytes(); - String filename = uploadfile.getOriginalFilename(); - totalCount += analyzePersistFile.uploadCharacterisationResults(bytes, filename); + List byteFiles = new ArrayList<>(); + for (MultipartFile file : files) { + LOG.debug(String.format("Processing file { %s }", file.getOriginalFilename())); + ByteFile byteFile = new ByteFile(file.getBytes(), file.getOriginalFilename()); + byteFiles.add(byteFile); } - - Response response = - Response.ok(totalCount).build(); - + analyzePersistFile.uploadCharacterisationResults(byteFiles, datasetName); + Response response = Response.ok(totalCount).build(); return response; } @RequestMapping(method = RequestMethod.POST, value = "/resolveconflicts") @Consumes(MediaType.APPLICATION_JSON) - public void resolveConflicts() throws ParseException { - resolveConflicts.run(); + public void 
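The reworked /multipleupload handler above wraps each MultipartFile into a ByteFile (content plus original name) and hands the whole batch to the use case in one call instead of one call per file. A trimmed sketch of that collection step; ByteFile and the returned count are modelled as assumptions here (in the excerpt the batch call's return value is not used), so the real signatures may differ:

    import java.util.ArrayList;
    import java.util.List;

    public class UploadBatchSketch {
        // Stand-ins for MultipartFile and the project's ByteFile wrapper.
        record Upload(String originalFilename, byte[] bytes) {}
        record ByteFile(byte[] bytes, String filename) {}

        interface AnalyzePersistFile {
            Long uploadCharacterisationResults(List<ByteFile> files, String datasetName);
        }

        static Long processFiles(AnalyzePersistFile analyzePersistFile,
                                 Upload[] files, String datasetName) {
            List<ByteFile> byteFiles = new ArrayList<>();
            for (Upload file : files) {
                byteFiles.add(new ByteFile(file.bytes(), file.originalFilename()));
            }
            // One use-case call for the whole batch.
            return analyzePersistFile.uploadCharacterisationResults(byteFiles, datasetName);
        }
    }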
resolveConflicts(@RequestParam(name = "datasetName", required = true, defaultValue = "default") @Parameter(name = "datasetName", description = "dataset name", example = "default") String datasetName) throws ParseException { + resolveConflicts.run(datasetName); + } + + @RequestMapping(method = RequestMethod.GET, value = "/datasets") + @Consumes(MediaType.APPLICATION_JSON) + public List listDatasets() { + return getDatasetInfo.listDatasets(); } } diff --git a/main/pom.xml b/main/pom.xml index 48b68e1..80accc8 100644 --- a/main/pom.xml +++ b/main/pom.xml @@ -32,21 +32,19 @@ 0.1.0 compile - org.springframework.boot spring-boot-starter-web - org.springframework.boot - spring-boot-starter-data-jpa + net.ttddyy + datasource-proxy + 1.4.1 - - junit junit diff --git a/main/src/main/java/rocks/artur/App.java b/main/src/main/java/rocks/artur/App.java index 9b6751e..6fd51fb 100644 --- a/main/src/main/java/rocks/artur/App.java +++ b/main/src/main/java/rocks/artur/App.java @@ -1,5 +1,7 @@ package rocks.artur; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.beans.factory.config.BeanFactoryPostProcessor; import org.springframework.beans.factory.support.BeanDefinitionRegistry; import org.springframework.boot.SpringApplication; @@ -8,13 +10,14 @@ import org.springframework.context.annotation.ClassPathBeanDefinitionScanner; import org.springframework.core.type.classreading.MetadataReader; import org.springframework.core.type.classreading.MetadataReaderFactory; -import org.springframework.core.type.filter.TypeFilter; -import org.springframework.data.jpa.repository.config.EnableJpaRepositories; + +import java.util.Arrays; @SpringBootApplication public class App { + private static final Logger LOG = LoggerFactory.getLogger(App.class); public static void main(String[] args) { SpringApplication.run(App.class); } @@ -23,22 +26,26 @@ public static void main(String[] args) { BeanFactoryPostProcessor beanFactoryPostProcessor() { return beanFactory -> genericApplicationContext((BeanDefinitionRegistry) beanFactory); } - void genericApplicationContext(BeanDefinitionRegistry beanRegistry) { ClassPathBeanDefinitionScanner beanDefinitionScanner = new ClassPathBeanDefinitionScanner(beanRegistry); - beanDefinitionScanner.addIncludeFilter(removeModelAndEntitiesFilter()); - beanDefinitionScanner.scan("rocks.artur.api","rocks.artur.api_impl","rocks.artur.FITSClient","rocks.artur.jpa","rocks.artur.endpoints.RestService"); + String profile = System.getenv("DB_SELECTOR") == null ? System.getProperty("spring.profiles.active", "h2") : System.getenv("DB_SELECTOR"); + System.out.println(profile); + beanDefinitionScanner.addIncludeFilter(App::match); + String[] packages; + switch (profile) { + case "clickhouse" -> + packages = new String[]{"rocks.artur.api", "rocks.artur.api_impl", "rocks.artur.FITSClient", "rocks.artur.endpoints.RestService", "rocks.artur.clickhouse"}; + case "h2", "mysql" -> + packages = new String[]{"rocks.artur.api", "rocks.artur.api_impl", "rocks.artur.FITSClient", "rocks.artur.endpoints.RestService", "rocks.artur.jpa"}; + default -> + throw new UnsupportedOperationException("The selected db is not supported. 
Choose one from [clickhouse, mysql, h2]"); + } + beanDefinitionScanner.scan(packages); } - - static TypeFilter removeModelAndEntitiesFilter() { - return (MetadataReader mr, MetadataReaderFactory mrf) -> { - return !mr.getClassMetadata() - .getClassName() - .startsWith("rocks.artur.domain") && - !mr.getClassMetadata() - .getClassName() - .startsWith("rocks.artur.api_impl.filter") - ; - }; + private static boolean match(MetadataReader mr, MetadataReaderFactory mrf) { + String className = mr.getClassMetadata().getClassName(); + LOG.debug(className); + String[] packagesToIgnore = new String[]{"rocks.artur.domain", "rocks.artur.api_impl.filter", "rocks.artur.api_impl.utils"}; + return Arrays.stream(packagesToIgnore).noneMatch(className::startsWith); } } \ No newline at end of file diff --git a/main/src/main/java/rocks/artur/DatasourceProxyBeanPostProcessor.java b/main/src/main/java/rocks/artur/DatasourceProxyBeanPostProcessor.java new file mode 100644 index 0000000..d60b215 --- /dev/null +++ b/main/src/main/java/rocks/artur/DatasourceProxyBeanPostProcessor.java @@ -0,0 +1,51 @@ +package rocks.artur; + +import java.lang.reflect.Method; +import javax.sql.DataSource; +import net.ttddyy.dsproxy.support.ProxyDataSourceBuilder; +import org.aopalliance.intercept.MethodInterceptor; +import org.aopalliance.intercept.MethodInvocation; +import org.springframework.aop.framework.ProxyFactory; +import org.springframework.beans.BeansException; +import org.springframework.beans.factory.config.BeanPostProcessor; +import org.springframework.stereotype.Component; +import org.springframework.util.ReflectionUtils; + +//@Component +public class DatasourceProxyBeanPostProcessor implements BeanPostProcessor { + + @Override + public Object postProcessBeforeInitialization(final Object bean, final String beanName) throws BeansException { + return bean; + } + + @Override + public Object postProcessAfterInitialization(final Object bean, final String beanName) throws BeansException { + if (bean instanceof DataSource) { + ProxyFactory factory = new ProxyFactory(bean); + factory.setProxyTargetClass(true); + factory.addAdvice(new ProxyDataSourceInterceptor((DataSource) bean)); + return factory.getProxy(); + } + + return bean; + } + + private static class ProxyDataSourceInterceptor implements MethodInterceptor { + + private final DataSource dataSource; + + public ProxyDataSourceInterceptor(final DataSource dataSource) { + this.dataSource = ProxyDataSourceBuilder.create(dataSource).name("Batch-Insert-Logger").asJson().countQuery().logQueryToSysOut().build(); + } + + @Override + public Object invoke(final MethodInvocation invocation) throws Throwable { + Method proxyMethod = ReflectionUtils.findMethod(dataSource.getClass(), invocation.getMethod().getName()); + if (proxyMethod != null) { + return proxyMethod.invoke(dataSource, invocation.getArguments()); + } + return invocation.proceed(); + } + } +} \ No newline at end of file diff --git a/main/src/main/java/rocks/artur/WebConfig.java b/main/src/main/java/rocks/artur/WebConfig.java new file mode 100644 index 0000000..afb3fc7 --- /dev/null +++ b/main/src/main/java/rocks/artur/WebConfig.java @@ -0,0 +1,17 @@ +package rocks.artur; + +import org.springframework.context.annotation.Configuration; +import org.springframework.web.servlet.config.annotation.CorsRegistry; +import org.springframework.web.servlet.config.annotation.WebMvcConfigurer; + +//@Configuration +public class WebConfig implements WebMvcConfigurer { + + @Override + public void addCorsMappings(CorsRegistry registry) { + 
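App above now derives the packages to scan from DB_SELECTOR, falling back to the spring.profiles.active system property and finally to h2, so exactly one persistence implementation is wired per run. The dispatch, isolated as a runnable sketch with the package lists taken verbatim from the diff:

    public class ProfileDispatchSketch {
        static String[] packagesFor(String profile) {
            return switch (profile) {
                case "clickhouse" -> new String[]{
                        "rocks.artur.api", "rocks.artur.api_impl", "rocks.artur.FITSClient",
                        "rocks.artur.endpoints.RestService", "rocks.artur.clickhouse"};
                case "h2", "mysql" -> new String[]{
                        "rocks.artur.api", "rocks.artur.api_impl", "rocks.artur.FITSClient",
                        "rocks.artur.endpoints.RestService", "rocks.artur.jpa"};
                default -> throw new UnsupportedOperationException(
                        "The selected db is not supported. Choose one from [clickhouse, mysql, h2]");
            };
        }

        public static void main(String[] args) {
            String profile = System.getenv("DB_SELECTOR") == null
                    ? System.getProperty("spring.profiles.active", "h2")
                    : System.getenv("DB_SELECTOR");
            System.out.println(String.join(", ", packagesFor(profile)));
        }
    }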
registry.addMapping("/**") + .allowedOrigins("*") + .allowedMethods("GET", "POST", "PUT", "DELETE") + .allowedHeaders("*"); + } +} \ No newline at end of file diff --git a/main/src/main/resources/application-clickhouse.properties b/main/src/main/resources/application-clickhouse.properties new file mode 100644 index 0000000..d47113c --- /dev/null +++ b/main/src/main/resources/application-clickhouse.properties @@ -0,0 +1,30 @@ +spring.datasource.url=jdbc:clickhouse://db-docker:8123/default +spring.datasource.driverClassName=com.clickhouse.jdbc.ClickHouseDriver +spring.datasource.username=default +spring.datasource.password= + +spring.autoconfigure.exclude= \ +org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration, \ +org.springframework.boot.autoconfigure.orm.jpa.HibernateJpaAutoConfiguration, \ +org.springframework.boot.autoconfigure.jdbc.DataSourceTransactionManagerAutoConfiguration + +spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.MySQLDialect + +#spring.sql.init.mode=always +#spring.sql.init.platform=h2 + +#spring.datasource.url=jdbc:h2:mem:default;DB_CLOSE_DELAY=-1 +#spring.datasource.driverClassName=org.h2.Driver +#spring.datasource.username=sa +#spring.datasource.password= + +#spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.H2Dialect +#spring.jpa.hibernate.ddl-auto=none +#spring.h2.console.enabled=true +# default path: h2-console + +spring.servlet.multipart.max-file-size=1GB +spring.servlet.multipart.max-request-size=1GB +#spring.jpa.properties.hibernate.jdbc.batch_size=1000 +#spring.jpa.properties.hibernate.order_inserts=true +#spring.jpa.properties.hibernate.order_updates=true diff --git a/main/src/main/resources/application-h2.properties b/main/src/main/resources/application-h2.properties new file mode 100644 index 0000000..95628b2 --- /dev/null +++ b/main/src/main/resources/application-h2.properties @@ -0,0 +1,18 @@ +spring.sql.init.mode=always +#spring.sql.init.platform=h2 + +spring.datasource.url=jdbc:h2:mem:default;DB_CLOSE_DELAY=-1 +spring.datasource.driverClassName=org.h2.Driver +spring.datasource.username=sa +spring.datasource.password= + +spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.H2Dialect +spring.jpa.hibernate.ddl-auto=none +spring.h2.console.enabled=true +# default path: h2-console + +spring.servlet.multipart.max-file-size=1GB +spring.servlet.multipart.max-request-size=1GB +spring.jpa.properties.hibernate.jdbc.batch_size=1000 +spring.jpa.properties.hibernate.order_inserts=true +spring.jpa.properties.hibernate.order_updates=true diff --git a/main/src/main/resources/application-mysql.properties b/main/src/main/resources/application-mysql.properties new file mode 100644 index 0000000..c11a2ed --- /dev/null +++ b/main/src/main/resources/application-mysql.properties @@ -0,0 +1,23 @@ +spring.sql.init.mode=always +spring.jpa.hibernate.ddl-auto=none + +spring.datasource.url=jdbc:mysql://db-docker:3306/fitsinn +spring.datasource.username=user +spring.datasource.password=pass +spring.datasource.driverClassName=com.mysql.jdbc.Driver +spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.MySQLDialect + + +spring.jpa.show-sql=false + +spring.h2.console.enabled=true + +spring.servlet.multipart.max-file-size=1GB +spring.servlet.multipart.max-request-size=1GB +#logging.level.org.hibernate.SQL=DEBUG +#logging.level.org.hibernate.type.descriptor.sql.BasicBinder=TRACE + + +spring.jpa.properties.hibernate.jdbc.batch_size=1000 +spring.jpa.properties.hibernate.order_inserts=true 
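The h2 and mysql profiles above enable Hibernate JDBC batching (batch_size=1000 with ordered inserts and updates). This pairs with the UUID id strategy: Hibernate silently disables insert batching for IDENTITY-generated keys, but not for client-side generators. What batching buys, expressed in plain JDBC against the same table (a sketch assuming the H2 driver is on the classpath):

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.PreparedStatement;
    import java.sql.SQLException;
    import java.sql.Statement;

    public class BatchInsertSketch {
        public static void main(String[] args) throws SQLException {
            try (Connection con = DriverManager.getConnection("jdbc:h2:mem:default");
                 Statement ddl = con.createStatement()) {
                ddl.execute("create table characterisationresult (" +
                        "id varchar(255) primary key, file_path varchar(255), " +
                        "property varchar(255), source varchar(255), " +
                        "property_value varchar(255), value_type varchar(255))");

                try (PreparedStatement ps = con.prepareStatement(
                        "insert into characterisationresult values (?,?,?,?,?,?)")) {
                    for (int i = 0; i < 1000; i++) {
                        ps.setString(1, "id-" + i);
                        ps.setString(2, "/home/file" + i);
                        ps.setString(3, "SIZE");
                        ps.setString(4, "Jhove:1.21");
                        ps.setString(5, String.valueOf(i));
                        ps.setString(6, "INTEGER");
                        ps.addBatch();       // buffered client-side
                    }
                    ps.executeBatch();       // flushed in one batched round trip
                }
            }
        }
    }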
+spring.jpa.properties.hibernate.order_updates=true \ No newline at end of file diff --git a/main/src/main/resources/application.properties b/main/src/main/resources/application.properties index f64ae82..e7209de 100644 --- a/main/src/main/resources/application.properties +++ b/main/src/main/resources/application.properties @@ -1,25 +1,2 @@ -spring.profiles.active=@active.profile@ - - -spring.datasource.url=jdbc:h2:mem:default;DB_CLOSE_DELAY=-1 -spring.datasource.driverClassName=org.h2.Driver -spring.datasource.username=sa -spring.datasource.password= - -spring.jpa.show-sql=false -#logging.level.org.hibernate.SQL=DEBUG -#logging.level.org.hibernate.type.descriptor.sql.BasicBinder=TRACE -spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.H2Dialect -spring.jpa.hibernate.ddl-auto=none -spring.sql.init.mode=always -spring.h2.console.enabled=true -# default path: h2-console - -spring.servlet.multipart.max-file-size=1GB -spring.servlet.multipart.max-request-size=1GB -#server.port=8080 - - -spring.jpa.properties.hibernate.jdbc.batch_size=50 -spring.jpa.properties.hibernate.order_inserts=true -spring.jpa.properties.hibernate.order_updates=true +#this defines what application.properties to use in run-time +spring.profiles.active=${DB_SELECTOR:h2} diff --git a/main/src/main/resources/data.sql b/main/src/main/resources/data.sql index 49d7467..ddada94 100644 --- a/main/src/main/resources/data.sql +++ b/main/src/main/resources/data.sql @@ -1,15 +1,15 @@ -INSERT INTO characterisationresult (filePath, property, source, property_value, valueType) +INSERT INTO characterisationresult (id, file_path, property, source, property_value, value_type) VALUES -('/home/artur', 'FORMAT', 'file utility:5.03', 'Portable Document Format', 'STRING'), -('/home/artur', 'EXTERNALIDENTIFIER', 'Droid:3', 'fmt/18', 'STRING'), -('/home/artur', 'MIMETYPE', 'TIKA:2', 'application/doc', 'STRING'), -('/home/artur', 'MIMETYPE', 'file utility:5.03', 'application/doc', 'STRING'), -('/home/artur', 'SIZE', 'Jhove:1.21', '4', 'INTEGER'), -('/home/artur2', 'FORMAT', 'Droid:3', 'JPEG File Interchange Format', 'STRING'), -('/home/artur2', 'MIMETYPE', 'Droid:3', 'image/jpeg', 'STRING'), -('/home/artur2', 'SIZE', 'Jhove:1.21', '43', 'INTEGER'), -('/home/artur3', 'SIZE', 'Jhove:1.21', '10000', 'INTEGER'); +(1, '/home/artur', 'FORMAT', 'file utility:5.03', 'Portable Document Format', 'STRING'), +(2, '/home/artur', 'EXTERNALIDENTIFIER', 'Droid:3', 'fmt/18', 'STRING'), +(3, '/home/artur', 'MIMETYPE', 'TIKA:2', 'application/doc', 'STRING'), +(4, '/home/artur', 'MIMETYPE', 'file utility:5.03', 'application/doc', 'STRING'), +(5, '/home/artur', 'SIZE', 'Jhove:1.21', '4', 'INTEGER'), +(6, '/home/artur2', 'FORMAT', 'Droid:3', 'JPEG File Interchange Format', 'STRING'), +(7, '/home/artur2', 'MIMETYPE', 'Droid:3', 'image/jpeg', 'STRING'), +(8, '/home/artur2', 'SIZE', 'Jhove:1.21', '43', 'INTEGER'), +(9, '/home/artur3', 'SIZE', 'Jhove:1.21', '10000', 'INTEGER'); diff --git a/main/src/main/resources/schema.sql b/main/src/main/resources/schema.sql index c86ad4b..436182d 100644 --- a/main/src/main/resources/schema.sql +++ b/main/src/main/resources/schema.sql @@ -1,24 +1,39 @@ -DROP ALL OBJECTS; +DROP VIEW IF EXISTS characterisationresultview; +DROP TABLE IF EXISTS characterisationresult; CREATE TABLE characterisationresult ( -id INTEGER NOT NULL AUTO_INCREMENT, -filePath varchar(400) NOT NULL, -property varchar(200) NOT NULL, -source varchar(200) NOT NULL, -property_value varchar(400) NOT NULL, -valueType varchar(200) NOT NULL, -CONSTRAINT 
PK_Characterisationresult PRIMARY KEY (filePath,property, source) +id varchar(255) NOT NULL, +file_path varchar(255) NOT NULL, +property varchar(255) NOT NULL, +source varchar(255) NOT NULL, +property_value varchar(255) NOT NULL, +value_type varchar(255) NOT NULL, +PRIMARY KEY ( id ) ); -CREATE INDEX idx_characterisationresult_filepath - ON characterisationresult (filePath); + +CREATE INDEX idx_characterisationresult_1 + ON characterisationresult ( property, property_value); + +CREATE INDEX idx_characterisationresult_2 + ON characterisationresult ( source); + +CREATE INDEX idx_characterisationresult_3 + ON characterisationresult ( value_type, file_path); + +CREATE INDEX idx_characterisationresult_4 + ON characterisationresult (property_value); + +CREATE INDEX idx_characterisationresult_5 + ON characterisationresult (file_path, property, value_type); + CREATE VIEW characterisationresultview AS -SELECT t.filePath, t.property, t.valueType, +SELECT t.file_path, t.property, t.value_type, CASE WHEN COUNT(distinct t.property_value) = 1 THEN MIN(t.property_value) ELSE 'CONFLICT' END AS property_value FROM characterisationresult t -GROUP BY t.filePath, t.property,t.valueType; +GROUP BY t.file_path, t.property,t.value_type; diff --git a/main/src/test/java/rocks/artur/CRHResolveConflictsImplTest.java b/main/src/test/java/rocks/artur/CRHResolveConflictsImplTest.java new file mode 100644 index 0000000..320ce82 --- /dev/null +++ b/main/src/test/java/rocks/artur/CRHResolveConflictsImplTest.java @@ -0,0 +1,43 @@ +package rocks.artur; + + +import org.junit.jupiter.api.Test; +import org.junit.runner.RunWith; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.ActiveProfiles; +import org.springframework.test.context.junit4.SpringRunner; +import rocks.artur.api_impl.CRH_ResolveConflictsImpl; +import rocks.artur.domain.CharacterisationResult; +import rocks.artur.jpa.CharacterisationResultGatewayJpaImpl; + +import java.util.ArrayList; +import java.util.List; + +@ActiveProfiles("h2") +@RunWith(SpringRunner.class) +@SpringBootTest +class CRHResolveConflictsImplTest { + + @Autowired + CharacterisationResultGatewayJpaImpl characterisationResultGatewaySqlImpl; + + @Autowired + CRH_ResolveConflictsImpl resolveConflicts; + @Test + void getAllTest() { + + Iterable characterisationResults = + characterisationResultGatewaySqlImpl.getCharacterisationResults(null, ""); + double conflictRateBefore = characterisationResultGatewaySqlImpl.getConflictRate(""); + List list = new ArrayList<>(); + characterisationResults.forEach(list::add); + //Assert.assertEquals(10, list.size()); + + //resolveConflicts.run(); + + double conflictRateAfter = characterisationResultGatewaySqlImpl.getConflictRate(""); + + System.out.println(String.format("Conflict rate: before - %4.3f, after - %4.3f", conflictRateBefore, conflictRateAfter)); + } +} \ No newline at end of file diff --git a/main/src/test/java/rocks/artur/CharacterisationResultGatewayImplTest.java b/main/src/test/java/rocks/artur/CharacterisationResultGatewayImplTest.java index 9cd38da..2f5cfa2 100644 --- a/main/src/test/java/rocks/artur/CharacterisationResultGatewayImplTest.java +++ b/main/src/test/java/rocks/artur/CharacterisationResultGatewayImplTest.java @@ -23,7 +23,7 @@ import java.util.List; import java.util.Map; -@ActiveProfiles("dev") +@ActiveProfiles("h2") @RunWith(SpringRunner.class) @SpringBootTest class CharacterisationResultGatewayImplTest { @@ -35,7 +35,7 @@ 
class CharacterisationResultGatewayImplTest { void getAllTest() { Iterable characterisationResults = - characterisationResultGatewaySqlImpl.getCharacterisationResults(null); + characterisationResultGatewaySqlImpl.getCharacterisationResults(null, ""); List list = new ArrayList<>(); characterisationResults.forEach(list::add); @@ -45,7 +45,7 @@ void getAllTest() { @Test void getPropertyDistributionTest() { - List propertyDistribution = characterisationResultGatewaySqlImpl.getPropertyDistribution(null); + List propertyDistribution = characterisationResultGatewaySqlImpl.getPropertyDistribution(null, ""); Assert.assertEquals(4, propertyDistribution.size()); } @@ -54,7 +54,7 @@ void getPropertyValueDistributionWithFilterTest() throws ParseException { String typeFilter = "FORMAT=\"Portable Document Format\""; CriteriaParser parser = new CriteriaParser(); FilterCriteria parse = parser.parse(typeFilter); - List propertyValueDistribution = characterisationResultGatewaySqlImpl.getPropertyValueDistribution(Property.FORMAT, parse); + List propertyValueDistribution = characterisationResultGatewaySqlImpl.getPropertyValueDistribution(Property.FORMAT, parse, ""); System.out.println(propertyValueDistribution); Assert.assertEquals(1, propertyValueDistribution.size()); } @@ -62,9 +62,9 @@ void getPropertyValueDistributionWithFilterTest() throws ParseException { @Test void getPropertyValueDistributionWithoutFilterTest() { - List propertyValueDistribution = characterisationResultGatewaySqlImpl.getPropertyValueDistribution(Property.FORMAT, null); + List propertyValueDistribution = characterisationResultGatewaySqlImpl.getPropertyValueDistribution(Property.FORMAT, null, ""); System.out.println(propertyValueDistribution); - List characterisationResults = characterisationResultGatewaySqlImpl.getCharacterisationResults(null); + List characterisationResults = characterisationResultGatewaySqlImpl.getCharacterisationResults(null, ""); System.out.println(characterisationResults); Assert.assertEquals(3, propertyValueDistribution.size()); } @@ -73,14 +73,14 @@ void getPropertyValueDistributionWithoutFilterTest() { @Test void getPropertyValueFloatDistributionWithoutFilterTest() { - List propertyValueDistribution = characterisationResultGatewaySqlImpl.getPropertyValueDistribution(Property.SIZE, null); + List propertyValueDistribution = characterisationResultGatewaySqlImpl.getPropertyValueDistribution(Property.SIZE, null, ""); Assert.assertEquals(2, propertyValueDistribution.size()); } @Test void getPropertyValueDistributionWithoutFilterCONFLICTTest() { - List propertyValueDistribution = characterisationResultGatewaySqlImpl.getPropertyValueDistribution(Property.MIMETYPE, null); + List propertyValueDistribution = characterisationResultGatewaySqlImpl.getPropertyValueDistribution(Property.MIMETYPE, null, ""); Assert.assertEquals(2, propertyValueDistribution.size()); boolean conflict = propertyValueDistribution.stream().anyMatch(propertyValueStatistic -> propertyValueStatistic.getValue().equals("CONFLICT")); Assert.assertFalse(conflict); @@ -89,7 +89,7 @@ void getPropertyValueDistributionWithoutFilterCONFLICTTest() { @Test void getCharacterisationResultsByFilepathTest() { Iterable propertyValueStatistics = - characterisationResultGatewaySqlImpl.getCharacterisationResultsByFilepath("/home/artur"); + characterisationResultGatewaySqlImpl.getCharacterisationResultsByFilepath("/home/artur", ""); List list = new ArrayList<>(); propertyValueStatistics.forEach(list::add); @@ -102,7 +102,7 @@ void getCollectionStatisticsWithoutFilterTest() 
throws ParseException { String typeFilter = "FORMAT=\"Portable Document Format\""; CriteriaParser parser = new CriteriaParser(); FilterCriteria parse = parser.parse(typeFilter); - Map sizeStatistics = characterisationResultGatewaySqlImpl.getCollectionStatistics(null); + Map sizeStatistics = characterisationResultGatewaySqlImpl.getCollectionStatistics(null, ""); Assert.assertEquals(10047.0, sizeStatistics.get("totalSize"), 0.1); System.out.println(sizeStatistics); } @@ -112,7 +112,7 @@ void getCollectionStatisticsWithFilterTest() throws ParseException { String typeFilter = "FORMAT=\"Portable Document Format\""; CriteriaParser parser = new CriteriaParser(); FilterCriteria parse = parser.parse(typeFilter); - Map sizeStatistics = characterisationResultGatewaySqlImpl.getCollectionStatistics(parse); + Map sizeStatistics = characterisationResultGatewaySqlImpl.getCollectionStatistics(parse, ""); Assert.assertEquals(4.0, sizeStatistics.get("totalSize"), 0.1); System.out.println(sizeStatistics); } @@ -122,7 +122,7 @@ void getCollectionStatisticsWithFilterTest() throws ParseException { void getRandomSamplesTest() { List properties = new ArrayList<>(); properties.add(Property.FORMAT); - List samples = characterisationResultGatewaySqlImpl.getSamples(null, SamplingAlgorithms.RANDOM, properties); + List samples = characterisationResultGatewaySqlImpl.getSamples(null, SamplingAlgorithms.RANDOM, properties, ""); Assert.assertEquals(5, samples.size()); } @@ -130,33 +130,33 @@ void getRandomSamplesTest() { void getSFDSamplesTest() { List properties = new ArrayList<>(); properties.add(Property.FORMAT); - List samples = characterisationResultGatewaySqlImpl.getSamples(null, SamplingAlgorithms.SELECTIVE_FEATURE_DISTRIBUTION, properties); + List samples = characterisationResultGatewaySqlImpl.getSamples(null, SamplingAlgorithms.SELECTIVE_FEATURE_DISTRIBUTION, properties, ""); Assert.assertEquals(3, samples.size()); } @Test void getConflictRateTest() { - double conflictRate = characterisationResultGatewaySqlImpl.getConflictRate(); + double conflictRate = characterisationResultGatewaySqlImpl.getConflictRate(""); Assert.assertEquals(0.4,conflictRate, 0.01); } @Test void getConflictsByFilepathTest() { - List filepathProperty = characterisationResultGatewaySqlImpl.getConflictEntries(); + List filepathProperty = characterisationResultGatewaySqlImpl.getConflictEntries(""); Assert.assertEquals(2,filepathProperty.size()); } @Test void getCharacterisationResultsByFilepathPropertyTest() { - List filepathProperty = characterisationResultGatewaySqlImpl.getEntries(); + List filepathProperty = characterisationResultGatewaySqlImpl.getEntries(""); List results = new ArrayList<>(); for (Entry strings : filepathProperty) { - List characterisationResultsByFilepathProperty = characterisationResultGatewaySqlImpl.getCharacterisationResultsByEntry(strings); + List characterisationResultsByFilepathProperty = characterisationResultGatewaySqlImpl.getCharacterisationResultsByEntry(strings, ""); results.addAll(characterisationResultsByFilepathProperty); } diff --git a/main/src/test/java/rocks/artur/RestServiceTest.java b/main/src/test/java/rocks/artur/RestServiceTest.java index 82e12eb..f7f574a 100644 --- a/main/src/test/java/rocks/artur/RestServiceTest.java +++ b/main/src/test/java/rocks/artur/RestServiceTest.java @@ -21,7 +21,7 @@ import static org.mockserver.model.HttpRequest.request; import static org.mockserver.model.HttpResponse.response; -@ActiveProfiles("dev") +@ActiveProfiles("h2") @RunWith(SpringRunner.class) 
@SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.DEFINED_PORT) class RestServiceTest { @@ -102,6 +102,7 @@ void emptyTest() { @Test void getCollectionStatisticsTest() { String str = given().port(port) + .param("datasetName", "default") .when().post("/statistics") .then() .statusCode(200).extract().asString(); @@ -112,6 +113,7 @@ void getCollectionStatisticsTest() { @Test void getPropertiesTest() { String str = given().port(port) + .param("datasetName", "default") .when().get("/properties") .then() .statusCode(200).extract().asString(); @@ -122,6 +124,7 @@ void getPropertiesTest() { @Test void getSourcesTest() { String str = given().port(port) + .param("datasetName", "default") .when().get("/sources") .then() .statusCode(200).extract().asString(); @@ -139,6 +142,7 @@ void getOperatorsTest() { @Test void getObjectTest() { String str = given().port(port).param("filepath","/home/artur/file1") + .param("datasetName", "default") .when().post("/object") .then() .statusCode(200).extract().asString(); @@ -148,6 +152,7 @@ void getObjectTest() { @Test void getObjectsTest() { String str = given().port(port).param("filter", " format='docx' OR format='pdf'") + .param("datasetName", "default") .when().post("/objects") .then() .statusCode(200).extract().asString(); @@ -157,6 +162,7 @@ void getObjectsTest() { @Test void getObjectConflictsTest() { String str = given().port(port).param("filepath","/home/artur/file1") + .param("datasetName", "default") .when().post("/objectconflicts") .then() .statusCode(200).extract().asString(); @@ -169,6 +175,7 @@ void getPropertyDistributionWithFilterTest() { String str = given().port(port) .param("filter", "FORMAT=\"Portable Document Format\"") .param("property", "FORMAT") + .param("datasetName", "default") .when().post("/propertyvalues") .then() .statusCode(200).extract().asString(); @@ -179,6 +186,7 @@ void getPropertyDistributionWithFilterTest() { void getPropertyDistributionWithoutFilterTest() { String str = given().port(port) .param("property", "FORMAT") + .param("datasetName", "default") .when().post("/propertyvalues") .then() .statusCode(200).extract().asString(); @@ -211,7 +219,7 @@ void uploadFileTest() { //Then, I call my /upload endpoint, where a FITS XML is generated and the char results uploaded into DB - given().port(port).multiPart("file",file) + given().port(port).param("datasetName", "default").multiPart("file",file) .when().post("/upload") .then() .statusCode(200).extract().asString(); @@ -223,6 +231,7 @@ void uploadFileTest() { given().port(port) .param("filepath", "/usr/local/tomcat/webapps/fits/upload/1582118786085/README.md") + .param("datasetName", "default") .when().post("/object") .then() .statusCode(200).extract().asString(); @@ -234,6 +243,7 @@ void uploadFileTest() { @Test void resolveConflictsTest() { String str = given().port(port) + .param("datasetName", "default") .when().post("/resolveconflicts") .then() .statusCode(200).extract().asString(); diff --git a/main/src/test/java/rocks/artur/endpoints/CriteriaParserTest.java b/main/src/test/java/rocks/artur/endpoints/CriteriaParserTest.java index af64ce5..fe22c4b 100644 --- a/main/src/test/java/rocks/artur/endpoints/CriteriaParserTest.java +++ b/main/src/test/java/rocks/artur/endpoints/CriteriaParserTest.java @@ -96,6 +96,22 @@ void SpELTest() throws ParseException { System.out.println(parse); } + + + @Test + void BrokenCaseTest() throws ParseException { + + String s = "FORMAT == \"Hypertext Markup Language\" && FORMAT_VERSION == \"Hypertext Markup Language HTML 3.2\""; + + 
CriteriaParser parser = new CriteriaParser(); + + FilterCriteria parse = parser.parse(s); + + System.out.println(parse); + + Assert.assertEquals("AndFilterCriteria{criteria=SingleFilterCriteria{searchKey=FORMAT_VERSION, operation=EQUAL, searchValue='Hypertext Markup Language HTML 3.2'}, otherCriteria=SingleFilterCriteria{searchKey=FORMAT, operation=EQUAL, searchValue='Hypertext Markup Language'}}", parse.toString()); + } + } diff --git a/main/src/test/resources/application.properties b/main/src/test/resources/application.properties index 304cf82..14950ed 100644 --- a/main/src/test/resources/application.properties +++ b/main/src/test/resources/application.properties @@ -1,4 +1,5 @@ - +spring.sql.init.mode=always +spring.jpa.hibernate.ddl-auto=none spring.datasource.url=jdbc:h2:mem:default;DB_CLOSE_DELAY=-1 spring.datasource.driverClassName=org.h2.Driver @@ -9,8 +10,8 @@ spring.jpa.show-sql=false #logging.level.org.hibernate.SQL=DEBUG #logging.level.org.hibernate.type.descriptor.sql.BasicBinder=TRACE spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.H2Dialect -spring.jpa.hibernate.ddl-auto=none -spring.sql.init.mode=always + + spring.h2.console.enabled=true # default path: h2-console diff --git a/main/src/test/resources/data.sql b/main/src/test/resources/data.sql index 549fa13..600a4c7 100644 --- a/main/src/test/resources/data.sql +++ b/main/src/test/resources/data.sql @@ -1,12 +1,12 @@ -INSERT INTO characterisationresult (filePath, property, source, property_value, valueType) +INSERT INTO characterisationresult (id, file_path, property, source, property_value, value_type) VALUES -('/home/conftest1', 'FORMAT', 'file utility:5.03', 'Portable Document Format', 'STRING'), -('/home/conftest1', 'FORMAT', 'Droid:3', 'MS Word', 'STRING'), +(16,'/home/conftest1', 'FORMAT', 'file utility:5.03', 'Portable Document Format', 'STRING'), +(11,'/home/conftest1', 'FORMAT', 'Droid:3', 'MS Word', 'STRING'), -('/home/conftest2', 'EXTERNALIDENTIFIER', 'Jhove:1.21', 'fmt/18', 'STRING'), -('/home/conftest2', 'EXTERNALIDENTIFIER', 'TIKA:2', 'fmt/20', 'STRING'), -('/home/conftest2', 'EXTERNALIDENTIFIER', 'Droid:3', 'fmt/20', 'STRING'); +(12, '/home/conftest2', 'EXTERNALIDENTIFIER', 'Jhove:1.21', 'fmt/18', 'STRING'), +(13, '/home/conftest2', 'EXTERNALIDENTIFIER', 'TIKA:2', 'fmt/20', 'STRING'), +(14, '/home/conftest2', 'EXTERNALIDENTIFIER', 'Droid:3', 'fmt/20', 'STRING'); diff --git a/mvnw b/mvnw index d2f0ea3..8d937f4 100755 --- a/mvnw +++ b/mvnw @@ -19,7 +19,7 @@ # ---------------------------------------------------------------------------- # ---------------------------------------------------------------------------- -# Maven2 Start Up Batch script +# Apache Maven Wrapper startup batch script, version 3.2.0 # # Required ENV vars: # ------------------ @@ -27,7 +27,6 @@ # # Optional ENV vars # ----------------- -# M2_HOME - location of maven2's installed home dir # MAVEN_OPTS - parameters passed to the Java VM when running Maven # e.g. to debug Maven itself, use # set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000 @@ -36,6 +35,10 @@ if [ -z "$MAVEN_SKIP_RC" ] ; then + if [ -f /usr/local/etc/mavenrc ] ; then + . /usr/local/etc/mavenrc + fi + if [ -f /etc/mavenrc ] ; then . 
/etc/mavenrc fi @@ -50,7 +53,7 @@ fi cygwin=false; darwin=false; mingw=false -case "`uname`" in +case "$(uname)" in CYGWIN*) cygwin=true ;; MINGW*) mingw=true;; Darwin*) darwin=true @@ -58,9 +61,9 @@ case "`uname`" in # See https://developer.apple.com/library/mac/qa/qa1170/_index.html if [ -z "$JAVA_HOME" ]; then if [ -x "/usr/libexec/java_home" ]; then - export JAVA_HOME="`/usr/libexec/java_home`" + JAVA_HOME="$(/usr/libexec/java_home)"; export JAVA_HOME else - export JAVA_HOME="/Library/Java/Home" + JAVA_HOME="/Library/Java/Home"; export JAVA_HOME fi fi ;; @@ -68,68 +71,38 @@ esac if [ -z "$JAVA_HOME" ] ; then if [ -r /etc/gentoo-release ] ; then - JAVA_HOME=`java-config --jre-home` + JAVA_HOME=$(java-config --jre-home) fi fi -if [ -z "$M2_HOME" ] ; then - ## resolve links - $0 may be a link to maven's home - PRG="$0" - - # need this for relative symlinks - while [ -h "$PRG" ] ; do - ls=`ls -ld "$PRG"` - link=`expr "$ls" : '.*-> \(.*\)$'` - if expr "$link" : '/.*' > /dev/null; then - PRG="$link" - else - PRG="`dirname "$PRG"`/$link" - fi - done - - saveddir=`pwd` - - M2_HOME=`dirname "$PRG"`/.. - - # make it fully qualified - M2_HOME=`cd "$M2_HOME" && pwd` - - cd "$saveddir" - # echo Using m2 at $M2_HOME -fi - # For Cygwin, ensure paths are in UNIX format before anything is touched if $cygwin ; then - [ -n "$M2_HOME" ] && - M2_HOME=`cygpath --unix "$M2_HOME"` [ -n "$JAVA_HOME" ] && - JAVA_HOME=`cygpath --unix "$JAVA_HOME"` + JAVA_HOME=$(cygpath --unix "$JAVA_HOME") [ -n "$CLASSPATH" ] && - CLASSPATH=`cygpath --path --unix "$CLASSPATH"` + CLASSPATH=$(cygpath --path --unix "$CLASSPATH") fi # For Mingw, ensure paths are in UNIX format before anything is touched if $mingw ; then - [ -n "$M2_HOME" ] && - M2_HOME="`(cd "$M2_HOME"; pwd)`" - [ -n "$JAVA_HOME" ] && - JAVA_HOME="`(cd "$JAVA_HOME"; pwd)`" + [ -n "$JAVA_HOME" ] && [ -d "$JAVA_HOME" ] && + JAVA_HOME="$(cd "$JAVA_HOME" || (echo "cannot cd into $JAVA_HOME."; exit 1); pwd)" fi if [ -z "$JAVA_HOME" ]; then - javaExecutable="`which javac`" - if [ -n "$javaExecutable" ] && ! [ "`expr \"$javaExecutable\" : '\([^ ]*\)'`" = "no" ]; then + javaExecutable="$(which javac)" + if [ -n "$javaExecutable" ] && ! [ "$(expr "\"$javaExecutable\"" : '\([^ ]*\)')" = "no" ]; then # readlink(1) is not available as standard on Solaris 10. - readLink=`which readlink` - if [ ! `expr "$readLink" : '\([^ ]*\)'` = "no" ]; then + readLink=$(which readlink) + if [ ! "$(expr "$readLink" : '\([^ ]*\)')" = "no" ]; then if $darwin ; then - javaHome="`dirname \"$javaExecutable\"`" - javaExecutable="`cd \"$javaHome\" && pwd -P`/javac" + javaHome="$(dirname "\"$javaExecutable\"")" + javaExecutable="$(cd "\"$javaHome\"" && pwd -P)/javac" else - javaExecutable="`readlink -f \"$javaExecutable\"`" + javaExecutable="$(readlink -f "\"$javaExecutable\"")" fi - javaHome="`dirname \"$javaExecutable\"`" - javaHome=`expr "$javaHome" : '\(.*\)/bin'` + javaHome="$(dirname "\"$javaExecutable\"")" + javaHome=$(expr "$javaHome" : '\(.*\)/bin') JAVA_HOME="$javaHome" export JAVA_HOME fi @@ -145,7 +118,7 @@ if [ -z "$JAVACMD" ] ; then JAVACMD="$JAVA_HOME/bin/java" fi else - JAVACMD="`which java`" + JAVACMD="$(\unset -f command 2>/dev/null; \command -v java)" fi fi @@ -159,12 +132,9 @@ if [ -z "$JAVA_HOME" ] ; then echo "Warning: JAVA_HOME environment variable is not set." 
fi -CLASSWORLDS_LAUNCHER=org.codehaus.plexus.classworlds.launcher.Launcher - # traverses directory structure from process work directory to filesystem root # first directory with .mvn subdirectory is considered project base directory find_maven_basedir() { - if [ -z "$1" ] then echo "Path not specified to find_maven_basedir" @@ -180,96 +150,99 @@ find_maven_basedir() { fi # workaround for JBEAP-8937 (on Solaris 10/Sparc) if [ -d "${wdir}" ]; then - wdir=`cd "$wdir/.."; pwd` + wdir=$(cd "$wdir/.." || exit 1; pwd) fi # end of workaround done - echo "${basedir}" + printf '%s' "$(cd "$basedir" || exit 1; pwd)" } # concatenates all lines of a file concat_lines() { if [ -f "$1" ]; then - echo "$(tr -s '\n' ' ' < "$1")" + # Remove \r in case we run on Windows within Git Bash + # and check out the repository with auto CRLF management + # enabled. Otherwise, we may read lines that are delimited with + # \r\n and produce $'-Xarg\r' rather than -Xarg due to word + # splitting rules. + tr -s '\r\n' ' ' < "$1" fi } -BASE_DIR=`find_maven_basedir "$(pwd)"` +log() { + if [ "$MVNW_VERBOSE" = true ]; then + printf '%s\n' "$1" + fi +} + +BASE_DIR=$(find_maven_basedir "$(dirname "$0")") if [ -z "$BASE_DIR" ]; then exit 1; fi +MAVEN_PROJECTBASEDIR=${MAVEN_BASEDIR:-"$BASE_DIR"}; export MAVEN_PROJECTBASEDIR +log "$MAVEN_PROJECTBASEDIR" + ########################################################################################## # Extension to allow automatically downloading the maven-wrapper.jar from Maven-central # This allows using the maven wrapper in projects that prohibit checking in binary data. ########################################################################################## -if [ -r "$BASE_DIR/.mvn/wrapper/maven-wrapper.jar" ]; then - if [ "$MVNW_VERBOSE" = true ]; then - echo "Found .mvn/wrapper/maven-wrapper.jar" - fi +wrapperJarPath="$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" +if [ -r "$wrapperJarPath" ]; then + log "Found $wrapperJarPath" else - if [ "$MVNW_VERBOSE" = true ]; then - echo "Couldn't find .mvn/wrapper/maven-wrapper.jar, downloading it ..." - fi + log "Couldn't find $wrapperJarPath, downloading it ..." + if [ -n "$MVNW_REPOURL" ]; then - jarUrl="$MVNW_REPOURL/io/takari/maven-wrapper/0.5.5/maven-wrapper-0.5.5.jar" + wrapperUrl="$MVNW_REPOURL/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar" else - jarUrl="https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.5/maven-wrapper-0.5.5.jar" + wrapperUrl="https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar" fi - while IFS="=" read key value; do - case "$key" in (wrapperUrl) jarUrl="$value"; break ;; + while IFS="=" read -r key value; do + # Remove '\r' from value to allow usage on windows as IFS does not consider '\r' as a separator ( considers space, tab, new line ('\n'), and custom '=' ) + safeValue=$(echo "$value" | tr -d '\r') + case "$key" in (wrapperUrl) wrapperUrl="$safeValue"; break ;; esac - done < "$BASE_DIR/.mvn/wrapper/maven-wrapper.properties" - if [ "$MVNW_VERBOSE" = true ]; then - echo "Downloading from: $jarUrl" - fi - wrapperJarPath="$BASE_DIR/.mvn/wrapper/maven-wrapper.jar" + done < "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.properties" + log "Downloading from: $wrapperUrl" + if $cygwin; then - wrapperJarPath=`cygpath --path --windows "$wrapperJarPath"` + wrapperJarPath=$(cygpath --path --windows "$wrapperJarPath") fi if command -v wget > /dev/null; then - if [ "$MVNW_VERBOSE" = true ]; then - echo "Found wget ... 
using wget" - fi + log "Found wget ... using wget" + [ "$MVNW_VERBOSE" = true ] && QUIET="" || QUIET="--quiet" if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then - wget "$jarUrl" -O "$wrapperJarPath" + wget $QUIET "$wrapperUrl" -O "$wrapperJarPath" || rm -f "$wrapperJarPath" else - wget --http-user=$MVNW_USERNAME --http-password=$MVNW_PASSWORD "$jarUrl" -O "$wrapperJarPath" + wget $QUIET --http-user="$MVNW_USERNAME" --http-password="$MVNW_PASSWORD" "$wrapperUrl" -O "$wrapperJarPath" || rm -f "$wrapperJarPath" fi elif command -v curl > /dev/null; then - if [ "$MVNW_VERBOSE" = true ]; then - echo "Found curl ... using curl" - fi + log "Found curl ... using curl" + [ "$MVNW_VERBOSE" = true ] && QUIET="" || QUIET="--silent" if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then - curl -o "$wrapperJarPath" "$jarUrl" -f + curl $QUIET -o "$wrapperJarPath" "$wrapperUrl" -f -L || rm -f "$wrapperJarPath" else - curl --user $MVNW_USERNAME:$MVNW_PASSWORD -o "$wrapperJarPath" "$jarUrl" -f + curl $QUIET --user "$MVNW_USERNAME:$MVNW_PASSWORD" -o "$wrapperJarPath" "$wrapperUrl" -f -L || rm -f "$wrapperJarPath" fi - else - if [ "$MVNW_VERBOSE" = true ]; then - echo "Falling back to using Java to download" - fi - javaClass="$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.java" + log "Falling back to using Java to download" + javaSource="$MAVEN_PROJECTBASEDIR/.mvn/wrapper/MavenWrapperDownloader.java" + javaClass="$MAVEN_PROJECTBASEDIR/.mvn/wrapper/MavenWrapperDownloader.class" # For Cygwin, switch paths to Windows format before running javac if $cygwin; then - javaClass=`cygpath --path --windows "$javaClass"` + javaSource=$(cygpath --path --windows "$javaSource") + javaClass=$(cygpath --path --windows "$javaClass") fi - if [ -e "$javaClass" ]; then - if [ ! -e "$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.class" ]; then - if [ "$MVNW_VERBOSE" = true ]; then - echo " - Compiling MavenWrapperDownloader.java ..." - fi - # Compiling the Java class - ("$JAVA_HOME/bin/javac" "$javaClass") + if [ -e "$javaSource" ]; then + if [ ! -e "$javaClass" ]; then + log " - Compiling MavenWrapperDownloader.java ..." + ("$JAVA_HOME/bin/javac" "$javaSource") fi - if [ -e "$BASE_DIR/.mvn/wrapper/MavenWrapperDownloader.class" ]; then - # Running the downloader - if [ "$MVNW_VERBOSE" = true ]; then - echo " - Running MavenWrapperDownloader.java ..." - fi - ("$JAVA_HOME/bin/java" -cp .mvn/wrapper MavenWrapperDownloader "$MAVEN_PROJECTBASEDIR") + if [ -e "$javaClass" ]; then + log " - Running MavenWrapperDownloader.java ..." 
+ ("$JAVA_HOME/bin/java" -cp .mvn/wrapper MavenWrapperDownloader "$wrapperUrl" "$wrapperJarPath") || rm -f "$wrapperJarPath" fi fi fi @@ -278,33 +251,58 @@ fi # End of extension ########################################################################################## -export MAVEN_PROJECTBASEDIR=${MAVEN_BASEDIR:-"$BASE_DIR"} -if [ "$MVNW_VERBOSE" = true ]; then - echo $MAVEN_PROJECTBASEDIR +# If specified, validate the SHA-256 sum of the Maven wrapper jar file +wrapperSha256Sum="" +while IFS="=" read -r key value; do + case "$key" in (wrapperSha256Sum) wrapperSha256Sum=$value; break ;; + esac +done < "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.properties" +if [ -n "$wrapperSha256Sum" ]; then + wrapperSha256Result=false + if command -v sha256sum > /dev/null; then + if echo "$wrapperSha256Sum $wrapperJarPath" | sha256sum -c > /dev/null 2>&1; then + wrapperSha256Result=true + fi + elif command -v shasum > /dev/null; then + if echo "$wrapperSha256Sum $wrapperJarPath" | shasum -a 256 -c > /dev/null 2>&1; then + wrapperSha256Result=true + fi + else + echo "Checksum validation was requested but neither 'sha256sum' or 'shasum' are available." + echo "Please install either command, or disable validation by removing 'wrapperSha256Sum' from your maven-wrapper.properties." + exit 1 + fi + if [ $wrapperSha256Result = false ]; then + echo "Error: Failed to validate Maven wrapper SHA-256, your Maven wrapper might be compromised." >&2 + echo "Investigate or delete $wrapperJarPath to attempt a clean download." >&2 + echo "If you updated your Maven version, you need to update the specified wrapperSha256Sum property." >&2 + exit 1 + fi fi + MAVEN_OPTS="$(concat_lines "$MAVEN_PROJECTBASEDIR/.mvn/jvm.config") $MAVEN_OPTS" # For Cygwin, switch paths to Windows format before running java if $cygwin; then - [ -n "$M2_HOME" ] && - M2_HOME=`cygpath --path --windows "$M2_HOME"` [ -n "$JAVA_HOME" ] && - JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"` + JAVA_HOME=$(cygpath --path --windows "$JAVA_HOME") [ -n "$CLASSPATH" ] && - CLASSPATH=`cygpath --path --windows "$CLASSPATH"` + CLASSPATH=$(cygpath --path --windows "$CLASSPATH") [ -n "$MAVEN_PROJECTBASEDIR" ] && - MAVEN_PROJECTBASEDIR=`cygpath --path --windows "$MAVEN_PROJECTBASEDIR"` + MAVEN_PROJECTBASEDIR=$(cygpath --path --windows "$MAVEN_PROJECTBASEDIR") fi # Provide a "standardized" way to retrieve the CLI args that will # work with both Windows and non-Windows executions. 
-MAVEN_CMD_LINE_ARGS="$MAVEN_CONFIG $@" +MAVEN_CMD_LINE_ARGS="$MAVEN_CONFIG $*" export MAVEN_CMD_LINE_ARGS WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain +# shellcheck disable=SC2086 # safe args exec "$JAVACMD" \ $MAVEN_OPTS \ + $MAVEN_DEBUG_OPTS \ -classpath "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" \ - "-Dmaven.home=${M2_HOME}" "-Dmaven.multiModuleProjectDirectory=${MAVEN_PROJECTBASEDIR}" \ + "-Dmaven.multiModuleProjectDirectory=${MAVEN_PROJECTBASEDIR}" \ ${WRAPPER_LAUNCHER} $MAVEN_CONFIG "$@" diff --git a/mvnw.cmd b/mvnw.cmd index b26ab24..c4586b5 100644 --- a/mvnw.cmd +++ b/mvnw.cmd @@ -18,15 +18,14 @@ @REM ---------------------------------------------------------------------------- @REM ---------------------------------------------------------------------------- -@REM Maven2 Start Up Batch script +@REM Apache Maven Wrapper startup batch script, version 3.2.0 @REM @REM Required ENV vars: @REM JAVA_HOME - location of a JDK home dir @REM @REM Optional ENV vars -@REM M2_HOME - location of maven2's installed home dir @REM MAVEN_BATCH_ECHO - set to 'on' to enable the echoing of the batch commands -@REM MAVEN_BATCH_PAUSE - set to 'on' to wait for a key stroke before ending +@REM MAVEN_BATCH_PAUSE - set to 'on' to wait for a keystroke before ending @REM MAVEN_OPTS - parameters passed to the Java VM when running Maven @REM e.g. to debug Maven itself, use @REM set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000 @@ -46,8 +45,8 @@ if "%HOME%" == "" (set "HOME=%HOMEDRIVE%%HOMEPATH%") @REM Execute a user defined script before this one if not "%MAVEN_SKIP_RC%" == "" goto skipRcPre @REM check for pre script, once with legacy .bat ending and once with .cmd ending -if exist "%HOME%\mavenrc_pre.bat" call "%HOME%\mavenrc_pre.bat" -if exist "%HOME%\mavenrc_pre.cmd" call "%HOME%\mavenrc_pre.cmd" +if exist "%USERPROFILE%\mavenrc_pre.bat" call "%USERPROFILE%\mavenrc_pre.bat" %* +if exist "%USERPROFILE%\mavenrc_pre.cmd" call "%USERPROFILE%\mavenrc_pre.cmd" %* :skipRcPre @setlocal @@ -120,10 +119,10 @@ SET MAVEN_JAVA_EXE="%JAVA_HOME%\bin\java.exe" set WRAPPER_JAR="%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.jar" set WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain -set DOWNLOAD_URL="https://repo.maven.apache.org/maven2/io/takari/maven-wrapper/0.5.5/maven-wrapper-0.5.5.jar" +set WRAPPER_URL="https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar" -FOR /F "tokens=1,2 delims==" %%A IN ("%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.properties") DO ( - IF "%%A"=="wrapperUrl" SET DOWNLOAD_URL=%%B +FOR /F "usebackq tokens=1,2 delims==" %%A IN ("%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.properties") DO ( + IF "%%A"=="wrapperUrl" SET WRAPPER_URL=%%B ) @REM Extension to allow automatically downloading the maven-wrapper.jar from Maven-central @@ -134,11 +133,11 @@ if exist %WRAPPER_JAR% ( ) ) else ( if not "%MVNW_REPOURL%" == "" ( - SET DOWNLOAD_URL="%MVNW_REPOURL%/io/takari/maven-wrapper/0.5.5/maven-wrapper-0.5.5.jar" + SET WRAPPER_URL="%MVNW_REPOURL%/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar" ) if "%MVNW_VERBOSE%" == "true" ( echo Couldn't find %WRAPPER_JAR%, downloading it ... 
- echo Downloading from: %DOWNLOAD_URL% + echo Downloading from: %WRAPPER_URL% ) powershell -Command "&{"^ @@ -146,7 +145,7 @@ if exist %WRAPPER_JAR% ( "if (-not ([string]::IsNullOrEmpty('%MVNW_USERNAME%') -and [string]::IsNullOrEmpty('%MVNW_PASSWORD%'))) {"^ "$webclient.Credentials = new-object System.Net.NetworkCredential('%MVNW_USERNAME%', '%MVNW_PASSWORD%');"^ "}"^ - "[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12; $webclient.DownloadFile('%DOWNLOAD_URL%', '%WRAPPER_JAR%')"^ + "[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12; $webclient.DownloadFile('%WRAPPER_URL%', '%WRAPPER_JAR%')"^ "}" if "%MVNW_VERBOSE%" == "true" ( echo Finished downloading %WRAPPER_JAR% @@ -154,11 +153,35 @@ if exist %WRAPPER_JAR% ( ) @REM End of extension +@REM If specified, validate the SHA-256 sum of the Maven wrapper jar file +SET WRAPPER_SHA_256_SUM="" +FOR /F "usebackq tokens=1,2 delims==" %%A IN ("%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.properties") DO ( + IF "%%A"=="wrapperSha256Sum" SET WRAPPER_SHA_256_SUM=%%B +) +IF NOT %WRAPPER_SHA_256_SUM%=="" ( + powershell -Command "&{"^ + "$hash = (Get-FileHash \"%WRAPPER_JAR%\" -Algorithm SHA256).Hash.ToLower();"^ + "If('%WRAPPER_SHA_256_SUM%' -ne $hash){"^ + " Write-Output 'Error: Failed to validate Maven wrapper SHA-256, your Maven wrapper might be compromised.';"^ + " Write-Output 'Investigate or delete %WRAPPER_JAR% to attempt a clean download.';"^ + " Write-Output 'If you updated your Maven version, you need to update the specified wrapperSha256Sum property.';"^ + " exit 1;"^ + "}"^ + "}" + if ERRORLEVEL 1 goto error +) + @REM Provide a "standardized" way to retrieve the CLI args that will @REM work with both Windows and non-Windows executions. 
set MAVEN_CMD_LINE_ARGS=%* -%MAVEN_JAVA_EXE% %JVM_CONFIG_MAVEN_PROPS% %MAVEN_OPTS% %MAVEN_DEBUG_OPTS% -classpath %WRAPPER_JAR% "-Dmaven.multiModuleProjectDirectory=%MAVEN_PROJECTBASEDIR%" %WRAPPER_LAUNCHER% %MAVEN_CONFIG% %* +%MAVEN_JAVA_EXE% ^ + %JVM_CONFIG_MAVEN_PROPS% ^ + %MAVEN_OPTS% ^ + %MAVEN_DEBUG_OPTS% ^ + -classpath %WRAPPER_JAR% ^ + "-Dmaven.multiModuleProjectDirectory=%MAVEN_PROJECTBASEDIR%" ^ + %WRAPPER_LAUNCHER% %MAVEN_CONFIG% %* if ERRORLEVEL 1 goto error goto end @@ -168,15 +191,15 @@ set ERROR_CODE=1 :end @endlocal & set ERROR_CODE=%ERROR_CODE% -if not "%MAVEN_SKIP_RC%" == "" goto skipRcPost +if not "%MAVEN_SKIP_RC%"=="" goto skipRcPost @REM check for post script, once with legacy .bat ending and once with .cmd ending -if exist "%HOME%\mavenrc_post.bat" call "%HOME%\mavenrc_post.bat" -if exist "%HOME%\mavenrc_post.cmd" call "%HOME%\mavenrc_post.cmd" +if exist "%USERPROFILE%\mavenrc_post.bat" call "%USERPROFILE%\mavenrc_post.bat" +if exist "%USERPROFILE%\mavenrc_post.cmd" call "%USERPROFILE%\mavenrc_post.cmd" :skipRcPost @REM pause the script if MAVEN_BATCH_PAUSE is set to 'on' -if "%MAVEN_BATCH_PAUSE%" == "on" pause +if "%MAVEN_BATCH_PAUSE%"=="on" pause -if "%MAVEN_TERMINATE_CMD%" == "on" exit %ERROR_CODE% +if "%MAVEN_TERMINATE_CMD%"=="on" exit %ERROR_CODE% -exit /B %ERROR_CODE% +cmd /C exit /B %ERROR_CODE% diff --git a/pom.xml b/pom.xml index 6d481e0..f00eae9 100644 --- a/pom.xml +++ b/pom.xml @@ -183,40 +183,5 @@ - - - prod - - prod - - - - - org.springframework.boot - spring-boot-maven-plugin - ${spring-boot.version} - - - - - - - dev - - dev - - - true - - - - - org.springframework.boot - spring-boot-maven-plugin - ${spring-boot.version} - - - - - + \ No newline at end of file diff --git a/utils/auto.post b/utils/auto.post new file mode 100644 index 0000000..e69de29 diff --git a/utils/auto.sh b/utils/auto.sh new file mode 100644 index 0000000..d68cabd --- /dev/null +++ b/utils/auto.sh @@ -0,0 +1,19 @@ +#!/bin/bash +source ${PWD}/../.venv/bin/activate +./mvnw -pl -web -DskipTests clean install + +./mvnw spring-boot:run -f main/pom.xml & + +proc_id=$! +echo proc_id is $proc_id + +sleep 10 + +python fileupload.py http://localhost:8080/multipleupload ~/rnd/data/subset_govdocs/ 100 2 + + +#time curl -X 'POST' 'http://localhost:8080/propertyvalues?property=FORMAT' -H 'accept: */*' -d '' +ab -n 50 -c 1 -p auto.post -T 'text/plain' 'http://localhost:8080/propertyvalues?property=FORMAT' + +echo killing proc_id +kill $proc_id \ No newline at end of file diff --git a/utils/fileupload.py b/utils/fileupload.py new file mode 100644 index 0000000..45393a0 --- /dev/null +++ b/utils/fileupload.py @@ -0,0 +1,73 @@ +import os +import sys +import requests +from concurrent.futures import ThreadPoolExecutor +import time + +def upload_chunk(url, chunk_files, chunk_count): + headers = { + 'accept': '*/*', + } + + start_time = time.time() + + response = requests.post(url, headers=headers, files=chunk_files) + end_time = time.time() + + print(f"Uploaded {chunk_count} files. Time taken: {(end_time - start_time):.2f} seconds. 
Status Code: {response.status_code}")
+    #print(response.text)
+
+    return end_time - start_time
+
+def upload_files_in_chunks_parallel(url, folder_path, chunk_size=100, num_parallel_requests=10):
+    chunk_count = 0
+    chunk_files = []
+    futures = []
+
+    # One shared executor, so up to num_parallel_requests chunk uploads run concurrently.
+    with ThreadPoolExecutor(max_workers=num_parallel_requests) as executor:
+        for root, _, filenames in os.walk(folder_path):
+            for filename in filenames:
+                if filename.endswith('.xml'):
+                    file_path = os.path.join(root, filename)
+                    relative_path = os.path.relpath(file_path, folder_path)
+                    chunk_files.append(('files', (relative_path, open(file_path, 'rb'), 'text/xml')))
+
+                    if len(chunk_files) == chunk_size:
+                        chunk_count += 1
+                        futures.append(executor.submit(upload_chunk, url, chunk_files, chunk_count * chunk_size))
+                        chunk_files = []
+
+        if chunk_files:
+            futures.append(executor.submit(upload_chunk, url, chunk_files, chunk_count * chunk_size + len(chunk_files)))
+
+        total_duration = sum(future.result() for future in futures)
+
+    return total_duration
+
+if __name__ == "__main__":
+    if len(sys.argv) != 5:
+        print("Usage: python fileupload.py <upload_url> <folder_to_upload> <chunk_size> <num_parallel_requests>")
+        sys.exit(1)
+
+    upload_url = sys.argv[1]
+    folder_to_upload = sys.argv[2]
+    chunk_size = int(sys.argv[3])
+    num_parallel_requests = int(sys.argv[4])
+
+    start_script_time = time.time()
+
+    total_duration = upload_files_in_chunks_parallel(upload_url, folder_to_upload, chunk_size, num_parallel_requests)
+
+    end_script_time = time.time()
+    script_duration = end_script_time - start_script_time
+    print(f"\nScript Execution Duration: {script_duration:.2f} seconds")
+    print(f"Total Upload Duration: {total_duration:.2f} seconds")
diff --git a/utils/fileupload.sh b/utils/fileupload.sh new file mode 100644 index 0000000..022ad26 --- /dev/null +++ b/utils/fileupload.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +COUNTER=0 + +PARALLEL_TASKS=30 + +for i in $(find $2 -type f -print) do + (( COUNTER++ )) + (( t=t%PARALLEL_TASKS )); ((t++==0)) && wait + curl --silent --output /dev/null --show-error -X "POST" \ + "${1}/upload" \ + -H "accept: */*" \ + -H "Content-Type: multipart/form-data" \ + -F "file=@${i}" & + + if !
(( $COUNTER % $PARALLEL_TASKS )) ; then + printf "%d files uploaded\n" $COUNTER + fi + +done +wait diff --git a/web/Dockerfile b/web/Dockerfile index ff487cb..9e948dd 100644 --- a/web/Dockerfile +++ b/web/Dockerfile @@ -5,19 +5,19 @@ FROM node:14.5.0-stretch-slim as build WORKDIR /app ENV PATH /app/node_modules/.bin:$PATH -COPY ./web/frontend/package.json ./ -COPY ./web/frontend/package-lock.json ./ +COPY ./frontend/package.json ./ +COPY ./frontend/package-lock.json ./ RUN npm ci RUN npm install react-scripts@3.4.1 -g -COPY ./web/frontend ./ +COPY ./frontend ./ RUN npm run build FROM nginx:stable-alpine-slim COPY --from=build /app/build /usr/share/nginx/html -COPY ./web/nginx/nginx.conf /etc/nginx/conf.d/default.conf +COPY ./nginx/nginx.conf /etc/nginx/conf.d/default.conf EXPOSE 3000 CMD ["nginx", "-g", "daemon off;"] diff --git a/web/frontend/src/AppConfig.jsx b/web/frontend/src/AppConfig.jsx index 17aa48b..bc2613d 100644 --- a/web/frontend/src/AppConfig.jsx +++ b/web/frontend/src/AppConfig.jsx @@ -15,13 +15,50 @@ const AppConfig = () => { "globalProperties", [] ); + const [globalStatistics, setGlobalStatistics] = useSessionStorage( + "globalStatistics", + [] + ); + + + + var myHeaders = new Headers(); + myHeaders.append("Content-Type", "application/json"); + + var requestOptions = { + method: "POST", + headers: myHeaders, + redirect: "follow", + }; + + const [dataset, setDataset] = useSessionStorage( + "dataset", + "" + ); const fetchGlobalProperties = async () => { - const response = await fetch(BACKEND_URL + "/properties"); + await fetch(BACKEND_URL + "/statistics?" + + new URLSearchParams({ + datasetName: dataset, + }), requestOptions); + const response = await fetch(BACKEND_URL + "/properties?" + + new URLSearchParams({ + datasetName: "default", + })); let data = await response.json(); let properties = data.map((prop) => prop.property); setGlobalProperties(properties); }; + + const fetchGlobalStatistics = async () => { + const response = await fetch(BACKEND_URL + "/statistics?" 
+ + new URLSearchParams({ + datasetName: dataset, + }), requestOptions); + let data = await response.json(); + setGlobalStatistics(data); + }; + const fetchHealth = async () => { try { const response = await fetch(BACKEND_URL + "/health"); @@ -30,11 +67,18 @@ const AppConfig = () => { console.log(error); setErrorMessage("REST API is not accessible!"); } + } + + + + const fetchInitialData = async () => { + await fetchHealth(); + await fetchGlobalProperties(); + await fetchGlobalStatistics(); }; useEffect(() => { - fetchHealth(); - fetchGlobalProperties(); + fetchInitialData(); }, []); return ( diff --git a/web/frontend/src/components/Upload.jsx b/web/frontend/src/components/Upload.jsx index fbe600e..5429826 100644 --- a/web/frontend/src/components/Upload.jsx +++ b/web/frontend/src/components/Upload.jsx @@ -7,12 +7,16 @@ import "@uppy/dashboard/dist/style.css"; import XHRUpload from "@uppy/xhr-upload"; import { BACKEND_URL } from "../AppConfig"; -const Upload = () => { +const Upload = ({dataset}) => { const theme = useTheme(); const colors = tokens(theme.palette.mode); - const uppy = new Uppy().use(XHRUpload, { + const uppy = new Uppy({ + debug: true, + meta: { datasetName: dataset }, + }) .use(XHRUpload, { endpoint: BACKEND_URL + "/upload", + allowedMetaFields: ['datasetName'] }); return ( { const [filter, setFilter] = useSessionStorage("filterString", ""); - + const [dataset, setDataset] = useSessionStorage( + "dataset", + "" + ); const [data, setData] = useState([]); // GET with fetch API useEffect(() => { @@ -34,6 +37,7 @@ const PropertyValueDistribution = (payload) => { new URLSearchParams({ property: payload["property"], filter: filter, + datasetName: dataset }), requestOptions ); @@ -60,7 +64,7 @@ const PropertyValueDistribution = (payload) => { } }; fetchPost(); - }, [filter]); + }, [filter, dataset]); let filterClick = (property, event) => { if (event.indexValue == ".etc") { diff --git a/web/frontend/src/scenes/dashboard/index.jsx b/web/frontend/src/scenes/dashboard/index.jsx index a8423ef..100b50f 100644 --- a/web/frontend/src/scenes/dashboard/index.jsx +++ b/web/frontend/src/scenes/dashboard/index.jsx @@ -13,20 +13,28 @@ import {uniqueProperties} from "../../components/Filter"; const Dashboard = () => { const theme = useTheme(); const colors = tokens(theme.palette.mode); - const [sizeStatistics, setSizeStatistics] = useState([ - { - totalSize: 10047, - avgSize: 3349, - minSize: 4, - maxSize: 10000, - conflictRate: 0.17, - }, - ]); const [properties, setProperties] = useState([]); const [filter, setFilter] = useSessionStorage("filterString", ""); + const [dataset, setDataset] = useSessionStorage( + "dataset", + "" + ); + const [globalStatistics, setGlobalStatistics] = useSessionStorage( + "globalStatistics", + [ + { + totalSize: 10047, + avgSize: 3349, + minSize: 4, + maxSize: 10000, + conflictRate: 0.17, + }, + ] + ); + const [globalProperties, setGlobalProperties] = useSessionStorage( "globalProperties", [] @@ -40,7 +48,10 @@ const Dashboard = () => { ); const fetchGlobalProperties = async () => { - const response = await fetch(BACKEND_URL + "/properties"); + const response = await fetch(BACKEND_URL + "/properties?" + + new URLSearchParams({ + datasetName: "default", + })); let data = await response.json(); let properties = data.map((prop) => prop.property); setGlobalProperties(properties); @@ -61,19 +72,23 @@ const Dashboard = () => { "/statistics?" 
+ new URLSearchParams({ filter: filter, + datasetName: dataset }), requestOptions ); const data = await response.json(); - setSizeStatistics(data); + setGlobalStatistics(data); + }; + + const fetchData = async () => { + await fetchStatistics(); + await fetchGlobalProperties(); }; useEffect(() => { console.log("loading the dashboard"); - - fetchStatistics(); - fetchGlobalProperties(); - }, [filter]); + fetchData(); + }, [filter, dataset]); const handleClick = () => { console.log("Conflict resolution started"); @@ -84,7 +99,10 @@ const Dashboard = () => { redirect: "follow", }; const response = await fetch( - BACKEND_URL + "/resolveconflicts", + BACKEND_URL + "/resolveconflicts?" + + new URLSearchParams({ + datasetName: dataset + }), requestOptions ); setConflictResolution({ @@ -112,43 +130,43 @@ const Dashboard = () => { @@ -164,9 +182,9 @@ const Dashboard = () => { { It is a set of activities within Digital Preservation to study digital collections based on characterisation results. Such results - are obtained by proccesing the digital object using characterisation + are obtained by processing the digital object using characterisation tools. diff --git a/web/frontend/src/scenes/global/Topbar.jsx b/web/frontend/src/scenes/global/Topbar.jsx index f6718e2..a43f8fb 100644 --- a/web/frontend/src/scenes/global/Topbar.jsx +++ b/web/frontend/src/scenes/global/Topbar.jsx @@ -1,17 +1,81 @@ -import { Box, IconButton, useTheme } from "@mui/material"; -import { useContext } from "react"; - -import { ColorModeContext, tokens } from "../../theme"; +import {Box, useTheme} from "@mui/material"; +import React, {useContext, useEffect} from "react"; +import InputLabel from '@mui/material/InputLabel'; +import MenuItem from '@mui/material/MenuItem'; +import FormControl from '@mui/material/FormControl'; +import Select from '@mui/material/Select'; +import {ColorModeContext, tokens} from "../../theme"; import Filter from "../../components/Filter"; +import {BACKEND_URL} from "../../AppConfig"; +import {useSessionStorage} from "@uidotdev/usehooks"; + const Topbar = () => { const theme = useTheme(); const colors = tokens(theme.palette.mode); - const colorMode = useContext(ColorModeContext); - return ( - - - + const [datasets, setDatasets] = useSessionStorage( + "datasets", + [] + ); + + const [dataset, setDataset] = useSessionStorage( + "dataset", + "" + ); + + var myHeaders = new Headers(); + myHeaders.append("Content-Type", "application/json"); + var requestGETOptions = { + method: "GET", + headers: myHeaders, + redirect: "follow", + }; + + const fetchDatasets = async () => { + const response = await fetch(BACKEND_URL + "/datasets", requestGETOptions); + let data = await response.json(); + setDatasets(data); + }; + + + + const colorMode = useContext(ColorModeContext); + + const fetchData = async () => { + await fetchDatasets(); + }; + + useEffect(() => { + fetchDatasets(); + }, []); + + + + const handleChange = (event) => { + setDataset(event.target.value); + }; + + const handleClick = (event) => { + fetchDatasets(); + }; + + + return ( + + + + + + + Dataset + + + + ); }; diff --git a/web/frontend/src/scenes/objectDetails/index.jsx b/web/frontend/src/scenes/objectDetails/index.jsx index a2e4159..2b820cf 100644 --- a/web/frontend/src/scenes/objectDetails/index.jsx +++ b/web/frontend/src/scenes/objectDetails/index.jsx @@ -25,6 +25,12 @@ const ObjectDetails = () => { "selectedObject", "" ); + + const [dataset, setDataset] = useSessionStorage( + "dataset", + "" + ); + useEffect(() => { console.log("loading the object 
details list"); var myHeaders = new Headers(); @@ -48,6 +54,7 @@ const ObjectDetails = () => { "/object?" + new URLSearchParams({ filepath: selectedObject, + datasetName: dataset }), requestOptions ); @@ -58,6 +65,7 @@ const ObjectDetails = () => { "/objectconflicts?" + new URLSearchParams({ filepath: selectedObject, + datasetName: dataset }), requestOptions ); diff --git a/web/frontend/src/scenes/objects/index.jsx b/web/frontend/src/scenes/objects/index.jsx index 5847c21..779c98c 100644 --- a/web/frontend/src/scenes/objects/index.jsx +++ b/web/frontend/src/scenes/objects/index.jsx @@ -19,6 +19,10 @@ const Objects = () => { "selectedObject", "" ); + const [dataset, setDataset] = useSessionStorage( + "dataset", + "" + ); const [filter, setFilter] = useSessionStorage("filterString", ""); const navigate = useNavigate(); useEffect(() => { @@ -44,6 +48,7 @@ const Objects = () => { "/objects?" + new URLSearchParams({ filter: filter, + datasetName: dataset }), requestOptions ); diff --git a/web/frontend/src/scenes/samples/index.jsx b/web/frontend/src/scenes/samples/index.jsx index c61e981..a31512a 100644 --- a/web/frontend/src/scenes/samples/index.jsx +++ b/web/frontend/src/scenes/samples/index.jsx @@ -40,15 +40,14 @@ const Samples = () => { redirect: "follow", }; + const params = new URLSearchParams(); + params.append('filter', filter) + params.append('algorithm',"SELECTIVE_FEATURE_DISTRIBUTION") + params.append('properties', ["FORMAT", "MIMETYPE", "FORMAT_VERSION"]) + const response = await fetch( BACKEND_URL + - "/samples?" + - new URLSearchParams({ - filter: filter, - properties: "FORMAT", - properties: "MIMETYPE", - algorithm: "SELECTIVE_FEATURE_DISTRIBUTION", - }), + `/samples?${params.toString()}`, requestOptions ); const data = await response.json(); diff --git a/web/frontend/src/scenes/uploadForm/index.jsx b/web/frontend/src/scenes/uploadForm/index.jsx index a6390f7..de13a50 100644 --- a/web/frontend/src/scenes/uploadForm/index.jsx +++ b/web/frontend/src/scenes/uploadForm/index.jsx @@ -1,16 +1,36 @@ -import { Box } from "@mui/material"; +import {Box, Button} from "@mui/material"; import Header from "../../components/Header"; import Upload from "../../components/Upload"; +import TextField from "@mui/material/TextField"; +import React from "react"; +import {useContext, useState} from "react"; + const UploadForm = () => { + + const [newDataset, setNewDataset] = useState(''); + + const handleTextInputChange = event => { + setNewDataset(event.target.value); + }; + + + + return (
+ + + + + - +
);
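
The characterisationresultview introduced in main/src/main/resources/schema.sql collapses the per-source rows of characterisationresult into one row per (file_path, property, value_type): a group with exactly one distinct property_value keeps that value, and any disagreement between tools is reported as CONFLICT. A minimal Python sketch of the same aggregation, using sample rows taken from data.sql (the row layout is assumed from the schema above):

    from collections import defaultdict

    # Rows mirror characterisationresult: (file_path, property, source, property_value, value_type).
    rows = [
        ("/home/artur", "MIMETYPE", "TIKA:2", "application/doc", "STRING"),
        ("/home/artur", "MIMETYPE", "file utility:5.03", "application/doc", "STRING"),
        ("/home/conftest1", "FORMAT", "file utility:5.03", "Portable Document Format", "STRING"),
        ("/home/conftest1", "FORMAT", "Droid:3", "MS Word", "STRING"),
    ]

    def build_view(rows):
        # CASE WHEN COUNT(DISTINCT property_value) = 1 THEN MIN(property_value) ELSE 'CONFLICT' END
        groups = defaultdict(set)
        for file_path, prop, _source, value, value_type in rows:
            groups[(file_path, prop, value_type)].add(value)
        return {key: values.pop() if len(values) == 1 else "CONFLICT"
                for key, values in groups.items()}

    for key, value in build_view(rows).items():
        print(key, "->", value)
    # ('/home/artur', 'MIMETYPE', 'STRING') -> application/doc
    # ('/home/conftest1', 'FORMAT', 'STRING') -> CONFLICT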
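
Every endpoint exercised in RestServiceTest now takes a datasetName request parameter, and the frontend passes it through the query string. A usage sketch with Python's requests library; the base URL is an assumption (a local instance, as started by utils/auto.sh), while the endpoint and parameter names come from the tests:

    import requests

    BASE_URL = "http://localhost:8080"  # assumed local dev instance

    # Collection statistics for one dataset (the API accepts these as query parameters on POST).
    stats = requests.post(f"{BASE_URL}/statistics", params={"datasetName": "default"})
    stats.raise_for_status()
    print(stats.json())

    # Property value distribution, optionally narrowed by a filter expression.
    dist = requests.post(
        f"{BASE_URL}/propertyvalues",
        params={
            "property": "FORMAT",
            "filter": 'FORMAT="Portable Document Format"',
            "datasetName": "default",
        },
    )
    print(dist.json())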
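
The upload path follows the same pattern: uploadFileTest sends a multipart file plus datasetName, and the Uppy dashboard now forwards datasetName as an allowed meta field. A requests equivalent, again assuming a local instance and using any small file as the payload:

    import requests

    BASE_URL = "http://localhost:8080"  # assumed, as above

    # Mirrors uploadFileTest: one multipart file, routed into the named dataset.
    with open("README.md", "rb") as fh:
        response = requests.post(
            f"{BASE_URL}/upload",
            params={"datasetName": "default"},
            files={"file": fh},
        )
    response.raise_for_status()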
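
The samples page fix is about query-string encoding: the old code built new URLSearchParams from an object literal containing two properties keys, and duplicate keys in a JavaScript object literal overwrite each other, so only the last property survived; the rewritten code builds the parameters with URLSearchParams.append, so nothing is lost. In Python, a list of key/value tuples produces repeated keys, and Spring typically binds either repeated keys or a single comma-separated value to a list parameter:

    import requests

    BASE_URL = "http://localhost:8080"  # assumed, as above

    # Encodes as properties=FORMAT&properties=MIMETYPE&properties=FORMAT_VERSION.
    params = [
        ("filter", ""),
        ("algorithm", "SELECTIVE_FEATURE_DISTRIBUTION"),
        ("properties", "FORMAT"),
        ("properties", "MIMETYPE"),
        ("properties", "FORMAT_VERSION"),
    ]
    samples = requests.post(f"{BASE_URL}/samples", params=params)
    print(samples.json())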
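
The regenerated Maven wrapper scripts (mvnw, mvnw.cmd) add an optional integrity check: if maven-wrapper.properties defines wrapperSha256Sum, the downloaded jar is hashed and the build aborts on a mismatch. The same check is easy to express in Python, for example as a CI sanity step; the file locations follow the wrapper convention used in the scripts:

    import hashlib
    from pathlib import Path

    wrapper_dir = Path(".mvn") / "wrapper"  # assumed: run from the project root
    jar_path = wrapper_dir / "maven-wrapper.jar"

    # Read the expected checksum, if one is pinned in maven-wrapper.properties.
    expected = ""
    for line in (wrapper_dir / "maven-wrapper.properties").read_text().splitlines():
        key, _, value = line.partition("=")
        if key.strip() == "wrapperSha256Sum":
            expected = value.strip()
            break

    if expected:
        actual = hashlib.sha256(jar_path.read_bytes()).hexdigest()
        if actual != expected:
            raise SystemExit(f"Maven wrapper SHA-256 mismatch; delete {jar_path} and retry.")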