diff --git a/Dockerfile b/Dockerfile
index 14e1730..7270632 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,7 +1,7 @@
 FROM maven:3.9.0 as builder
 COPY . /app
 WORKDIR /app
-RUN --mount=type=cache,target=/root/.m2 mvn -pl -web clean install -Pdocker
+RUN --mount=type=cache,target=/root/.m2 mvn -pl -web clean install
 
 FROM openjdk:21-jdk-slim
 WORKDIR /app
diff --git a/README.md b/README.md
index 1d0300c..6eb7969 100644
--- a/README.md
+++ b/README.md
@@ -41,12 +41,12 @@ docker-compose -f docker-compose.dev.yaml up --build
 File uploading using bash:
 
 ```
-bash fileupload.sh http://localhost:8082 ~/rnd/data/govdocs_fits/govdocs1/000/
+bash ./utils/fileupload.sh http://localhost:8082 ~/rnd/data/govdocs_fits/govdocs1/000/
 ```
 
 File uploading using python (the pip package requests is necessary):
 
 ```
-python fileupload.py http://localhost:8082/multipleupload ~/rnd/data/govdocs_fits/govdocs1/000/ 100 3
+python ./utils/fileupload.py http://localhost:8082/multipleupload ~/rnd/data/govdocs_fits/govdocs1/000/ 100 3
 ```
 
 ## Issues
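Note: both upload paths POST files as multipart/form-data through the nginx gateway on port 8082. For readers without the scripts at hand, here is a minimal Java sketch of the same request; the form field name `datafile` and the content type are assumptions (check `utils/fileupload.sh` for the exact parameters the server expects), so treat it as illustrative rather than as the project's API.

```java
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.file.Files;
import java.nio.file.Path;

public class UploadExample {
    public static void main(String[] args) throws Exception {
        Path file = Path.of(args[0]);
        String boundary = "----fitsinn" + System.nanoTime();

        // Assemble a single-part multipart/form-data body by hand;
        // java.net.http has no built-in multipart encoder.
        byte[] head = ("--" + boundary + "\r\n"
                + "Content-Disposition: form-data; name=\"datafile\"; filename=\""
                + file.getFileName() + "\"\r\n"
                + "Content-Type: application/octet-stream\r\n\r\n").getBytes();
        byte[] tail = ("\r\n--" + boundary + "--\r\n").getBytes();
        byte[] content = Files.readAllBytes(file);

        byte[] body = new byte[head.length + content.length + tail.length];
        System.arraycopy(head, 0, body, 0, head.length);
        System.arraycopy(content, 0, body, head.length, content.length);
        System.arraycopy(tail, 0, body, head.length + content.length, tail.length);

        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create("http://localhost:8082/multipleupload"))
                .header("Content-Type", "multipart/form-data; boundary=" + boundary)
                .POST(HttpRequest.BodyPublishers.ofByteArray(body))
                .build();

        HttpResponse<String> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofString());
        System.out.println(response.statusCode() + " " + response.body());
    }
}
```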
diff --git a/cassandra/docker-compose.yaml b/cassandra/docker-compose.yaml
deleted file mode 100644
index e1c7d38..0000000
--- a/cassandra/docker-compose.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-version: '3.9'
-
-services:
-  cassandra:
-    image: cassandra
-    ports:
-      - 9042:9042
\ No newline at end of file
diff --git a/core/src/main/java/rocks/artur/api_impl/ResolveConflictsImpl.java b/core/src/main/java/rocks/artur/api_impl/CRH_ResolveConflictsImpl.java
similarity index 97%
rename from core/src/main/java/rocks/artur/api_impl/ResolveConflictsImpl.java
rename to core/src/main/java/rocks/artur/api_impl/CRH_ResolveConflictsImpl.java
index d3a7036..38d1911 100644
--- a/core/src/main/java/rocks/artur/api_impl/ResolveConflictsImpl.java
+++ b/core/src/main/java/rocks/artur/api_impl/CRH_ResolveConflictsImpl.java
@@ -12,16 +12,16 @@
 import java.util.function.Function;
 import java.util.stream.Collectors;
 
-public class ResolveConflictsImpl implements ResolveConflicts {
+public class CRH_ResolveConflictsImpl {//implements ResolveConflicts {
 
     private CharacterisationResultGateway characterisationResultGateway;
 
-    public ResolveConflictsImpl(CharacterisationResultGateway characterisationResultGateway) {
+    public CRH_ResolveConflictsImpl(CharacterisationResultGateway characterisationResultGateway) {
         this.characterisationResultGateway = characterisationResultGateway;
     }
 
-    @Override
+
     public void run() {
         init();
         System.out.println(sourceWeights);
diff --git a/core/src/main/java/rocks/artur/api_impl/Native_ResolveConflictsImpl.java b/core/src/main/java/rocks/artur/api_impl/Native_ResolveConflictsImpl.java
new file mode 100644
index 0000000..3847b5c
--- /dev/null
+++ b/core/src/main/java/rocks/artur/api_impl/Native_ResolveConflictsImpl.java
@@ -0,0 +1,16 @@
+package rocks.artur.api_impl;
+
+import rocks.artur.api.ResolveConflicts;
+import rocks.artur.domain.CharacterisationResultGateway;
+
+public class Native_ResolveConflictsImpl implements ResolveConflicts {
+    private CharacterisationResultGateway characterisationResultGateway;
+
+    public Native_ResolveConflictsImpl(CharacterisationResultGateway characterisationResultGateway) {
+        this.characterisationResultGateway = characterisationResultGateway;
+    }
+    @Override
+    public void run() {
+        characterisationResultGateway.resolveConflictsNative();
+    }
+}
diff --git a/core/src/main/java/rocks/artur/domain/CharacterisationResultGateway.java b/core/src/main/java/rocks/artur/domain/CharacterisationResultGateway.java
index f9a86bc..a792246 100644
--- a/core/src/main/java/rocks/artur/domain/CharacterisationResultGateway.java
+++ b/core/src/main/java/rocks/artur/domain/CharacterisationResultGateway.java
@@ -85,4 +85,6 @@ public interface CharacterisationResultGateway {
     double getConflictRate();
 
     void delete(CharacterisationResult characterisationResult);
+
+    void resolveConflictsNative();
 }
diff --git a/docker-compose.clickhouse.dev.yaml b/docker-compose.clickhouse.dev.yaml
new file mode 100644
index 0000000..df98a57
--- /dev/null
+++ b/docker-compose.clickhouse.dev.yaml
@@ -0,0 +1,105 @@
+version: '3'
+
+services:
+
+  fits:
+    build:
+      context: .
+      dockerfile: ./fits/Dockerfile
+    container_name: fits
+    env_file: .env
+    networks:
+      - web
+    restart: unless-stopped
+    ports:
+      - 8081:8080
+
+  rest:
+    build:
+      context: .
+      dockerfile: ./Dockerfile
+    env_file: .env
+    networks:
+      - web
+    restart: unless-stopped
+    environment:
+      - DB_SELECTOR=clickhouse
+    deploy:
+      replicas: 1
+    ports:
+      - 8092:8080
+    depends_on:
+      - fits
+      - db-docker
+
+  web:
+    build:
+      context: .
+      dockerfile: ./web/Dockerfile
+    container_name: web
+    env_file: .env
+    networks:
+      - web
+    restart: unless-stopped
+    ports:
+      - 8080:3000
+
+  db-docker:
+    image: yandex/clickhouse-server
+    container_name: db-docker
+    networks:
+      - web
+    ports:
+      - 8123:8123
+      - 9000:9000
+      - 9004:9004
+
+
+  db-docker-init:
+    image: yandex/clickhouse-server
+    container_name: db-docker-init
+    volumes:
+      - ./utils/clickhouse:/var/clickhouse
+    depends_on:
+      - db-docker
+    networks:
+      - web
+    entrypoint: [ '/bin/sh', '-c' ]
+    command: |
+      "
+      while ! clickhouse-client --host db-docker -q \"SHOW databases;\"; do
+          echo waiting for clickhouse up
+          sleep 1
+      done
+
+      clickhouse-client --host db-docker --queries-file /var/clickhouse/initdb.sql
+
+      tail -f /dev/null
+      "
+
+
+  adminer:
+    image: adminer
+    container_name: adminer
+    env_file: .env
+    restart: unless-stopped
+    networks:
+      - web
+    ports:
+      - 8090:8080
+
+  nginx:
+    image: nginx
+    container_name: nginx
+    env_file: .env
+    volumes:
+      - ./utils/nginx/nginx.conf:/etc/nginx/conf.d/default.conf
+    ports:
+      - 8082:80
+    networks:
+      - web
+    depends_on:
+      - rest
+
+networks:
+  web:
\ No newline at end of file
diff --git a/docker-compose.dev-cluster.yaml b/docker-compose.mysql.cluster.yaml
similarity index 91%
rename from docker-compose.dev-cluster.yaml
rename to docker-compose.mysql.cluster.yaml
index b94a251..5d0f224 100644
--- a/docker-compose.dev-cluster.yaml
+++ b/docker-compose.mysql.cluster.yaml
@@ -21,6 +21,7 @@ services:
       env_file: .env
       environment:
         - SPRING_DATASOURCE_URL=jdbc:mysql://mysql-router:6446/fitsinn
+        - DB_SELECTOR=mysql
       networks:
         - web
       restart: unless-stopped
@@ -58,7 +59,7 @@ services:
   mysql-server-1:
     container_name: mysql-server-1
     env_file:
-      - mysql-cluster/mysql-server.env
+      - utils/mysql-cluster/mysql-server.env
     image: mysql/mysql-server:8.0.12
     networks:
       - web
@@ -83,7 +84,7 @@ services:
   mysql-server-2:
     container_name: mysql-server-2
     env_file:
-      - mysql-cluster/mysql-server.env
+      - utils/mysql-cluster/mysql-server.env
     image: mysql/mysql-server:8.0.12
     networks:
       - web
@@ -109,7 +110,7 @@ services:
   mysql-server-3:
     container_name: mysql-server-3
     env_file:
-      - mysql-cluster/mysql-server.env
+      - utils/mysql-cluster/mysql-server.env
    image: mysql/mysql-server:8.0.12
     networks:
       - web
@@ -134,7 +135,7 @@ services:
   mysql-shell:
     container_name: mysql-shell
     env_file:
-      - mysql-cluster/mysql-shell.env
+      - utils/mysql-cluster/mysql-shell.env
     image: neumayer/mysql-shell-batch
     networks:
       - web
@@ -148,7 +149,7 @@ services:
   mysql-router:
     container_name: mysql-router
     env_file:
-      - mysql-cluster/mysql-router.env
+      - utils/mysql-cluster/mysql-router.env
     image: mysql/mysql-router:8.0
     networks:
       - web
@@ -166,7 +167,7 @@ services:
     container_name: nginx
     env_file: .env
     volumes:
-      - ./nginx.conf:/etc/nginx/conf.d/default.conf
+      - ./utils/nginx/nginx.conf:/etc/nginx/conf.d/default.conf
     ports:
       - 8082:80
     networks:
diff --git a/docker-compose.dev.yaml b/docker-compose.mysql.dev.yaml
similarity index 91%
rename from docker-compose.dev.yaml
rename to docker-compose.mysql.dev.yaml
index b2a0ed1..17c9cc3 100644
--- a/docker-compose.dev.yaml
+++ b/docker-compose.mysql.dev.yaml
@@ -25,8 +25,11 @@ services:
     environment:
       - LOGGING_LEVEL_ORG_HIBERNATE_SQL=DEBUG
       - SPRING_JPA_SHOW_SQL=true
+      - DB_SELECTOR=mysql
     deploy:
       replicas: 1
+    ports:
+      - 8092:8080
     depends_on:
       - fits
       - db-docker
@@ -58,6 +61,7 @@ services:
     ports:
       - 3306:3306
 
+
   adminer:
     image: adminer
     container_name: adminer
@@ -73,7 +77,7 @@ services:
     container_name: nginx
     env_file: .env
     volumes:
-      - ./nginx.conf:/etc/nginx/conf.d/default.conf
+      - ./utils/nginx/nginx.conf:/etc/nginx/conf.d/default.conf
     ports:
       - 8082:80
     networks:
diff --git a/fits-client/src/main/java/rocks/artur/FITSClient/FITSClient.java b/fits-client/src/main/java/rocks/artur/FITSClient/FITSClient.java
index 1438c1e..fa0a57f 100644
--- a/fits-client/src/main/java/rocks/artur/FITSClient/FITSClient.java
+++ b/fits-client/src/main/java/rocks/artur/FITSClient/FITSClient.java
@@ -19,6 +19,7 @@
 import rocks.artur.api_impl.utils.ByteFile;
 import rocks.artur.domain.CharacterisationResult;
 import rocks.artur.domain.Property;
+import rocks.artur.domain.ValueType;
 import rocks.artur.utils.JSONToolkit;
 import rocks.artur.utils.STAXToolkit;
 
@@ -33,10 +34,12 @@
 import java.io.File;
 import java.io.IOException;
 import java.nio.file.Files;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Set;
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeFormatterBuilder;
+import java.time.format.DateTimeParseException;
+import java.util.*;
 import java.util.stream.Collectors;
 
 //@ApplicationScoped
@@ -45,6 +48,13 @@ public class FITSClient implements CharacterisationResultProducer {
 
     List<String> knownProperties = Arrays.stream(FITSPropertyJsonPath.values()).map(Enum::name).collect(Collectors.toList());
     private String FITS_URL = "http://localhost:8888";
+
+    static DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+
+    static DateTimeFormatter inputFormatter = new DateTimeFormatterBuilder()
+            .appendPattern("[yyyy:MM:dd HH:mm:ssXXX][yyyy:MM:dd HH:mm:ss][yyyy:MM:dd HH:mmXXX][yyyy-MM-dd HH:mm:ss][yyyy/MM/dd HH:mm:ss]")
+            .toFormatter();
+
 
     @Override
     public String getVersion(){
@@ -123,12 +133,46 @@ public List<CharacterisationResult> processFile(ByteFile file) {
             }
             result.addAll(extractCharacterisationResultsStax(content));
         } catch (Exception e) {
-            LOG.error("Exception occurred during file processing");
+            LOG.error("Exception occurred during FITS file parsing");
             e.printStackTrace();
         }
+
+        result = this.fixDateTypes(result);
+        return result;
+    }
+
+    private ArrayList<CharacterisationResult> fixDateTypes(ArrayList<CharacterisationResult> result) {
+        result.stream().forEach(item -> {
+            if (item.getValueType().equals(ValueType.TIMESTAMP)) {
+                String value = item.getValue();
+                LOG.debug(String.format("Parsing Object: %s", item));
+                if (item.getSource().startsWith("OIS File Information")) {
+                    LocalDateTime parsed =
+                            LocalDateTime.ofInstant(Instant.ofEpochMilli(Long.parseLong(value)),
+                                    TimeZone.getDefault().toZoneId());
+                    item.setValue(parsed.format(outputFormatter));
+                } else {
+                    LocalDateTime parsed = tryParseLocalDateTime(value, inputFormatter);
+                    if (parsed != null) {
+                        item.setValue(parsed.format(outputFormatter));
+                    } else {
+                        item.setValue(null);
+                    }
+                }
+                LOG.debug(String.format("Parsed Result: %s", item));
+            }
+        });
         return result;
     }
 
+    LocalDateTime tryParseLocalDateTime(String datetimeString, DateTimeFormatter formatter) {
+        try {
+            return LocalDateTime.parse(datetimeString, formatter);
+        } catch (DateTimeParseException e) {
+            return null;
+        }
+    }
+
     List<CharacterisationResult> extractCharacterisationResults(String fitsResultXML) throws JSONException {
         List<CharacterisationResult> results = new ArrayList<>();
         String fitsResultJSON = JSONToolkit.translateXML(fitsResultXML);
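Note on the date handling added above: the bracketed sections in `inputFormatter` are optional pattern groups, so a single formatter accepts the EXIF-style `yyyy:MM:dd` spellings (with or without a zone offset) as well as the dash and slash variants; epoch-millisecond values from the "OIS File Information" source get a dedicated path, and anything that matches no group is set to `null` rather than propagated. A standalone sketch of that behaviour (the sample strings are invented for illustration; the patterns are copied from the diff):

```java
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.format.DateTimeParseException;

public class DateParseDemo {
    // Same optional-group pattern as FITSClient.inputFormatter: each [...] is tried in turn.
    static DateTimeFormatter inputFormatter = new DateTimeFormatterBuilder()
            .appendPattern("[yyyy:MM:dd HH:mm:ssXXX][yyyy:MM:dd HH:mm:ss][yyyy:MM:dd HH:mmXXX][yyyy-MM-dd HH:mm:ss][yyyy/MM/dd HH:mm:ss]")
            .toFormatter();

    static DateTimeFormatter outputFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

    public static void main(String[] args) {
        String[] samples = {
                "2009:05:28 11:46:32+02:00",  // EXIF style with offset
                "2009:05:28 11:46:32",        // EXIF style without offset
                "2009-05-28 11:46:32",        // ISO-like variant
                "not a date"                  // unparseable: the value would be nulled
        };
        for (String s : samples) {
            try {
                System.out.println(s + " -> " + LocalDateTime.parse(s, inputFormatter).format(outputFormatter));
            } catch (DateTimeParseException e) {
                System.out.println(s + " -> null");
            }
        }
    }
}
```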
diff --git a/infra-persistence/pom.xml b/infra-persistence/pom.xml
index 33f299d..8077390 100644
--- a/infra-persistence/pom.xml
+++ b/infra-persistence/pom.xml
@@ -15,21 +15,30 @@
-
+
+        <dependency>
+            <groupId>rocks.artur</groupId>
+            <artifactId>fitsinn-core</artifactId>
+            <version>0.1.0</version>
+            <scope>compile</scope>
+        </dependency>
+
+
         <dependency>
             <groupId>org.springframework.boot</groupId>
             <artifactId>spring-boot-starter-data-jpa</artifactId>
         </dependency>
         <dependency>
             <groupId>org.springframework.boot</groupId>
-            <artifactId>spring-boot-starter-cache</artifactId>
+            <artifactId>spring-boot-starter-jdbc</artifactId>
         </dependency>
+
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
-            <version>${junit.version}</version>
-            <scope>test</scope>
+            <groupId>org.springframework.boot</groupId>
+            <artifactId>spring-boot-starter-validation</artifactId>
         </dependency>
         <dependency>
             <groupId>org.springframework.boot</groupId>
             <artifactId>spring-boot-starter-test</artifactId>
@@ -41,6 +50,30 @@
         </dependency>
 
+
+        <dependency>
+            <groupId>com.clickhouse</groupId>
+            <artifactId>clickhouse-jdbc</artifactId>
+            <version>0.6.0</version>
+            <classifier>all</classifier>
+        </dependency>
+
+
+        <dependency>
+            <groupId>com.h2database</groupId>
+            <artifactId>h2</artifactId>
+            <version>${h2.version}</version>
+        </dependency>
+
+
+        <dependency>
+            <groupId>mysql</groupId>
+            <artifactId>mysql-connector-java</artifactId>
+            <version>8.0.33</version>
+            <scope>runtime</scope>
+        </dependency>
+
         <dependency>
             <groupId>org.hibernate.orm</groupId>
             <artifactId>hibernate-jpamodelgen</artifactId>
@@ -53,35 +86,27 @@
             <version>4.0.0</version>
             <type>pom</type>
         </dependency>
-        <dependency>
-            <groupId>org.hibernate</groupId>
-            <artifactId>hibernate-validator</artifactId>
-            <version>8.0.0.Final</version>
-        </dependency>
-
         <dependency>
             <groupId>org.glassfish.jaxb</groupId>
             <artifactId>jaxb-runtime</artifactId>
             <version>4.0.0</version>
         </dependency>
+
         <dependency>
-            <groupId>rocks.artur</groupId>
-            <artifactId>fitsinn-core</artifactId>
-            <version>0.1.0</version>
-            <scope>compile</scope>
-        </dependency>
-
-        <dependency>
-            <groupId>com.h2database</groupId>
-            <artifactId>h2</artifactId>
-            <version>${h2.version}</version>
+            <groupId>org.springdoc</groupId>
+            <artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
+            <version>2.0.2</version>
         </dependency>
+
+
         <dependency>
-            <groupId>mysql</groupId>
-            <artifactId>mysql-connector-java</artifactId>
-            <version>8.0.33</version>
-            <scope>runtime</scope>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>${junit.version}</version>
+            <scope>test</scope>
         </dependency>
-
+
     </dependencies>
diff --git a/infra-persistence/src/main/java/rocks/artur/clickhouse/CharacterisationResultClickhouseRepository.java b/infra-persistence/src/main/java/rocks/artur/clickhouse/CharacterisationResultClickhouseRepository.java
new file mode 100644
index 0000000..1747e62
--- /dev/null
+++ b/infra-persistence/src/main/java/rocks/artur/clickhouse/CharacterisationResultClickhouseRepository.java
@@ -0,0 +1,529 @@
+package rocks.artur.clickhouse;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.jdbc.core.JdbcTemplate;
+import org.springframework.jdbc.core.ParameterizedPreparedStatementSetter;
+import rocks.artur.api_impl.filter.AndFilterCriteria;
+import rocks.artur.api_impl.filter.OrFilterCriteria;
+import rocks.artur.api_impl.filter.SingleFilterCriteria;
+import rocks.artur.domain.CharacterisationResult;
+import rocks.artur.domain.FilterCriteria;
+import rocks.artur.domain.Property;
+import rocks.artur.domain.ValueType;
+import rocks.artur.domain.statistics.PropertiesPerObjectStatistic;
+import rocks.artur.domain.statistics.PropertyStatistic;
+
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.util.List;
+import java.util.stream.Collectors;
+
+
+public class CharacterisationResultClickhouseRepository {
+
+    private static final Logger LOG = LoggerFactory.getLogger(CharacterisationResultClickhouseRepository.class);
+    private final JdbcTemplate template;
+
+    /**
+     * Creates a new instance.
+     *
+     * @param template to use to perform JDBC queries to the ClickHouse database.
+     */
+    public CharacterisationResultClickhouseRepository(JdbcTemplate template) {
+        this.template = template;
+    }
+
+    public void save(CharacterisationResult characterisationResult) {
+
+        int rowsInserted = template.update("insert into characterisationresult (file_path,property, source, property_value, value_type)" +
+                        " values (?,?,?,?,?)",
+                characterisationResult.getFilePath(),
+                characterisationResult.getProperty().name(),
+                characterisationResult.getSource(),
+                characterisationResult.getValue(),
+                characterisationResult.getValueType().name());
+
+        System.out.println("Number of rows updated = " + rowsInserted);
+    }
+
+    public List<PropertyStatistic> getPropertyDistribution() {
+        String sql = String.format(
+                "select property, count(property_value) as number " +
+                        "from characterisationresultaggregated " +
+                        "group by property ORDER BY number desc LIMIT 200");
+
+        List<PropertyStatistic> result = template.query(sql, (rs, rowNum) -> {
+            PropertyStatistic propstat = new PropertyStatistic(rs.getLong("number"), Property.valueOf(rs.getString("property")));
+            return propstat;
+        });
+        return result;
+    }
+
+    public List<Object[]> getPropertyValueDistribution(String property, FilterCriteria filter) {
+        String subquery = "";
+        if (filter != null) {
+            subquery = convert(filter);
+            subquery = String.format(" file_path in (%s) and ", subquery);
+        }
+
+        String sql = String.format(
+                "select property, property_value, count(property_value) as number " +
+                        "from characterisationresultaggregated " +
+                        "where %s property = '%s' group by property, property_value ORDER BY number desc LIMIT 200", subquery, property);
+        LOG.info(sql);
+        List<Object[]> result = template.query(sql, (rs, rowNum) -> {
+            Object[] item = new Object[3];
+            item[0] = rs.getString("property");
+            item[1] = rs.getString("property_value");
+            item[2] = rs.getLong("number");
+            return item;
+        });
+        return result;
+    }
+
+
+    public List<Object[]> getPropertyValueTimeStampDistribution(String property, FilterCriteria filter) {
+        String subquery = "";
+        if (filter != null) {
+            subquery = convert(filter);
+            subquery = String.format(" file_path in (%s) and ", subquery);
+        }
+
+        String sql = String.format(
+                "select property, CASE " +
+                        "WHEN property_value = 'CONFLICT' THEN property_value " +
+                        "ELSE SUBSTRING(property_value,1,4) " +
+                        "END as value, count(property) as number " +
+                        "from characterisationresultaggregated " +
+                        "where %s property = '%s' group by property, CASE " +
+                        "WHEN property_value = 'CONFLICT' THEN property_value " +
+                        "ELSE SUBSTRING(property_value,1,4) " +
+                        "END ORDER BY number desc LIMIT 200", subquery, property);
+
+        List<Object[]> result = template.query(sql, (rs, rowNum) -> {
+            Object[] item = new Object[3];
+            item[0] = rs.getString(1);
+            item[1] = rs.getString(2);
+            item[2] = rs.getLong(3);
+            return item;
+        });
+        return result;
+    }
+
+
+    public String convert(FilterCriteria filter) {
+        if (filter instanceof SingleFilterCriteria) {
+            Property property = ((SingleFilterCriteria) filter).getSearchKey();
+            String operator = ((SingleFilterCriteria) filter).getOperation().getValue();
+            String value = ((SingleFilterCriteria) filter).getSearchValue();
+            String result;
+            switch (property.getValueType()) {
+                case TIMESTAMP:
+                    if (!value.equals("CONFLICT")) {
+                        result = String.format("select file_path from characterisationresult where property = '%s' and cast(property_value as DATETIME) %s cast('%s' as DATE)", property, operator, value);
+                    } else {
+                        result = String.format("select file_path from characterisationresultaggregated where property = '%s' and property_value %s '%s'", property, operator, value);
+                    }
+                    break;
+                default:
+                    result = String.format("select file_path from characterisationresultaggregated where property = '%s' and property_value %s '%s'", property, operator, value);
+            }
+            return result;
+        } else if (filter instanceof AndFilterCriteria) {
+            AndFilterCriteria andFilter = (AndFilterCriteria) filter;
+
+            String whereStatement1 = convert(andFilter.getCriteria());
+            String whereStatement2 = convert(andFilter.getOtherCriteria());
+
+            String result = String.format("( (%s) INTERSECT (%s) )", whereStatement1, whereStatement2);
+            return result;
+
+        } else if (filter instanceof OrFilterCriteria) {
+            OrFilterCriteria orFilter = (OrFilterCriteria) filter;
+
+            String whereStatement1 = convert(orFilter.getCriteria());
+            String whereStatement2 = convert(orFilter.getOtherCriteria());
+
+            String result = String.format("( (%s) UNION ALL (%s) )", whereStatement1, whereStatement2);
+            return result;
+        } else {
+            throw new UnsupportedOperationException("this type of FilterCriteria is not supported");
+        }
+    }
+
+    public void saveAll(List<CharacterisationResult> characterisationResults) {
+
+        List<CharacterisationResult> filtered = characterisationResults.stream()
+                .filter(item -> item.getFilePath() != null)
+                .filter(item -> item.getValue() != null && item.getValue().length() < 300).collect(Collectors.toList());
+
+        template.batchUpdate("insert into characterisationresult (file_path,property, source, property_value, value_type)" +
+                        " values (?,?,?,?,?)",
+                filtered,
+                10000,
+                new ParameterizedPreparedStatementSetter<CharacterisationResult>() {
+                    @Override
+                    public void setValues(PreparedStatement ps, CharacterisationResult cResult) throws SQLException {
+                        ps.setString(1, cResult.getFilePath());
+                        ps.setString(2, cResult.getProperty().name());
+                        ps.setString(3, cResult.getSource());
+                        ps.setString(4, cResult.getValue());
+                        ps.setString(5, cResult.getValueType().name());
+                    }
+                });
+
+    }
+
+    public List<CharacterisationResult> getCharacterisationResults(FilterCriteria filter) {
+        String subquery = "";
+        if (filter != null) {
+            subquery = convert(filter);
+            subquery = String.format("where file_path in (%s) ", subquery);
+        }
+
+
+        String sql = String.format(
+                "select file_path,property, source, property_value, value_type " +
+                        "from characterisationresult " +
+                        "%s", subquery);
+
+        List<CharacterisationResult> result = template.query(sql, (rs, rowNum) -> {
+            CharacterisationResult item = new CharacterisationResult();
+            item.setFilePath(rs.getString(1));
+            item.setProperty(Property.valueOf(rs.getString(2)));
+            item.setSource(rs.getString(3));
+            item.setValue(rs.getString(4));
+            item.setValueType(ValueType.valueOf(rs.getString(5)));
+            return item;
+        });
+        return result;
+    }
+
+    public Long getDigitalObjectCount() {
+        String query = String.format(
+                "select count(distinct file_path) from characterisationresultaggregated ");
+
+        Long result = template.queryForObject(query, Long.class);
+        return result;
+    }
+
+    public Long getConflictCount() {
+        String query = String.format(
+                "select count(distinct file_path) from characterisationresultaggregated where property_value = 'CONFLICT' ");
+
+        Long result = template.queryForObject(query, Long.class);
+        return result;
+    }
+
+    public List<String> getSources() {
+        String sql = String.format(
+                "select distinct source from characterisationresult ");
+
+        List<String> result = template.query(sql, (rs, rowNum) -> {
+            return rs.getString(1);
+        });
+        return result;
+    }
+
+    public List<CharacterisationResult> getCharacterisationResultsByFilepath(String filePath) {
+        String sql = String.format(
+                "select file_path, property, source, property_value, value_type " +
+                        "from characterisationresult " +
+                        "where file_path='%s' ", filePath);
+
+        List<CharacterisationResult> result = template.query(sql, (rs, rowNum) -> {
+            CharacterisationResult item = new CharacterisationResult();
+            item.setFilePath(rs.getString(1));
+            item.setProperty(Property.valueOf(rs.getString(2)));
+            item.setSource(rs.getString(3));
+            item.setValue(rs.getString(4));
+            item.setValueType(ValueType.valueOf(rs.getString(5)));
+            return item;
+        });
+        return result;
+    }
+
+    public double[] getSizeStatistics(FilterCriteria filter) {
+        String subquery = "";
+        if (filter != null) {
+            subquery = convert(filter);
+            subquery = String.format(" file_path in (%s) and ", subquery);
+        }
+
+        String sql = String.format(
+                "select sum(toInt32(property_value)) as totalsize, " +
+                        "min(toInt32(property_value)) as minsize, " +
+                        "max(toInt32(property_value)) as maxsize, " +
+                        "avg(toInt32(property_value)) as avgsize, " +
+                        "count(property_value) as count " +
+                        "from characterisationresultaggregated " +
+                        "where %s property='SIZE'", subquery);
+
+        List<double[]> result = template.query(sql, (rs, rowNum) -> {
+            double sum = rs.getDouble(1);
+            double min = rs.getDouble(2);
+            double max = rs.getDouble(3);
+            double avg = rs.getDouble(4);
+            double count = rs.getDouble(5);
+
+            return new double[]{sum, min, max, avg, count};
+        });
+        return result.get(0);
+
+    }
+
+    public double[] getConflictStatistics(FilterCriteria filter) {
+        String subquery = "";
+        if (filter != null) {
+            subquery = convert(filter);
+            subquery = String.format(" file_path in (%s) and ", subquery);
+        }
+
+        String sql = String.format(
+                "select count(distinct file_path) as count " +
+                        "from characterisationresultaggregated " +
+                        "where %s property_value='CONFLICT'", subquery);
+
+        Long conflictsCount = template.queryForObject(sql, Long.class);
+
+
+        String subquery2 = "";
+        if (filter != null) {
+            subquery2 = convert(filter);
+            subquery2 = String.format("where file_path in (%s) ", subquery2);
+        }
+
+        String sql2 = String.format(
+                "select count(distinct file_path) as count " +
+                        "from characterisationresultaggregated " +
+                        "%s", subquery2);
+
+        Long totalCount = template.queryForObject(sql2, Long.class);
+
+        double rate = 0d;
+        if (totalCount != 0) {
+            rate = (double) conflictsCount / totalCount;
+        }
+        double[] result = new double[]{conflictsCount, rate};
+        return result;
+    }
+
+    public List<PropertiesPerObjectStatistic> getObjects(FilterCriteria filter) {
+        String subquery = "";
+        if (filter != null) {
+            subquery = convert(filter);
+            subquery = String.format(" where file_path in (%s) ", subquery);
+        }
+
+        String sql = String.format(
+                "select file_path, count(*) " +
+                        "from characterisationresultaggregated " +
+                        " %s" +
+                        "group by file_path", subquery);
+
+        List<PropertiesPerObjectStatistic> result = template.query(sql, (rs, rowNum) -> {
+            PropertiesPerObjectStatistic statistic = new PropertiesPerObjectStatistic(rs.getLong(2), rs.getString(1));
+            return statistic;
+
+        });
+
+        return result;
+    }
+
+    public List<String[]> getRandomSamples(FilterCriteria filterCriteria, int sampleSize) {
+        String subquery = "";
+        if (filterCriteria != null) {
+            subquery = convert(filterCriteria);
+            subquery = String.format(" where file_path in (%s) ", subquery);
+        }
+
+        String sql = String.format(
+                "select file_path " +
+                        "from characterisationresultaggregated " +
+                        " %s" +
+                        "group by file_path ORDER BY RAND() LIMIT %d ", subquery, sampleSize);
+
+        List<String> resultList = template.query(sql, (rs, rowNum) -> rs.getString(1));
+        List<String[]> collect = resultList.stream().map(item -> new String[]{"1", item}).collect(Collectors.toList());
+
+        return collect;
+
+    }
+
+    public List<String[]> getSelectiveFeatureDistributionSamples(FilterCriteria filterCriteria, List<Property> properties) {
+        String subquery = "";
+        if (filterCriteria != null) {
+            subquery = convert(filterCriteria);
+            subquery = String.format(" where file_path in (%s) ", subquery);
+        }
+
+
+
+        StringBuilder select = new StringBuilder("SELECT ");
+
+        for (int i = 0; i < properties.size(); i++) {
+            String currProperty = properties.get(i).name();
+            if (i == 0) {
+                select.append(String.format("count(%s.file_path) as size, min(%s.file_path) as example, %s.property_value ", currProperty, currProperty, currProperty));
+            } else {
+                select.append(String.format(", %s.property_value ", currProperty));
+            }
+        }
+
+        StringBuilder from = new StringBuilder("FROM ");
+
+        for (int i = 0; i < properties.size(); i++) {
+            String currProperty = properties.get(i).name();
+            if (i == 0) {
+
+                from.append(String.format(" (SELECT v.property_value, v.file_path FROM characterisationresultaggregated v\n" +
+                        "where %s v.property='%s' ) as %s ", subquery, currProperty, currProperty));
+            } else {
+                from.append(String.format(" join (SELECT v.property_value, v.file_path FROM characterisationresultaggregated v\n" +
+                        "where %s v.property='%s') as %s on %s.file_path=%s.file_path ", subquery, currProperty, currProperty, properties.get(0).name(), currProperty));
+            } //TODO: Probably, the join is not required. Check if it is true.
+        }
+
+        StringBuilder groupBy = new StringBuilder("GROUP BY ");
+
+        for (int i = 0; i < properties.size(); i++) {
+            String currProperty = properties.get(i).name();
+            if (i == 0) {
+                groupBy.append(String.format(" %s.property_value ", currProperty));
+            } else {
+                groupBy.append(String.format(", %s.property_value ", currProperty));
+            }
+        }
+
+
+        StringBuilder orderBy = new StringBuilder("ORDER BY size DESC");
+
+        String sql = String.format(
+                "%s %s %s %s", select, from, groupBy, orderBy);
+        System.out.println(sql);
+
+
+        List<String[]> result = template.query(sql, (rs, rowNum) -> {
+            return new String[]{rs.getString(1), rs.getString(2)};
+        });
+
+        return result;
+    }
+
+
+    public void resolveConflictsSimple(){
+        /*
+        DROP TABLE IF EXISTS to_delete;
+
+        CREATE TABLE to_delete
+        (
+            file_path String,
+            property String,
+            source String
+        ) ENGINE = Memory;
+
+        insert into to_delete
+        with weights as (
+            SELECT source,
+                   property,
+                   COUNT(property_value) as count,
+                   COUNT(property_value) * 1.0/ (SELECT count(property_value) FROM characterisationresultaggregated
+                                                 WHERE property_value != 'CONFLICT' ) as weight
+            FROM characterisationresult
+            WHERE file_path in (SELECT file_path FROM characterisationresultaggregated WHERE property_value != 'CONFLICT' )
+            GROUP BY source, property
+        ),
+        tmp_table as (
+            SELECT file_path, property, source, property_value, weight FROM characterisationresult
+            JOIN weights on characterisationresult.property == weights.property and characterisationresult.source == weights.source
+            WHERE (file_path, property) in (SELECT file_path, property from characterisationresultaggregated WHERE property_value == 'CONFLICT')
+        )
+        SELECT file_path,property,source FROM tmp_table
+        WHERE (file_path, property, weight) not in (SELECT file_path, property, MAX(weight) FROM tmp_table GROUP BY file_path, property);
+
+        delete from characterisationresult
+        where (file_path, property, source) in (select file_path,property,source from to_delete);
+
+        drop table IF EXISTS characterisationresultaggregated;
+        */
+
+
+        String sql = String.format("DROP TABLE IF EXISTS to_delete;");
+        int update = template.update(sql);
+
+
+        sql = String.format("" +
+                " CREATE TABLE to_delete\n" +
+                " (\n" +
+                "     file_path String,\n" +
+                "     property String,\n" +
+                "     source String\n" +
+                " ) ENGINE = Memory;");
+        update = template.update(sql);
+
+        sql = String.format("" +
+                " insert into to_delete\n" +
+                " with weights as (\n" +
+                "     SELECT source,\n" +
+                "            property,\n" +
+                "            COUNT(property_value) as count,\n" +
+                "            COUNT(property_value) * 1.0/ (SELECT count(property_value) FROM characterisationresultaggregated\n" +
+                "                                          WHERE property_value != 'CONFLICT' ) as weight\n" +
+                "     FROM characterisationresult\n" +
+                "     WHERE file_path in (SELECT file_path FROM characterisationresultaggregated WHERE property_value != 'CONFLICT' )\n" +
+                "     GROUP BY source, property\n" +
+                " ),\n" +
+                " tmp_table as (\n" +
+                "     SELECT file_path, property, source, property_value, weight FROM characterisationresult\n" +
+                "     JOIN weights on characterisationresult.property == weights.property and characterisationresult.source == weights.source\n" +
+                "     WHERE (file_path, property) in (SELECT file_path, property from characterisationresultaggregated WHERE property_value == 'CONFLICT')\n" +
+                " )\n" +
+                " SELECT file_path,property,source FROM tmp_table\n" +
+                " WHERE (file_path, property, weight) not in (SELECT file_path, property, MAX(weight) FROM tmp_table GROUP BY file_path, property);");
+        update = template.update(sql);
+
+        sql = String.format("" +
+                " delete from characterisationresult\n" +
+                " where (file_path, property, source) in (select file_path,property,source from to_delete);");
+        update = template.update(sql);
+
+        this.cleanAggregation();
+    }
+
+
+
+    void aggregateResults(){
+        /*
+        CREATE TABLE IF NOT EXISTS characterisationresultaggregated
+        ENGINE = AggregatingMergeTree
+        ORDER BY (property, file_path) AS
+        SELECT file_path, property,
+               CASE
+                   WHEN COUNT(distinct property_value) = 1 THEN MIN(property_value)
+                   ELSE 'CONFLICT'
+               END AS property_value
+        FROM characterisationresult
+        GROUP BY property, file_path;
+        */
+        String sql = String.format("" +
+                "CREATE TABLE IF NOT EXISTS characterisationresultaggregated\n" +
+                "ENGINE = AggregatingMergeTree\n" +
+                " ORDER BY (property, file_path) AS\n" +
+                "SELECT file_path, property,\n" +
+                "       CASE\n" +
+                "           WHEN COUNT(distinct property_value) = 1 THEN MIN(property_value)\n" +
+                "           ELSE 'CONFLICT'\n" +
+                "       END AS property_value\n" +
+                "FROM characterisationresult\n" +
+                "GROUP BY property, file_path;"
+        );
+        template.update(sql);
+    }
+
+    void cleanAggregation(){
+        String sql = String.format("drop table IF EXISTS characterisationresultaggregated");
+        int update = template.update(sql);
+    }
+
+}
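Note on `convert()` above: it compiles a `FilterCriteria` tree into a nested row-set expression over `file_path`, using `INTERSECT` for AND and `UNION ALL` for OR, which avoids correlated joins entirely; callers then splice the result into their own `WHERE` clauses. A hand-derived illustration (a fragment, not captured from a running system; the construction of the filter tree itself is outside this diff):

```java
// For a filter tree equivalent to:  FORMAT="PDF" AND SIZE>100
String subquery = repository.convert(filter);
// subquery now reads roughly:
//   ( (select file_path from characterisationresultaggregated
//        where property = 'FORMAT' and property_value = 'PDF')
//     INTERSECT
//     (select file_path from characterisationresultaggregated
//        where property = 'SIZE' and property_value > '100') )
// and each query method embeds it as:
String where = String.format(" file_path in (%s) and ", subquery);
```

Two consequences worth noting, both visible in the code: non-timestamp values are always quoted, so `SIZE>100` compares string representations, and duplicates produced by `UNION ALL` are harmless because the expression is only ever used inside `IN (...)`.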
diff --git a/infra-persistence/src/main/java/rocks/artur/clickhouse/CharacterisationResultGatewayClickhouseImpl.java b/infra-persistence/src/main/java/rocks/artur/clickhouse/CharacterisationResultGatewayClickhouseImpl.java
new file mode 100644
index 0000000..dc35f24
--- /dev/null
+++ b/infra-persistence/src/main/java/rocks/artur/clickhouse/CharacterisationResultGatewayClickhouseImpl.java
@@ -0,0 +1,200 @@
+package rocks.artur.clickhouse;
+
+import org.springdoc.core.customizers.ActuatorOperationCustomizer;
+import rocks.artur.api_impl.filter.SingleFilterCriteria;
+import rocks.artur.domain.*;
+import rocks.artur.domain.statistics.BinningAlgorithms;
+import rocks.artur.domain.statistics.PropertiesPerObjectStatistic;
+import rocks.artur.domain.statistics.PropertyStatistic;
+import rocks.artur.domain.statistics.PropertyValueStatistic;
+import rocks.artur.jpa.view.CharacterisationResultViewJPA;
+
+import java.util.*;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+public class CharacterisationResultGatewayClickhouseImpl implements CharacterisationResultGateway {
+
+    CharacterisationResultClickhouseRepository repository;
+    public CharacterisationResultGatewayClickhouseImpl(CharacterisationResultClickhouseRepository repository) {
+        this.repository = repository;
+    }
+
+
+
+    @Override
+    public void addCharacterisationResult(CharacterisationResult characterisationResult) {
+        repository.save(characterisationResult);
+        repository.cleanAggregation();
+    }
+
+    @Override
+    public List<CharacterisationResult> getCharacterisationResults(FilterCriteria filter) {
+        return repository.getCharacterisationResults(filter);
+    }
+
+    @Override
+    public List<PropertyStatistic> getPropertyDistribution(FilterCriteria filter) {
+        return repository.getPropertyDistribution();
+    }
+
+    @Override
+    public List<CharacterisationResult> getCharacterisationResultsByFilepath(String filePath) {
+        return repository.getCharacterisationResultsByFilepath(filePath);
+    }
+
+    @Override
+    public List<CharacterisationResult> getCharacterisationResultsByEntry(Entry entry) {
+        return null;
+    }
+
+    @Override
+    public List<Entry> getConflictEntries() {
+        return null;
+    }
+
+    @Override
+    public List<Entry> getEntries() {
+        return null;
+    }
+
+    @Override
+    public List<CharacterisationResult> getConflictsByFilepath(String filepath) {
+        repository.aggregateResults();
+        List<CharacterisationResult> results = new ArrayList<>();
+        List<CharacterisationResult> allJPAByFilePath = getCharacterisationResultsByFilepath(filepath);
+        List<Property> properties = allJPAByFilePath.stream().map(item -> item.getProperty()).collect(Collectors.toList());
+
+        for (Property property : properties) {
+            List<CharacterisationResult> collect = allJPAByFilePath.stream().filter(item -> item.getProperty().equals(property)).toList();
+            if (collect.stream().map(CharacterisationResult::getValue).distinct().count() > 1) {
+                results.addAll(collect);
+            }
+        }
+        return results;
+    }
+
+    @Override
+    public Map<String, Double> getCollectionStatistics(FilterCriteria filterCriteria) {
+        repository.aggregateResults();
+        Map<String, Double> result = new HashMap<>();
+
+        double[] sizeStatistics = repository.getSizeStatistics(filterCriteria);
+        result.put("totalSize", sizeStatistics[0]);
+        result.put("minSize", sizeStatistics[1]);
+        result.put("maxSize", sizeStatistics[2]);
+        result.put("avgSize", sizeStatistics[3]);
+        result.put("totalCount", sizeStatistics[4]);
+
+        double[] conflictStatistics = repository.getConflictStatistics(filterCriteria);
+        result.put("conflictRate", conflictStatistics[1]);
+        result.put("conflictCount", conflictStatistics[0]);
+        return result;
+    }
+
+    @Override
+    public List<PropertyValueStatistic> getPropertyValueDistribution(Property property, FilterCriteria filter) {
+        repository.aggregateResults();
+        switch (property.getValueType()) {
+            case TIMESTAMP: {
+                List<PropertyValueStatistic> collect = null;
+                List<Object[]> propertyValueDistribution =
+                        repository.getPropertyValueTimeStampDistribution(property.name(), filter);
+                collect = propertyValueDistribution.stream().filter(stat -> property.name().equalsIgnoreCase((String) stat[0]))
+                        .map(stat -> new PropertyValueStatistic((Long) stat[2], (String) stat[1]))
+                        .collect(Collectors.toList());
+                collect.sort(Comparator.comparingLong(PropertyValueStatistic::getCount).reversed());
+                return collect;
+            }
+            case INTEGER:
+            case FLOAT: {
+                List<Object[]> propertyValueDistribution =
+                        repository.getPropertyValueDistribution(property.name(), filter);
+
+                List<Float> floats = propertyValueDistribution.stream().filter(stat -> property.name().equalsIgnoreCase((String) stat[0]) && !(stat[1].equals("CONFLICT")))
+                        .map(stat -> {
+                                    Float val = Float.parseFloat(stat[1].toString());
+                                    Long count = (Long) stat[2];
+
+                                    List<Float> result = new ArrayList<>();
+
+                                    for (long l = 0; l < count; l++) {
+                                        result.add(val);
+                                    }
+                                    return result;
+                                }
+                        ).flatMap(Collection::stream).sorted(Float::compare).collect(Collectors.toList());
+
+                List<PropertyValueStatistic> propertyValueStatistics = BinningAlgorithms.runBinning(floats);
+
+                Optional<Long> conflicts = propertyValueDistribution.stream().filter(stat -> property.name().equalsIgnoreCase((String) stat[0]) && stat[1].equals("CONFLICT"))
+                        .map(stat -> (Long) stat[2]).findAny();
+
+                conflicts.ifPresent(aLong -> propertyValueStatistics.add(new PropertyValueStatistic(aLong, "CONFLICT")));
+
+                return propertyValueStatistics;
+            }
+            default:
+                List<PropertyValueStatistic> collect = null;
+                List<Object[]> propertyValueDistribution =
+                        repository.getPropertyValueDistribution(property.name(), filter);
+                collect = propertyValueDistribution.stream().filter(stat -> property.name().equalsIgnoreCase((String) stat[0]))
+                        .map(stat -> new PropertyValueStatistic((Long) stat[2], (String) stat[1]))
+                        .collect(Collectors.toList());
+                collect.sort(Comparator.comparingLong(PropertyValueStatistic::getCount).reversed());
+                return collect;
+        }
+    }
+
+    @Override
+    public List<String> getSources() {
+        return repository.getSources();
+    }
+
+    @Override
+    public List<PropertiesPerObjectStatistic> getObjects(FilterCriteria filterCriteria) {
+        return repository.getObjects(filterCriteria);
+    }
+
+    @Override
+    public List<String[]> getSamples(FilterCriteria filterCriteria, SamplingAlgorithms algorithm, List<Property> properties) {
+        repository.aggregateResults();
+        switch (algorithm) {
+            case RANDOM -> {
+                List<String[]> samples = repository.getRandomSamples(filterCriteria, 10);
+                return samples;
+            }
+            case SELECTIVE_FEATURE_DISTRIBUTION -> {
+                List<String[]> selectiveFeatureDistributionSamples = repository.getSelectiveFeatureDistributionSamples(filterCriteria, properties);
+                //List<String> examples = selectiveFeatureDistributionSamples.stream().map(arr -> arr[1]).collect(Collectors.toList());
+                return selectiveFeatureDistributionSamples;
+            }
+        }
+        return null;
+    }
+
+    @Override
+    public void addCharacterisationResults(List<CharacterisationResult> characterisationResults) {
+        repository.saveAll(characterisationResults);
+        repository.cleanAggregation();
+    }
+
+    @Override
+    public double getConflictRate() {
+        repository.aggregateResults();
+        Long totalCount = repository.getDigitalObjectCount();
+        Long conflictCount = repository.getConflictCount();
+        return conflictCount / (double) totalCount;
+    }
+
+    @Override
+    public void delete(CharacterisationResult characterisationResult) {
+
+    }
+
+    @Override
+    public void resolveConflictsNative() {
+        repository.resolveConflictsSimple();
+        repository.aggregateResults();
+    }
+}
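Note on the FLOAT/INTEGER branch above: the gateway re-expands each aggregated `(value, count)` row into `count` individual floats before calling `BinningAlgorithms.runBinning`, so the binning code only ever sees a flat, sorted sample (at the cost of one `Float` per digital object). A self-contained sketch of that expansion step (the input rows are invented; `runBinning` itself is the project's own API and is not reproduced here):

```java
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class ExpandDemo {
    public static void main(String[] args) {
        // (property, value, count) triples, as returned by getPropertyValueDistribution
        List<Object[]> rows = List.of(
                new Object[]{"SIZE", "10", 3L},
                new Object[]{"SIZE", "25", 1L});

        List<Float> floats = rows.stream()
                .flatMap(row -> {
                    Float value = Float.parseFloat((String) row[1]);
                    Long count = (Long) row[2];
                    // one entry per counted object, mirroring the for-loop in the diff
                    return Stream.generate(() -> value).limit(count);
                })
                .sorted(Float::compare)
                .collect(Collectors.toList());

        System.out.println(floats); // [10.0, 10.0, 10.0, 25.0] -> input to runBinning
    }
}
```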
diff --git a/infra-persistence/src/main/java/rocks/artur/jpa/CharacterisationResultGatewayJpaImpl.java b/infra-persistence/src/main/java/rocks/artur/jpa/CharacterisationResultGatewayJpaImpl.java
index d7dfc91..180b8dd 100644
--- a/infra-persistence/src/main/java/rocks/artur/jpa/CharacterisationResultGatewayJpaImpl.java
+++ b/infra-persistence/src/main/java/rocks/artur/jpa/CharacterisationResultGatewayJpaImpl.java
@@ -2,7 +2,6 @@
 
 import jakarta.transaction.Transactional;
-import org.h2.jdbc.JdbcBatchUpdateException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import rocks.artur.domain.*;
@@ -72,7 +71,7 @@ public List<PropertyValueStatistic> getPropertyValueDistribution(Property proper
             case TIMESTAMP: {
                 List<PropertyValueStatistic> collect = null;
                 List<Object[]> propertyValueDistribution =
-                        characterisationResultViewRepository.getPropertyValueTimeStampDistribution(filter);
+                        characterisationResultViewRepository.getPropertyValueTimeStampDistribution(property.name(), filter);
                 collect = propertyValueDistribution.stream().filter(stat -> property.name().equalsIgnoreCase((String) stat[0]))
                         .map(stat -> new PropertyValueStatistic((Long) stat[2], (String) stat[1]))
                         .collect(Collectors.toList());
@@ -82,7 +81,7 @@ public List<PropertyValueStatistic> getPropertyValueDistribution(Property proper
             case INTEGER:
             case FLOAT: {
                 List<Object[]> propertyValueDistribution =
-                        characterisationResultViewRepository.getPropertyValueDistribution(filter);
+                        characterisationResultViewRepository.getPropertyValueDistribution(property.name(), filter);
 
                 List<Float> floats = propertyValueDistribution.stream().filter(stat -> property.name().equalsIgnoreCase((String) stat[0]) && !(stat[1].equals("CONFLICT")))
                         .map(stat -> {
@@ -110,7 +109,7 @@ public List<PropertyValueStatistic> getPropertyValueDistribution(Property proper
             default:
                 List<PropertyValueStatistic> collect = null;
                 List<Object[]> propertyValueDistribution =
-                        characterisationResultViewRepository.getPropertyValueDistribution(filter);
+                        characterisationResultViewRepository.getPropertyValueDistribution(property.name(), filter);
                 collect = propertyValueDistribution.stream().filter(stat -> property.name().equalsIgnoreCase((String) stat[0]))
                         .map(stat -> new PropertyValueStatistic((Long) stat[2], (String) stat[1]))
                         .collect(Collectors.toList());
@@ -228,7 +227,7 @@ public void addCharacterisationResults(List<CharacterisationResult> characterisa
             }
         });
         try {
-            characterisationResultRepository.saveFast(tmp);
+            characterisationResultRepository.saveAll(tmp);
         } catch (RuntimeException e) {
             LOG.error("Some characterisation results have already been persisted. Batch insert is not possible. Uploaded items with NULL values:" );
             List<CharacterisationResultJPA> collect = tmp.stream().filter(item -> item.getSource() == null || item.getProperty() == null || item.getFilePath() == null).collect(Collectors.toList());
@@ -251,4 +250,9 @@ public void delete(CharacterisationResult characterisationResult) {
         characterisationResultRepository.delete(new CharacterisationResultJPA(characterisationResult));
     }
 
+    @Override
+    public void resolveConflictsNative() {
+
+    }
+
 }
diff --git a/infra-persistence/src/main/java/rocks/artur/jpa/table/CharacterisationResultJPA.java b/infra-persistence/src/main/java/rocks/artur/jpa/table/CharacterisationResultJPA.java
index c82cc7d..e5ec917 100644
--- a/infra-persistence/src/main/java/rocks/artur/jpa/table/CharacterisationResultJPA.java
+++ b/infra-persistence/src/main/java/rocks/artur/jpa/table/CharacterisationResultJPA.java
@@ -1,13 +1,15 @@
 package rocks.artur.jpa.table;
 
-import jakarta.persistence.*;
+import jakarta.persistence.Column;
+import jakarta.persistence.Entity;
+import jakarta.persistence.GeneratedValue;
+import jakarta.persistence.Id;
+import jakarta.persistence.Table;
 import org.hibernate.annotations.GenericGenerator;
 import rocks.artur.domain.CharacterisationResult;
 import rocks.artur.domain.Property;
 import rocks.artur.domain.ValueType;
 
-import java.util.UUID;
-
 @Entity
 @Table(name = "characterisationresult")
diff --git a/infra-persistence/src/main/java/rocks/artur/jpa/table/CharacterisationResultRepository.java b/infra-persistence/src/main/java/rocks/artur/jpa/table/CharacterisationResultRepository.java
index 2184d23..fa80002 100644
--- a/infra-persistence/src/main/java/rocks/artur/jpa/table/CharacterisationResultRepository.java
+++ b/infra-persistence/src/main/java/rocks/artur/jpa/table/CharacterisationResultRepository.java
@@ -5,7 +5,6 @@
 import org.springframework.stereotype.Repository;
 
 import java.util.List;
-import java.util.UUID;
 
 @Repository
 public interface CharacterisationResultRepository extends JpaRepository<CharacterisationResultJPA, String>, CustomCharacterisationResultRepository {
diff --git a/infra-persistence/src/main/java/rocks/artur/jpa/view/CharacterisationResultViewRepository.java b/infra-persistence/src/main/java/rocks/artur/jpa/view/CharacterisationResultViewRepository.java
index 2e2895e..71d48e3 100644
--- a/infra-persistence/src/main/java/rocks/artur/jpa/view/CharacterisationResultViewRepository.java
+++ b/infra-persistence/src/main/java/rocks/artur/jpa/view/CharacterisationResultViewRepository.java
@@ -2,7 +2,6 @@
 
 import org.springframework.data.jpa.repository.JpaRepository;
 import org.springframework.data.jpa.repository.Query;
 import org.springframework.stereotype.Repository;
-import rocks.artur.jpa.table.CharacterisationResultJPA;
 
 import java.util.List;
diff --git a/infra-persistence/src/main/java/rocks/artur/jpa/view/CustomCharacterisationResultViewRepository.java b/infra-persistence/src/main/java/rocks/artur/jpa/view/CustomCharacterisationResultViewRepository.java
index 878aa31..fa4bd35 100644
--- a/infra-persistence/src/main/java/rocks/artur/jpa/view/CustomCharacterisationResultViewRepository.java
+++ b/infra-persistence/src/main/java/rocks/artur/jpa/view/CustomCharacterisationResultViewRepository.java
@@ -7,8 +7,8 @@
 import java.util.List;
 
 public interface CustomCharacterisationResultViewRepository {
-    List<Object[]> getPropertyValueDistribution(FilterCriteria filter);
-    List<Object[]> getPropertyValueTimeStampDistribution(FilterCriteria filter);
+    List<Object[]> getPropertyValueDistribution(String property, FilterCriteria filter);
+    List<Object[]> getPropertyValueTimeStampDistribution(String property, FilterCriteria filter);
 
     List<Object[]> getObjects(FilterCriteria filterCriteria);
diff --git a/infra-persistence/src/main/java/rocks/artur/jpa/view/CustomCharacterisationResultViewRepositoryImpl.java b/infra-persistence/src/main/java/rocks/artur/jpa/view/CustomCharacterisationResultViewRepositoryImpl.java
index 0b8015a..a95b03b 100644
--- a/infra-persistence/src/main/java/rocks/artur/jpa/view/CustomCharacterisationResultViewRepositoryImpl.java
+++ b/infra-persistence/src/main/java/rocks/artur/jpa/view/CustomCharacterisationResultViewRepositoryImpl.java
@@ -24,18 +24,19 @@ public CustomCharacterisationResultViewRepositoryImpl(EntityManager entityManage
 
     @Override
     @Cacheable("distributions")
-    public List<Object[]> getPropertyValueDistribution(FilterCriteria filter) {
+    public List<Object[]> getPropertyValueDistribution(String property, FilterCriteria filter) {
 
-        String subquery = "select distinct FILE_PATH from characterisationresultview ";
+        String subquery = "";
         if (filter != null) {
             subquery = filterJPA.convert(filter);
+            subquery = String.format(" file_path in (%s) and ", subquery);
         }
 
         String query = String.format(
-                "select PROPERTY, PROPERTY_VALUE, count(*) " +
-                        "from characterisationresultview t " +
-                        "join (%s) c on t.FILE_PATH=c.FILE_PATH " +
-                        "where VALUE_TYPE != 'TIMESTAMP' group by PROPERTY, PROPERTY_VALUE", subquery);
+                "select property, property_value, count(property_value) as number " +
+                        "from characterisationresultview " +
+                        "where %s property = '%s' group by property, property_value ORDER BY number desc LIMIT 200", subquery, property);
+
 
         List<Object[]> resultList = entityManager.createNativeQuery(query).getResultList();
         return resultList;
@@ -43,24 +44,24 @@
 
     @Override
     @Cacheable("timedistributions")
-    public List<Object[]> getPropertyValueTimeStampDistribution(FilterCriteria filter) {
+    public List<Object[]> getPropertyValueTimeStampDistribution(String property, FilterCriteria filter) {
 
-        String subquery = "select distinct FILE_PATH from characterisationresultview ";
+        String subquery = "";
         if (filter != null) {
             subquery = filterJPA.convert(filter);
+            subquery = String.format(" file_path in (%s) and ", subquery);
         }
 
         //THIS IS H2-SPECIFIC SQL, BECAUSE OF PARSEDATETIME
         String query = String.format(
-                "select PROPERTY, CASE " +
-                        "WHEN PROPERTY_VALUE = 'CONFLICT' THEN PROPERTY_VALUE " +
-                        "ELSE SUBSTRING(PROPERTY_VALUE,1,4) " +
-                        "END, count(*) " +
-                        "from characterisationresultview t " +
-                        "join (%s) c on t.FILE_PATH=c.FILE_PATH " +
-                        "where VALUE_TYPE = 'TIMESTAMP' group by PROPERTY, CASE " +
-                        "WHEN PROPERTY_VALUE = 'CONFLICT' THEN PROPERTY_VALUE " +
-                        "ELSE SUBSTRING(PROPERTY_VALUE,1,4) " +
-                        "END", subquery);
+                "select property, CASE " +
+                        "WHEN property_value = 'CONFLICT' THEN property_value " +
+                        "ELSE SUBSTRING(property_value,1,4) " +
+                        "END as value, count(property) as number " +
+                        "from characterisationresultview " +
+                        "where %s property = '%s' group by property, CASE " +
+                        "WHEN property_value = 'CONFLICT' THEN property_value " +
+                        "ELSE SUBSTRING(property_value,1,4) " +
+                        "END ORDER BY number desc LIMIT 200", subquery, property);
 
         List<Object[]> resultList = entityManager.createNativeQuery(query).getResultList();
         return resultList;
@@ -68,15 +69,16 @@
 
     @Override
     public List<Object[]> getObjects(FilterCriteria filterCriteria) {
-        String subquery = "select distinct FILE_PATH from characterisationresultview ";
+        String subquery = "";
         if (filterCriteria != null) {
             subquery = filterJPA.convert(filterCriteria);
+            subquery = String.format("where file_path in (%s)", subquery);
         }
 
         String query = String.format(
                 "select t.FILE_PATH, count(*) " +
                         "from characterisationresultview t " +
-                        "join (%s) c on t.FILE_PATH=c.FILE_PATH " +
+                        " %s" +
                         "group by t.FILE_PATH", subquery);
 
         List<Object[]> resultList = entityManager.createNativeQuery(query).getResultList();
@@ -86,11 +88,13 @@
     @Override
     @Cacheable("sizedistributions")
     public double[] getSizeStatistics(FilterCriteria filterCriteria) {
-        String subquery = "select distinct FILE_PATH from characterisationresultview ";
+        String subquery = "";
         if (filterCriteria != null) {
             subquery = filterJPA.convert(filterCriteria);
+            subquery = String.format(" file_path in (%s) and ", subquery);
         }
 
+
         String query = String.format(
                 "select IFNULL(sum(cast(t.property_value as SIGNED)),0) as totalsize, " +
                         "IFNULL(min(cast(t.property_value as SIGNED)),0) as minsize, " +
@@ -98,8 +102,7 @@
                         "IFNULL(avg(cast(t.property_value as SIGNED)),0) as avgsize, " +
                         "count(t.property_value) as count " +
                         "from characterisationresultview t " +
-                        "join (%s) c on t.FILE_PATH=c.FILE_PATH " +
-                        "where t.PROPERTY='SIZE'", subquery);
+                        "where %s t.PROPERTY='SIZE'", subquery);
 
         Object[] singleResult = (Object[]) entityManager.createNativeQuery(query).getSingleResult();
         Double sum = Double.valueOf(singleResult[0].toString());
@@ -114,24 +117,32 @@
 
     @Override
     public double[] getConflictStatistics(FilterCriteria filterCriteria) {
-        String subquery = "select distinct FILE_PATH from characterisationresultview ";
+        String subquery = "";
         if (filterCriteria != null) {
             subquery = filterJPA.convert(filterCriteria);
+            subquery = String.format(" file_path in (%s) and ", subquery);
         }
 
+
         String query = String.format(
-                "select count(distinct t.FILE_PATH) as count " +
-                        "from characterisationresultview t " +
-                        "join (%s) c on t.FILE_PATH=c.FILE_PATH " +
-                        "where t.PROPERTY_VALUE='CONFLICT'", subquery);
+                "select count(distinct file_path) as count " +
+                        "from characterisationresultview " +
+                        "where %s property_value='CONFLICT'", subquery);
 
         Long conflictsCount = (Long) entityManager.createNativeQuery(query).getSingleResult();
 
+        String subquery2 = "";
+        if (filterCriteria != null) {
+            subquery2 = filterJPA.convert(filterCriteria);
+            subquery2 = String.format("where file_path in (%s) ", subquery2);
+        }
+
         String query2 = String.format(
-                "select count(distinct t.FILE_PATH) as count " +
-                        "from characterisationresultview t " +
-                        "join (%s) c on t.FILE_PATH=c.FILE_PATH ", subquery);
+                "select count(distinct file_path) as count " +
+                        "from characterisationresultview " +
+                        "%s", subquery2);
+
 
         Long totalCount = (Long) entityManager.createNativeQuery(query2).getSingleResult();
 
@@ -145,15 +156,15 @@
 
     @Override
     public List<String> getRandomSamples(FilterCriteria filterCriteria, int sampleSize) {
-        String subquery = "select distinct FILE_PATH from characterisationresultview ";
+        String subquery = "";
         if (filterCriteria != null) {
             subquery = filterJPA.convert(filterCriteria);
+            subquery = String.format(" where file_path in (%s) ", subquery);
         }
 
-
         String query = String.format(
                 "select t.FILE_PATH " +
                         "from characterisationresultview t " +
-                        "join (%s) c on t.FILE_PATH=c.FILE_PATH group by t.FILE_PATH " +
+                        "%s group by t.FILE_PATH " +
                        "ORDER BY RAND() LIMIT %d ", subquery, sampleSize);
 
         List<String> resultList = entityManager.createNativeQuery(query).getResultList();
@@ -163,9 +174,10 @@
 
     public List<String[]> getSelectiveFeatureDistributionSamples(FilterCriteria filterCriteria, List<Property> properties) {
 
-        String subquery = "select distinct FILE_PATH from characterisationresultview ";
+        String subquery = "";
         if (filterCriteria != null) {
             subquery = filterJPA.convert(filterCriteria);
+            subquery = String.format(" file_path in (%s) and ", subquery);
         }
 
@@ -187,10 +199,10 @@
             if (i == 0) {
 
                 from.append(String.format(" (SELECT v.property_value, v.file_path FROM characterisationresultview v\n" +
-                        "join (%s) c on v.FILE_PATH=c.FILE_PATH where v.property='%s' ) %s ", subquery, currProperty, currProperty));
+                        "where %s v.property='%s' ) %s ", subquery, currProperty, currProperty));
             } else {
                 from.append(String.format(" join (SELECT v.property_value, v.file_path FROM characterisationresultview v\n" +
-                        "join (%s) c on v.FILE_PATH=c.FILE_PATH where v.property='%s') %s on %s.file_path=%s.file_path ", subquery, currProperty, currProperty, properties.get(0), currProperty));
+                        "where %s v.property='%s') %s on %s.file_path=%s.file_path ", subquery, currProperty, currProperty, properties.get(0), currProperty));
             } //TODO: Probably, the join is not required. Check if it is true.
         }
diff --git a/infra-rest/src/main/java/rocks/artur/endpoints/RestService.java b/infra-rest/src/main/java/rocks/artur/endpoints/RestService.java
index a3216be..e99a608 100644
--- a/infra-rest/src/main/java/rocks/artur/endpoints/RestService.java
+++ b/infra-rest/src/main/java/rocks/artur/endpoints/RestService.java
@@ -101,6 +101,7 @@ public List<CharacterisationResult> getConflictsPerObject(
 
 
     @RequestMapping(method = RequestMethod.POST, value = "/statistics")
+    @Consumes(MediaType.APPLICATION_JSON)
     public Map<String, Double> getCollectionStatistics(@RequestParam(name = "filter", required = false) @Parameter(name = "filter", description = "Filter", example = "FORMAT=\"Portable Document Format\"") String filter) throws ParseException {
         CriteriaParser parser = new CriteriaParser();
         FilterCriteria filterCriteria = parser.parse(filter);
diff --git a/main/pom.xml b/main/pom.xml
index e1c684f..80accc8 100644
--- a/main/pom.xml
+++ b/main/pom.xml
@@ -41,16 +41,10 @@
             <artifactId>datasource-proxy</artifactId>
             <version>1.4.1</version>
         </dependency>
-        <dependency>
-            <groupId>org.springframework.boot</groupId>
-            <artifactId>spring-boot-starter-data-jpa</artifactId>
-        </dependency>
-
-
         <dependency>
             <groupId>junit</groupId>
             <artifactId>junit</artifactId>
diff --git a/main/src/main/java/rocks/artur/App.java b/main/src/main/java/rocks/artur/App.java
index 284482f..6fd51fb 100644
--- a/main/src/main/java/rocks/artur/App.java
+++ b/main/src/main/java/rocks/artur/App.java
@@ -1,23 +1,23 @@
 package rocks.artur;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import org.springframework.beans.factory.config.BeanFactoryPostProcessor;
 import org.springframework.beans.factory.support.BeanDefinitionRegistry;
 import org.springframework.boot.SpringApplication;
 import org.springframework.boot.autoconfigure.SpringBootApplication;
-import org.springframework.cache.annotation.EnableCaching;
 import org.springframework.context.annotation.Bean;
 import org.springframework.context.annotation.ClassPathBeanDefinitionScanner;
 import org.springframework.core.type.classreading.MetadataReader;
 import org.springframework.core.type.classreading.MetadataReaderFactory;
-import org.springframework.core.type.filter.TypeFilter;
-import org.springframework.data.jpa.repository.config.EnableJpaRepositories;
-import org.springframework.scheduling.annotation.EnableAsync;
+
+import java.util.Arrays;
 
 @SpringBootApplication
-@EnableCaching
 public class App {
+    private static final Logger LOG = LoggerFactory.getLogger(App.class);
 
     public static void main(String[] args) {
         SpringApplication.run(App.class);
     }
@@ -26,25 +26,26 @@ public static void main(String[] args) {
     BeanFactoryPostProcessor beanFactoryPostProcessor() {
         return beanFactory -> genericApplicationContext((BeanDefinitionRegistry) beanFactory);
     }
-
     void genericApplicationContext(BeanDefinitionRegistry beanRegistry) {
         ClassPathBeanDefinitionScanner beanDefinitionScanner = new ClassPathBeanDefinitionScanner(beanRegistry);
-        beanDefinitionScanner.addIncludeFilter(removeModelAndEntitiesFilter());
-        beanDefinitionScanner.scan("rocks.artur.api","rocks.artur.api_impl","rocks.artur.FITSClient","rocks.artur.jpa","rocks.artur.endpoints.RestService");
+        String profile = System.getenv("DB_SELECTOR") == null ? System.getProperty("spring.profiles.active", "h2") : System.getenv("DB_SELECTOR");
+        System.out.println(profile);
+        beanDefinitionScanner.addIncludeFilter(App::match);
+        String[] packages;
+        switch (profile) {
+            case "clickhouse" ->
+                    packages = new String[]{"rocks.artur.api", "rocks.artur.api_impl", "rocks.artur.FITSClient", "rocks.artur.endpoints.RestService", "rocks.artur.clickhouse"};
+            case "h2", "mysql" ->
+                    packages = new String[]{"rocks.artur.api", "rocks.artur.api_impl", "rocks.artur.FITSClient", "rocks.artur.endpoints.RestService", "rocks.artur.jpa"};
+            default ->
+                    throw new UnsupportedOperationException("The selected db is not supported. Choose one from [clickhouse, mysql, h2]");
+        }
+        beanDefinitionScanner.scan(packages);
     }
-
-    static TypeFilter removeModelAndEntitiesFilter() {
-        return (MetadataReader mr, MetadataReaderFactory mrf) -> {
-            return !mr.getClassMetadata()
-                    .getClassName()
-                    .startsWith("rocks.artur.domain") &&
-                    !mr.getClassMetadata()
-                            .getClassName()
-                            .startsWith("rocks.artur.api_impl.filter") &&
-                    !mr.getClassMetadata()
-                            .getClassName()
-                            .startsWith("rocks.artur.api_impl.utils")
-                    ;
-        };
+    private static boolean match(MetadataReader mr, MetadataReaderFactory mrf) {
+        String className = mr.getClassMetadata().getClassName();
+        LOG.debug(className);
+        String[] packagesToIgnore = new String[]{"rocks.artur.domain", "rocks.artur.api_impl.filter", "rocks.artur.api_impl.utils"};
+        return Arrays.stream(packagesToIgnore).noneMatch(className::startsWith);
     }
 }
\ No newline at end of file
System.getProperty("spring.profiles.active", "h2") : System.getenv("DB_SELECTOR"); + System.out.println(profile); + beanDefinitionScanner.addIncludeFilter(App::match); + String[] packages; + switch (profile) { + case "clickhouse" -> + packages = new String[]{"rocks.artur.api", "rocks.artur.api_impl", "rocks.artur.FITSClient", "rocks.artur.endpoints.RestService", "rocks.artur.clickhouse"}; + case "h2", "mysql" -> + packages = new String[]{"rocks.artur.api", "rocks.artur.api_impl", "rocks.artur.FITSClient", "rocks.artur.endpoints.RestService", "rocks.artur.jpa"}; + default -> + throw new UnsupportedOperationException("The selected db is not supported. Choose one from [clickhouse, mysql, h2]"); + } + beanDefinitionScanner.scan(packages); } - - static TypeFilter removeModelAndEntitiesFilter() { - return (MetadataReader mr, MetadataReaderFactory mrf) -> { - return !mr.getClassMetadata() - .getClassName() - .startsWith("rocks.artur.domain") && - !mr.getClassMetadata() - .getClassName() - .startsWith("rocks.artur.api_impl.filter") && - !mr.getClassMetadata() - .getClassName() - .startsWith("rocks.artur.api_impl.utils") - ; - }; + private static boolean match(MetadataReader mr, MetadataReaderFactory mrf) { + String className = mr.getClassMetadata().getClassName(); + LOG.debug(className); + String[] packagesToIgnore = new String[]{"rocks.artur.domain", "rocks.artur.api_impl.filter", "rocks.artur.api_impl.utils"}; + return Arrays.stream(packagesToIgnore).noneMatch(className::startsWith); } } \ No newline at end of file diff --git a/main/src/main/resources/application-clickhouse.properties b/main/src/main/resources/application-clickhouse.properties new file mode 100644 index 0000000..d47113c --- /dev/null +++ b/main/src/main/resources/application-clickhouse.properties @@ -0,0 +1,30 @@ +spring.datasource.url=jdbc:clickhouse://db-docker:8123/default +spring.datasource.driverClassName=com.clickhouse.jdbc.ClickHouseDriver +spring.datasource.username=default +spring.datasource.password= + +spring.autoconfigure.exclude= \ +org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration, \ +org.springframework.boot.autoconfigure.orm.jpa.HibernateJpaAutoConfiguration, \ +org.springframework.boot.autoconfigure.jdbc.DataSourceTransactionManagerAutoConfiguration + +spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.MySQLDialect + +#spring.sql.init.mode=always +#spring.sql.init.platform=h2 + +#spring.datasource.url=jdbc:h2:mem:default;DB_CLOSE_DELAY=-1 +#spring.datasource.driverClassName=org.h2.Driver +#spring.datasource.username=sa +#spring.datasource.password= + +#spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.H2Dialect +#spring.jpa.hibernate.ddl-auto=none +#spring.h2.console.enabled=true +# default path: h2-console + +spring.servlet.multipart.max-file-size=1GB +spring.servlet.multipart.max-request-size=1GB +#spring.jpa.properties.hibernate.jdbc.batch_size=1000 +#spring.jpa.properties.hibernate.order_inserts=true +#spring.jpa.properties.hibernate.order_updates=true diff --git a/main/src/main/resources/application-h2.properties b/main/src/main/resources/application-h2.properties new file mode 100644 index 0000000..95628b2 --- /dev/null +++ b/main/src/main/resources/application-h2.properties @@ -0,0 +1,18 @@ +spring.sql.init.mode=always +#spring.sql.init.platform=h2 + +spring.datasource.url=jdbc:h2:mem:default;DB_CLOSE_DELAY=-1 +spring.datasource.driverClassName=org.h2.Driver +spring.datasource.username=sa +spring.datasource.password= + 
diff --git a/main/src/main/resources/application-h2.properties b/main/src/main/resources/application-h2.properties
new file mode 100644
index 0000000..95628b2
--- /dev/null
+++ b/main/src/main/resources/application-h2.properties
@@ -0,0 +1,18 @@
+spring.sql.init.mode=always
+#spring.sql.init.platform=h2
+
+spring.datasource.url=jdbc:h2:mem:default;DB_CLOSE_DELAY=-1
+spring.datasource.driverClassName=org.h2.Driver
+spring.datasource.username=sa
+spring.datasource.password=
+
+spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.H2Dialect
+spring.jpa.hibernate.ddl-auto=none
+spring.h2.console.enabled=true
+# default path: h2-console
+
+spring.servlet.multipart.max-file-size=1GB
+spring.servlet.multipart.max-request-size=1GB
+spring.jpa.properties.hibernate.jdbc.batch_size=1000
+spring.jpa.properties.hibernate.order_inserts=true
+spring.jpa.properties.hibernate.order_updates=true
diff --git a/main/src/main/resources/application-docker.properties b/main/src/main/resources/application-mysql.properties
similarity index 95%
rename from main/src/main/resources/application-docker.properties
rename to main/src/main/resources/application-mysql.properties
index b5067e7..c11a2ed 100644
--- a/main/src/main/resources/application-docker.properties
+++ b/main/src/main/resources/application-mysql.properties
@@ -1,4 +1,3 @@
-spring.sql.init.platform=mysql
 spring.sql.init.mode=always
 
 spring.jpa.hibernate.ddl-auto=none
diff --git a/main/src/main/resources/application.properties b/main/src/main/resources/application.properties
index 27ab208..e7209de 100644
--- a/main/src/main/resources/application.properties
+++ b/main/src/main/resources/application.properties
@@ -1,28 +1,2 @@
-spring.profiles.active=@active.profile@
-
-spring.sql.init.mode=always
-spring.sql.init.platform=h2
-
-spring.datasource.url=jdbc:h2:mem:default;DB_CLOSE_DELAY=-1
-spring.datasource.driverClassName=org.h2.Driver
-spring.datasource.username=sa
-spring.datasource.password=
-
-spring.jpa.properties.hibernate.dialect=org.hibernate.dialect.H2Dialect
-
-#spring.jpa.show-sql=true
-#logging.level.org.hibernate.SQL=DEBUG
-#logging.level.org.hibernate.type.descriptor.sql.BasicBinder=TRACE
-spring.jpa.hibernate.ddl-auto=none
-
-spring.h2.console.enabled=true
-# default path: h2-console
-
-spring.servlet.multipart.max-file-size=1GB
-spring.servlet.multipart.max-request-size=1GB
-#server.port=8080
-
-
-spring.jpa.properties.hibernate.jdbc.batch_size=1000
-spring.jpa.properties.hibernate.order_inserts=true
-spring.jpa.properties.hibernate.order_updates=true
+# this defines which application-<profile>.properties file is used at runtime
+spring.profiles.active=${DB_SELECTOR:h2}
diff --git a/main/src/main/resources/data-h2.sql b/main/src/main/resources/data.sql
similarity index 100%
rename from main/src/main/resources/data-h2.sql
rename to main/src/main/resources/data.sql
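The placeholder `${DB_SELECTOR:h2}` resolves to the `DB_SELECTOR` environment variable when it is set and falls back to `h2` otherwise, which is how each container ends up loading its matching `application-<profile>.properties`. A hypothetical stand-alone equivalent of that rule:

```
// Hypothetical helper mirroring ${DB_SELECTOR:h2}; not part of the repo.
public final class ProfileResolver {
    static String activeProfile() {
        String env = System.getenv("DB_SELECTOR"); // e.g. "clickhouse" or "mysql" from docker-compose
        return env != null ? env : "h2";           // default when the variable is unset
    }

    public static void main(String[] args) {
        System.out.println("active profile: " + activeProfile());
    }
}
```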
diff --git a/main/src/test/java/rocks/artur/ResolveConflictsImplTest.java b/main/src/test/java/rocks/artur/CRHResolveConflictsImplTest.java
similarity index 72%
rename from main/src/test/java/rocks/artur/ResolveConflictsImplTest.java
rename to main/src/test/java/rocks/artur/CRHResolveConflictsImplTest.java
index fa5e145..7b86e91 100644
--- a/main/src/test/java/rocks/artur/ResolveConflictsImplTest.java
+++ b/main/src/test/java/rocks/artur/CRHResolveConflictsImplTest.java
@@ -1,38 +1,29 @@
 package rocks.artur;
 
-import org.junit.Assert;
 import org.junit.jupiter.api.Test;
 import org.junit.runner.RunWith;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.boot.test.context.SpringBootTest;
 import org.springframework.test.context.ActiveProfiles;
 import org.springframework.test.context.junit4.SpringRunner;
-import rocks.artur.api_impl.ResolveConflictsImpl;
+import rocks.artur.api_impl.CRH_ResolveConflictsImpl;
 import rocks.artur.domain.CharacterisationResult;
-import rocks.artur.domain.FilterCriteria;
-import rocks.artur.domain.Property;
-import rocks.artur.domain.SamplingAlgorithms;
-import rocks.artur.domain.statistics.PropertyStatistic;
-import rocks.artur.domain.statistics.PropertyValueStatistic;
-import rocks.artur.endpoints.CriteriaParser;
 import rocks.artur.jpa.CharacterisationResultGatewayJpaImpl;
 
-import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.List;
-import java.util.Map;
 
-@ActiveProfiles("dev")
+@ActiveProfiles("h2")
 @RunWith(SpringRunner.class)
 @SpringBootTest
-class ResolveConflictsImplTest {
+class CRHResolveConflictsImplTest {
 
     @Autowired
     CharacterisationResultGatewayJpaImpl characterisationResultGatewaySqlImpl;
 
     @Autowired
-    ResolveConflictsImpl resolveConflicts;
+    CRH_ResolveConflictsImpl resolveConflicts;
 
     @Test
     void getAllTest() {
diff --git a/main/src/test/java/rocks/artur/CharacterisationResultGatewayImplTest.java b/main/src/test/java/rocks/artur/CharacterisationResultGatewayImplTest.java
index 9cd38da..fbba02f 100644
--- a/main/src/test/java/rocks/artur/CharacterisationResultGatewayImplTest.java
+++ b/main/src/test/java/rocks/artur/CharacterisationResultGatewayImplTest.java
@@ -23,7 +23,7 @@ import java.util.List;
 import java.util.Map;
 
-@ActiveProfiles("dev")
+@ActiveProfiles("h2")
 @RunWith(SpringRunner.class)
 @SpringBootTest
 class CharacterisationResultGatewayImplTest {
diff --git a/main/src/test/java/rocks/artur/RestServiceTest.java b/main/src/test/java/rocks/artur/RestServiceTest.java
index 82e12eb..b853247 100644
--- a/main/src/test/java/rocks/artur/RestServiceTest.java
+++ b/main/src/test/java/rocks/artur/RestServiceTest.java
@@ -21,7 +21,7 @@ import static org.mockserver.model.HttpRequest.request;
 import static org.mockserver.model.HttpResponse.response;
 
-@ActiveProfiles("dev")
+@ActiveProfiles("h2")
 @RunWith(SpringRunner.class)
 @SpringBootTest(webEnvironment = SpringBootTest.WebEnvironment.DEFINED_PORT)
 class RestServiceTest {
diff --git a/main/src/test/resources/application.properties b/main/src/test/resources/application.properties
index 54a77b3..14950ed 100644
--- a/main/src/test/resources/application.properties
+++ b/main/src/test/resources/application.properties
@@ -1,5 +1,4 @@
 spring.sql.init.mode=always
-spring.sql.init.platform=h2
 spring.jpa.hibernate.ddl-auto=none
 
 spring.datasource.url=jdbc:h2:mem:default;DB_CLOSE_DELAY=-1
diff --git a/main/src/test/resources/data.sql b/main/src/test/resources/data.sql
index 49edfe4..600a4c7 100644
--- a/main/src/test/resources/data.sql
+++ b/main/src/test/resources/data.sql
@@ -1,7 +1,7 @@
 INSERT INTO characterisationresult (id, file_path, property, source, property_value, value_type)
 VALUES
-(10,'/home/conftest1', 'FORMAT', 'file utility:5.03', 'Portable Document Format', 'STRING'),
+(16,'/home/conftest1', 'FORMAT', 'file utility:5.03', 'Portable Document Format', 'STRING'),
 (11,'/home/conftest1', 'FORMAT', 'Droid:3', 'MS Word', 'STRING'),
diff --git a/pom.xml b/pom.xml
index f45c938..f00eae9 100644
--- a/pom.xml
+++ b/pom.xml
@@ -183,40 +183,5 @@
-
-    <profiles>
-        <profile>
-            <id>docker</id>
-            <properties>
-                <active.profile>docker</active.profile>
-            </properties>
-            <build>
-                <plugins>
-                    <plugin>
-                        <groupId>org.springframework.boot</groupId>
-                        <artifactId>spring-boot-maven-plugin</artifactId>
-                        <version>${spring-boot.version}</version>
-                    </plugin>
-                </plugins>
-            </build>
-        </profile>
-        <profile>
-            <id>dev</id>
-            <properties>
-                <active.profile>dev</active.profile>
-            </properties>
-            <activation>
-                <activeByDefault>true</activeByDefault>
-            </activation>
-            <build>
-                <plugins>
-                    <plugin>
-                        <groupId>org.springframework.boot</groupId>
-                        <artifactId>spring-boot-maven-plugin</artifactId>
-                        <version>${spring-boot.version}</version>
-                    </plugin>
-                </plugins>
-            </build>
-        </profile>
-    </profiles>
-</project>
+</project>
\ No newline at end of file
diff --git a/auto.post b/utils/auto.post
similarity index 100%
rename from auto.post
rename to utils/auto.post
diff --git a/auto.sh b/utils/auto.sh
similarity index 89%
rename from auto.sh
rename to utils/auto.sh
index 885ae7f..d68cabd 100644
--- a/auto.sh
+++ b/utils/auto.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-source /home/artur/rnd/git/fitsinn/.venv/bin/activate
+source ${PWD}/../.venv/bin/activate
 
 ./mvnw -pl -web -DskipTests clean install
 ./mvnw spring-boot:run -f main/pom.xml &
diff --git a/utils/clickhouse/config.xml b/utils/clickhouse/config.xml
new file mode 100644
index 0000000..6c4a403
--- /dev/null
+++ b/utils/clickhouse/config.xml
@@ -0,0 +1,1038 @@
[1038-line ClickHouse server config; the XML markup was lost in extraction. Recoverable settings: trace-level logging to /var/log/clickhouse-server/ (1000M rotation, 10 files), listener ports 8123 (HTTP), 9000 (native TCP), 9004 (MySQL protocol) and 9009 (interserver), TLS stubs, cache sizes (uncompressed 8589934592, marks 5368709120), data paths under /var/lib/clickhouse/, users.xml wiring with a default profile/database, sample remote_servers shards on localhost/127.0.0.x, the system log tables (query_log, trace_log, query_thread_log, metric_log, asynchronous_metric_log, opentelemetry_span_log, crash_log; mostly partitioned by toYYYYMM(event_date) with a 7500 ms flush interval), *_dictionary.xml includes, the /clickhouse/task_queue/ddl queue, graphite rollup rules (click_cost, max), /var/lib/clickhouse/format_schemas/, a query-masking rule hiding encrypt/decrypt arguments, and a Sentry DSN (https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277).]
\ No newline at end of file
diff --git a/utils/clickhouse/initdb.sql b/utils/clickhouse/initdb.sql
new file mode 100644
index 0000000..d97dce4
--- /dev/null
+++ b/utils/clickhouse/initdb.sql
@@ -0,0 +1,10 @@
+CREATE TABLE characterisationresult
+(
+    file_path String,
+    property String,
+    source String,
+    property_value String,
+    value_type String
+) ENGINE = ReplacingMergeTree
+      PRIMARY KEY (source, property, file_path)
+      ORDER BY (source, property, file_path);
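Worth knowing about the schema above: ReplacingMergeTree collapses rows that share the sorting key (source, property, file_path) only during background merges, so duplicates can remain visible until a merge runs; a query can force the collapsed view with FINAL. A sketch under the same assumptions as the smoke test earlier (clickhouse-jdbc on the classpath, port 8123 published on localhost):

```
// Hypothetical deduplicated read from characterisationresult; not part of the repo.
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class DeduplicatedRead {
    public static void main(String[] args) throws Exception {
        try (Connection conn = DriverManager.getConnection(
                     "jdbc:clickhouse://localhost:8123/default", "default", "");
             Statement stmt = conn.createStatement();
             // FINAL makes ClickHouse collapse same-key rows at query time.
             ResultSet rs = stmt.executeQuery(
                     "SELECT file_path, property, property_value "
                             + "FROM characterisationresult FINAL "
                             + "WHERE property = 'FORMAT' LIMIT 10")) {
            while (rs.next()) {
                System.out.printf("%s %s=%s%n",
                        rs.getString("file_path"),
                        rs.getString("property"),
                        rs.getString("property_value"));
            }
        }
    }
}
```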
diff --git a/utils/clickhouse/users.xml b/utils/clickhouse/users.xml
new file mode 100644
index 0000000..2c5b9f7
--- /dev/null
+++ b/utils/clickhouse/users.xml
@@ -0,0 +1,58 @@
[58-line ClickHouse users config; the XML markup was lost in extraction. Recoverable settings: a default profile (max_memory_usage 10000000000, random load_balancing), a readonly profile (readonly 1), a default user with a hashed password (4acfe3202a5ff5cf467898fc58aab1d615029441) allowed from ::/0 and bound to the default profile/quota, and a default quota (3600 s interval, all limits 0, i.e. unlimited).]
\ No newline at end of file
diff --git a/fileupload.py b/utils/fileupload.py
similarity index 100%
rename from fileupload.py
rename to utils/fileupload.py
diff --git a/fileupload.sh b/utils/fileupload.sh
similarity index 100%
rename from fileupload.sh
rename to utils/fileupload.sh
diff --git a/mysql-cluster/mysql-router.env b/utils/mysql-cluster/mysql-router.env
similarity index 100%
rename from mysql-cluster/mysql-router.env
rename to utils/mysql-cluster/mysql-router.env
diff --git a/mysql-cluster/mysql-server.env b/utils/mysql-cluster/mysql-server.env
similarity index 100%
rename from mysql-cluster/mysql-server.env
rename to utils/mysql-cluster/mysql-server.env
diff --git a/mysql-cluster/mysql-shell.env b/utils/mysql-cluster/mysql-shell.env
similarity index 100%
rename from mysql-cluster/mysql-shell.env
rename to utils/mysql-cluster/mysql-shell.env
diff --git a/mysql-cluster/scripts/db.sql b/utils/mysql-cluster/scripts/db.sql
similarity index 100%
rename from mysql-cluster/scripts/db.sql
rename to utils/mysql-cluster/scripts/db.sql
diff --git a/mysql-cluster/scripts/setupCluster.js b/utils/mysql-cluster/scripts/setupCluster.js
similarity index 100%
rename from mysql-cluster/scripts/setupCluster.js
rename to utils/mysql-cluster/scripts/setupCluster.js
diff --git a/nginx.conf b/utils/nginx/nginx.conf
similarity index 100%
rename from nginx.conf
rename to utils/nginx/nginx.conf
diff --git a/web/frontend/src/AppConfig.jsx b/web/frontend/src/AppConfig.jsx
index 17aa48b..d59ecef 100644
--- a/web/frontend/src/AppConfig.jsx
+++ b/web/frontend/src/AppConfig.jsx
@@ -15,13 +15,34 @@ const AppConfig = () => {
     "globalProperties",
     []
   );
+  const [globalStatistics, setGlobalStatistics] = useSessionStorage(
+    "globalStatistics",
+    []
+  );
+
+  var myHeaders = new Headers();
+  myHeaders.append("Content-Type", "application/json");
+
+  var requestOptions = {
+    method: "POST",
+    headers: myHeaders,
+    redirect: "follow",
+  };
 
   const fetchGlobalProperties = async () => {
+    await fetch(BACKEND_URL + "/statistics?", requestOptions);
     const response = await fetch(BACKEND_URL + "/properties");
     let data = await response.json();
     let properties = data.map((prop) => prop.property);
     setGlobalProperties(properties);
   };
+
+  const fetchGlobalStatistics = async () => {
+    const response = await fetch(BACKEND_URL + "/statistics?", requestOptions);
+    let data = await response.json();
+    setGlobalStatistics(data);
+  };
+
   const fetchHealth = async () => {
     try {
       const response = await fetch(BACKEND_URL + "/health");
@@ -32,9 +53,14 @@ const AppConfig = () => {
     }
   };
 
+  const fetchInitialData = async () => {
+    await fetchHealth();
+    await fetchGlobalProperties();
+    await fetchGlobalStatistics();
+  };
+
   useEffect(() => {
-    fetchHealth();
-    fetchGlobalProperties();
+    fetchInitialData();
   }, []);
 
   return (
diff --git a/web/frontend/src/scenes/dashboard/index.jsx b/web/frontend/src/scenes/dashboard/index.jsx
index a8423ef..db97df2 100644
--- a/web/frontend/src/scenes/dashboard/index.jsx
+++ b/web/frontend/src/scenes/dashboard/index.jsx
@@ -13,20 +13,25 @@ import {uniqueProperties} from "../../components/Filter";
 const Dashboard = () => {
   const theme = useTheme();
   const colors = tokens(theme.palette.mode);
-  const [sizeStatistics, setSizeStatistics] = useState([
-    {
-      totalSize: 10047,
-      avgSize: 3349,
-      minSize: 4,
-      maxSize: 10000,
-      conflictRate: 0.17,
-    },
-  ]);
   const [properties, setProperties] = useState([]);
   const [filter, setFilter] = useSessionStorage("filterString", "");
+
+  const [globalStatistics, setGlobalStatistics] = useSessionStorage(
+    "globalStatistics",
+    [
+      {
+        totalSize: 10047,
+        avgSize: 3349,
+        minSize: 4,
+        maxSize: 10000,
+        conflictRate: 0.17,
+      },
+    ]
+  );
+
   const [globalProperties, setGlobalProperties] = useSessionStorage(
     "globalProperties",
     []
@@ -65,14 +70,17 @@ const Dashboard = () => {
       requestOptions
     );
     const data = await response.json();
-    setSizeStatistics(data);
+    setGlobalStatistics(data);
+  };
+
+  const fetchData = async () => {
+    await fetchStatistics();
+    await fetchGlobalProperties();
   };
 
   useEffect(() => {
     console.log("loading the dashboard");
-
-    fetchStatistics();
-    fetchGlobalProperties();
+    fetchData();
   }, [filter]);
 
   const handleClick = () => {
@@ -112,43 +120,43 @@
[43-line hunk lost in extraction; presumably the dashboard's StatBox markup, rebound from sizeStatistics[0] to globalStatistics[0]]
@@ -164,9 +172,9 @@ {
       redirect: "follow",
     };
 
+    const params = new URLSearchParams();
+    params.append('filter', filter)
+    params.append('algorithm',"SELECTIVE_FEATURE_DISTRIBUTION")
+    params.append('properties', ["FORMAT", "MIMETYPE", "FORMAT_VERSION"])
+
     const response = await fetch(
       BACKEND_URL +
-        "/samples?" +
-        new URLSearchParams({
-          filter: filter,
-          properties: "FORMAT",
-          properties: "MIMETYPE",
-          algorithm: "SELECTIVE_FEATURE_DISTRIBUTION",
-        }),
+        `/samples?${params.toString()}`,
       requestOptions
     );
     const data = await response.json();
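One subtlety in the rewritten fetch: URLSearchParams.append stringifies its second argument, so the array is sent as a single properties=FORMAT,MIMETYPE,FORMAT_VERSION value rather than three repeated keys; Spring's default conversion splits comma-separated values into a collection, so both encodings typically bind. (This also fixes the old object literal, where the duplicate properties key meant only the last value survived.) A hypothetical Java client sending the equivalent request; the port and the POST method are assumptions carried over from the other endpoints:

```
// Hypothetical client for the /samples endpoint; not part of the repo.
import java.net.URI;
import java.net.URLEncoder;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;

public class SamplesClient {
    public static void main(String[] args) throws Exception {
        // Single comma-joined properties value, matching the frontend's encoding.
        String query = "filter=" + URLEncoder.encode("FORMAT=\"Portable Document Format\"", StandardCharsets.UTF_8)
                + "&algorithm=SELECTIVE_FEATURE_DISTRIBUTION"
                + "&properties=" + URLEncoder.encode("FORMAT,MIMETYPE,FORMAT_VERSION", StandardCharsets.UTF_8);
        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create("http://localhost:8092/samples?" + query))
                .POST(HttpRequest.BodyPublishers.noBody())
                .build();
        HttpResponse<String> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofString());
        System.out.println(response.body());
    }
}
```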