diff --git a/core/src/main/java/rocks/artur/api/AnalyzePersistFile.java b/core/src/main/java/rocks/artur/api/AnalyzePersistFile.java index d015c1b..2bd9ea4 100644 --- a/core/src/main/java/rocks/artur/api/AnalyzePersistFile.java +++ b/core/src/main/java/rocks/artur/api/AnalyzePersistFile.java @@ -11,8 +11,8 @@ */ public interface AnalyzePersistFile { - Long uploadCharacterisationResults(ByteFile file); + Long uploadCharacterisationResults(ByteFile file, String datasetName); - Long uploadCharacterisationResults(List files); + Long uploadCharacterisationResults(List files, String datasetName); } diff --git a/core/src/main/java/rocks/artur/api/GetCollectionStatistics.java b/core/src/main/java/rocks/artur/api/GetCollectionStatistics.java index 4ee5ae3..67719cd 100644 --- a/core/src/main/java/rocks/artur/api/GetCollectionStatistics.java +++ b/core/src/main/java/rocks/artur/api/GetCollectionStatistics.java @@ -5,6 +5,6 @@ import java.util.Map; public interface GetCollectionStatistics { - Map getStatistics(FilterCriteria filterCriteria); + Map getStatistics(FilterCriteria filterCriteria, String datasetName); } diff --git a/core/src/main/java/rocks/artur/api/GetDatasetInfo.java b/core/src/main/java/rocks/artur/api/GetDatasetInfo.java new file mode 100644 index 0000000..be38089 --- /dev/null +++ b/core/src/main/java/rocks/artur/api/GetDatasetInfo.java @@ -0,0 +1,7 @@ +package rocks.artur.api; + +import java.util.List; + +public interface GetDatasetInfo { + List listDatasets(); +} diff --git a/core/src/main/java/rocks/artur/api/GetObjects.java b/core/src/main/java/rocks/artur/api/GetObjects.java index a6732f8..0adef3b 100644 --- a/core/src/main/java/rocks/artur/api/GetObjects.java +++ b/core/src/main/java/rocks/artur/api/GetObjects.java @@ -10,8 +10,8 @@ * This interface enables getting characterisation results. */ public interface GetObjects { - List getObjects(FilterCriteria filterCriteria); - Iterable getObject(String filePath); + List getObjects(FilterCriteria filterCriteria, String datasetName); + Iterable getObject(String filePath, String datasetName); - List getConflictsFromObject(String filePath); + List getConflictsFromObject(String filePath, String datasetName); } diff --git a/core/src/main/java/rocks/artur/api/GetProperties.java b/core/src/main/java/rocks/artur/api/GetProperties.java index b371c5e..2c6239c 100644 --- a/core/src/main/java/rocks/artur/api/GetProperties.java +++ b/core/src/main/java/rocks/artur/api/GetProperties.java @@ -9,6 +9,6 @@ * This interface enables getting a property distribution. */ public interface GetProperties { - List getProperties(); - List getProperties(FilterCriteria filter); + List getProperties(String datasetName); + List getProperties(FilterCriteria filter, String datasetName); } diff --git a/core/src/main/java/rocks/artur/api/GetPropertyValueDistribution.java b/core/src/main/java/rocks/artur/api/GetPropertyValueDistribution.java index 54733bb..01566ba 100644 --- a/core/src/main/java/rocks/artur/api/GetPropertyValueDistribution.java +++ b/core/src/main/java/rocks/artur/api/GetPropertyValueDistribution.java @@ -11,5 +11,5 @@ * This interface enables getting a property value distribution given a property name. */ public interface GetPropertyValueDistribution { - List getPropertyValueDistribution(Property propertyName, FilterCriteria filterCriteria); + List getPropertyValueDistribution(Property propertyName, FilterCriteria filterCriteria, String datasetName); } diff --git a/core/src/main/java/rocks/artur/api/GetPropertyValueDistributionWithFilter.java b/core/src/main/java/rocks/artur/api/GetPropertyValueDistributionWithFilter.java index 1b1ebac..17b5173 100644 --- a/core/src/main/java/rocks/artur/api/GetPropertyValueDistributionWithFilter.java +++ b/core/src/main/java/rocks/artur/api/GetPropertyValueDistributionWithFilter.java @@ -8,5 +8,5 @@ * This interface enables getting a property value distribution given a filter. */ public interface GetPropertyValueDistributionWithFilter { - List getPropertyValueDistributionWithFilter(String propertyName, String filter); + List getPropertyValueDistributionWithFilter(String propertyName, String filter, String datasetName); } diff --git a/core/src/main/java/rocks/artur/api/GetSamples.java b/core/src/main/java/rocks/artur/api/GetSamples.java index 4cc9506..96a80c2 100644 --- a/core/src/main/java/rocks/artur/api/GetSamples.java +++ b/core/src/main/java/rocks/artur/api/GetSamples.java @@ -15,7 +15,7 @@ public interface GetSamples { void setProperties(List properties); - Iterable getObjects(FilterCriteria filterCriteria); + Iterable getObjects(FilterCriteria filterCriteria, String datasetName); - List getSamplingInfo(FilterCriteria filterCriteria); + List getSamplingInfo(FilterCriteria filterCriteria, String datasetName); } diff --git a/core/src/main/java/rocks/artur/api/GetSources.java b/core/src/main/java/rocks/artur/api/GetSources.java index b4b18c6..bd1e5c5 100644 --- a/core/src/main/java/rocks/artur/api/GetSources.java +++ b/core/src/main/java/rocks/artur/api/GetSources.java @@ -6,5 +6,5 @@ * This interface enables getting a property distribution. */ public interface GetSources { - List getSources(); + List getSources(String datasetName); } diff --git a/core/src/main/java/rocks/artur/api/ResolveConflicts.java b/core/src/main/java/rocks/artur/api/ResolveConflicts.java index a08ea5c..11126b7 100644 --- a/core/src/main/java/rocks/artur/api/ResolveConflicts.java +++ b/core/src/main/java/rocks/artur/api/ResolveConflicts.java @@ -1,5 +1,5 @@ package rocks.artur.api; public interface ResolveConflicts { - void run(); + void run(String datasetName); } diff --git a/core/src/main/java/rocks/artur/api_impl/AnalyzePersistFileImpl.java b/core/src/main/java/rocks/artur/api_impl/AnalyzePersistFileImpl.java index a9e0125..36f471a 100644 --- a/core/src/main/java/rocks/artur/api_impl/AnalyzePersistFileImpl.java +++ b/core/src/main/java/rocks/artur/api_impl/AnalyzePersistFileImpl.java @@ -21,20 +21,20 @@ public AnalyzePersistFileImpl(CharacterisationResultProducer characterisationRes } @Override - public Long uploadCharacterisationResults(ByteFile file) { + public Long uploadCharacterisationResults(ByteFile file, String datasetName) { List characterisationResults = characterisationResultProducer.processFile(file); - characterisationResultGateway.addCharacterisationResults(characterisationResults); + characterisationResultGateway.addCharacterisationResults(characterisationResults, datasetName); return Long.valueOf(characterisationResults.size()); } @Override - public Long uploadCharacterisationResults(List files) { + public Long uploadCharacterisationResults(List files, String datasetName) { List characterisationResults = new ArrayList<>(); files.stream().forEach(file -> { List tmp = characterisationResultProducer.processFile(file); characterisationResults.addAll(tmp); }); - characterisationResultGateway.addCharacterisationResults(characterisationResults); + characterisationResultGateway.addCharacterisationResults(characterisationResults, datasetName); return Long.valueOf(characterisationResults.size()); } } diff --git a/core/src/main/java/rocks/artur/api_impl/CRH_ResolveConflictsImpl.java b/core/src/main/java/rocks/artur/api_impl/CRH_ResolveConflictsImpl.java index 38d1911..ec76608 100644 --- a/core/src/main/java/rocks/artur/api_impl/CRH_ResolveConflictsImpl.java +++ b/core/src/main/java/rocks/artur/api_impl/CRH_ResolveConflictsImpl.java @@ -22,33 +22,33 @@ public CRH_ResolveConflictsImpl(CharacterisationResultGateway characterisationRe } - public void run() { - init(); + public void run(String datasetName) { + init(datasetName); System.out.println(sourceWeights); //System.out.println("sum of weights: " + sourceWeights.values().stream().reduce(0d, Double::sum)); - updateTruth(); + updateTruth(datasetName); System.out.println("sum of weights: " + sourceWeights.values().stream().reduce(0d, Double::sum)); //System.out.println(truth); for (int i = 0; i < 3; i++) { - updateWeights(); + updateWeights(datasetName); System.out.println(sourceWeights); System.out.println("sum of weights: " + sourceWeights.values().stream().reduce(0d, Double::sum)); - updateTruth(); + updateTruth(datasetName); //System.out.println(truth); } - resolveConflicts(); + resolveConflicts(datasetName); } - private void resolveConflicts() { + private void resolveConflicts(String datasetName) { truth.entrySet().stream().forEach( entry -> { Entry key = entry.getKey(); String value = entry.getValue(); - List characterisationResultsByEntry = characterisationResultGateway.getCharacterisationResultsByEntry(key); + List characterisationResultsByEntry = characterisationResultGateway.getCharacterisationResultsByEntry(key, datasetName); for (CharacterisationResult characterisationResult : characterisationResultsByEntry) { if (!characterisationResult.getValue().equals(value)) { - characterisationResultGateway.delete(characterisationResult); + characterisationResultGateway.delete(characterisationResult, datasetName); } } @@ -56,7 +56,7 @@ private void resolveConflicts() { }); } - private void updateWeights() { + private void updateWeights(String datasetName) { Map score = sources.stream().collect(Collectors.toMap( Function.identity(), s -> 0.0)); @@ -66,10 +66,10 @@ private void updateWeights() { s -> 0.0)); - List entries = characterisationResultGateway.getEntries(); + List entries = characterisationResultGateway.getEntries(datasetName); for (Entry entry : entries) { - List characterisationResults = characterisationResultGateway.getCharacterisationResultsByEntry(entry); + List characterisationResults = characterisationResultGateway.getCharacterisationResultsByEntry(entry, datasetName); for (CharacterisationResult characterisationResult : characterisationResults) { @@ -112,10 +112,10 @@ private void updateWeights() { } } - private void updateTruth() { - List entries = characterisationResultGateway.getEntries(); + private void updateTruth(String datasetName) { + List entries = characterisationResultGateway.getEntries(datasetName); for (Entry entry : entries) { - List characterisationResults = characterisationResultGateway.getCharacterisationResultsByEntry(entry); + List characterisationResults = characterisationResultGateway.getCharacterisationResultsByEntry(entry, datasetName); if (characterisationResults.size() > 0) { CharacterisationResult firstResult = characterisationResults.get(0); @@ -140,9 +140,9 @@ private void updateTruth() { Map sourceWeights; Map truth; - void init() { + void init(String datasetName) { - sources = characterisationResultGateway.getSources(); + sources = characterisationResultGateway.getSources(datasetName); sourceWeights = sources.stream().collect(Collectors.toMap( Function.identity(), s -> 1.0 / sources.size())); diff --git a/core/src/main/java/rocks/artur/api_impl/GetCollectionStatisticsImpl.java b/core/src/main/java/rocks/artur/api_impl/GetCollectionStatisticsImpl.java index ea97869..10bcd1c 100644 --- a/core/src/main/java/rocks/artur/api_impl/GetCollectionStatisticsImpl.java +++ b/core/src/main/java/rocks/artur/api_impl/GetCollectionStatisticsImpl.java @@ -15,8 +15,8 @@ public GetCollectionStatisticsImpl(CharacterisationResultGateway characterisatio } @Override - public Map getStatistics(FilterCriteria filterCriteria) { - Map sizeStatistics = characterisationResultGateway.getCollectionStatistics(filterCriteria); + public Map getStatistics(FilterCriteria filterCriteria, String datasetName) { + Map sizeStatistics = characterisationResultGateway.getCollectionStatistics(filterCriteria, datasetName); return sizeStatistics; } diff --git a/core/src/main/java/rocks/artur/api_impl/GetDatasetInfoImpl.java b/core/src/main/java/rocks/artur/api_impl/GetDatasetInfoImpl.java new file mode 100644 index 0000000..a45f302 --- /dev/null +++ b/core/src/main/java/rocks/artur/api_impl/GetDatasetInfoImpl.java @@ -0,0 +1,19 @@ +package rocks.artur.api_impl; + +import rocks.artur.api.GetDatasetInfo; +import rocks.artur.domain.CharacterisationResultGateway; + +import java.util.List; + +public class GetDatasetInfoImpl implements GetDatasetInfo { + private CharacterisationResultGateway characterisationResultGateway; + + public GetDatasetInfoImpl(CharacterisationResultGateway characterisationResultGateway) { + this.characterisationResultGateway = characterisationResultGateway; + } + + @Override + public List listDatasets() { + return this.characterisationResultGateway.listDatasets(); + } +} diff --git a/core/src/main/java/rocks/artur/api_impl/GetObjectsImpl.java b/core/src/main/java/rocks/artur/api_impl/GetObjectsImpl.java index 61d2585..bffbbb2 100644 --- a/core/src/main/java/rocks/artur/api_impl/GetObjectsImpl.java +++ b/core/src/main/java/rocks/artur/api_impl/GetObjectsImpl.java @@ -16,20 +16,20 @@ public GetObjectsImpl(CharacterisationResultGateway characterisationResultGatewa } @Override - public List getObjects(FilterCriteria filterCriteria) { - List objects = characterisationResultGateway.getObjects(filterCriteria); + public List getObjects(FilterCriteria filterCriteria, String datasetName) { + List objects = characterisationResultGateway.getObjects(filterCriteria, datasetName); return objects; } @Override - public Iterable getObject(String filePath) { - Iterable characterisationResultsByFilepath = characterisationResultGateway.getCharacterisationResultsByFilepath(filePath); + public Iterable getObject(String filePath, String datasetName) { + Iterable characterisationResultsByFilepath = characterisationResultGateway.getCharacterisationResultsByFilepath(filePath, datasetName); return characterisationResultsByFilepath; } @Override - public List getConflictsFromObject(String filePath) { - List characterisationResultsByFilepath = characterisationResultGateway.getConflictsByFilepath(filePath); + public List getConflictsFromObject(String filePath, String datasetName) { + List characterisationResultsByFilepath = characterisationResultGateway.getConflictsByFilepath(filePath, datasetName); return characterisationResultsByFilepath; } diff --git a/core/src/main/java/rocks/artur/api_impl/GetPropertiesImpl.java b/core/src/main/java/rocks/artur/api_impl/GetPropertiesImpl.java index f1839d9..4c6d0a3 100644 --- a/core/src/main/java/rocks/artur/api_impl/GetPropertiesImpl.java +++ b/core/src/main/java/rocks/artur/api_impl/GetPropertiesImpl.java @@ -15,14 +15,14 @@ public GetPropertiesImpl(CharacterisationResultGateway characterisationResultGat } @Override - public List getProperties() { - List propertyDistribution = characterisationResultGateway.getPropertyDistribution(null); + public List getProperties(String datasetName) { + List propertyDistribution = characterisationResultGateway.getPropertyDistribution(null, datasetName); return propertyDistribution; } @Override - public List getProperties(FilterCriteria filter) { - List propertyDistribution = characterisationResultGateway.getPropertyDistribution(filter); + public List getProperties(FilterCriteria filter, String datasetName) { + List propertyDistribution = characterisationResultGateway.getPropertyDistribution(filter, datasetName); return propertyDistribution; } } diff --git a/core/src/main/java/rocks/artur/api_impl/GetPropertyValueDistributionImpl.java b/core/src/main/java/rocks/artur/api_impl/GetPropertyValueDistributionImpl.java index 792c79e..86f7ce9 100644 --- a/core/src/main/java/rocks/artur/api_impl/GetPropertyValueDistributionImpl.java +++ b/core/src/main/java/rocks/artur/api_impl/GetPropertyValueDistributionImpl.java @@ -18,8 +18,8 @@ public GetPropertyValueDistributionImpl(CharacterisationResultGateway characteri @Override - public List getPropertyValueDistribution(Property property, FilterCriteria filterCriteria) { - List valueDistributionByProperty = characterisationResultGateway.getPropertyValueDistribution(property, filterCriteria); + public List getPropertyValueDistribution(Property property, FilterCriteria filterCriteria, String datasetName) { + List valueDistributionByProperty = characterisationResultGateway.getPropertyValueDistribution(property, filterCriteria, datasetName); return valueDistributionByProperty; } } diff --git a/core/src/main/java/rocks/artur/api_impl/GetSamplesImpl.java b/core/src/main/java/rocks/artur/api_impl/GetSamplesImpl.java index 9df1966..0e3b98e 100644 --- a/core/src/main/java/rocks/artur/api_impl/GetSamplesImpl.java +++ b/core/src/main/java/rocks/artur/api_impl/GetSamplesImpl.java @@ -34,8 +34,8 @@ public void setProperties(List properties) { } @Override - public List getObjects(FilterCriteria filterCriteria) { - List samplingResults = characterisationResultGateway.getSamples(filterCriteria, algorithm, properties); + public List getObjects(FilterCriteria filterCriteria, String datasetName) { + List samplingResults = characterisationResultGateway.getSamples(filterCriteria, algorithm, properties, datasetName); List results = new ArrayList<>(); switch (algorithm) { @@ -50,8 +50,8 @@ public List getObjects(FilterCriteria filterCriteria) { } @Override - public List getSamplingInfo(FilterCriteria filterCriteria) { - List samplingResults = characterisationResultGateway.getSamples(filterCriteria, algorithm, properties); + public List getSamplingInfo(FilterCriteria filterCriteria, String datasetName) { + List samplingResults = characterisationResultGateway.getSamples(filterCriteria, algorithm, properties, datasetName); return samplingResults; } diff --git a/core/src/main/java/rocks/artur/api_impl/GetSourcesImpl.java b/core/src/main/java/rocks/artur/api_impl/GetSourcesImpl.java index 977ccee..c1631b6 100644 --- a/core/src/main/java/rocks/artur/api_impl/GetSourcesImpl.java +++ b/core/src/main/java/rocks/artur/api_impl/GetSourcesImpl.java @@ -13,8 +13,8 @@ public GetSourcesImpl(CharacterisationResultGateway characterisationResultGatewa } @Override - public List getSources() { - List sources = characterisationResultGateway.getSources(); + public List getSources(String datasetName) { + List sources = characterisationResultGateway.getSources(datasetName); return sources; } } diff --git a/core/src/main/java/rocks/artur/api_impl/Native_ResolveConflictsImpl.java b/core/src/main/java/rocks/artur/api_impl/Native_ResolveConflictsImpl.java index 3847b5c..2a9581c 100644 --- a/core/src/main/java/rocks/artur/api_impl/Native_ResolveConflictsImpl.java +++ b/core/src/main/java/rocks/artur/api_impl/Native_ResolveConflictsImpl.java @@ -10,7 +10,7 @@ public Native_ResolveConflictsImpl(CharacterisationResultGateway characterisatio this.characterisationResultGateway = characterisationResultGateway; } @Override - public void run() { - characterisationResultGateway.resolveConflictsNative(); + public void run(String datasetName) { + characterisationResultGateway.resolveConflictsNative(datasetName); } } diff --git a/core/src/main/java/rocks/artur/api_impl/filter/FilterOperation.java b/core/src/main/java/rocks/artur/api_impl/filter/FilterOperation.java index d510842..058bd2d 100644 --- a/core/src/main/java/rocks/artur/api_impl/filter/FilterOperation.java +++ b/core/src/main/java/rocks/artur/api_impl/filter/FilterOperation.java @@ -1,6 +1,5 @@ package rocks.artur.api_impl.filter; -import rocks.artur.domain.ValueType; public enum FilterOperation { LESS("<"), LESS_OR_EQUAL ("<="), diff --git a/core/src/main/java/rocks/artur/domain/CharacterisationResultGateway.java b/core/src/main/java/rocks/artur/domain/CharacterisationResultGateway.java index a792246..ba2c7fc 100644 --- a/core/src/main/java/rocks/artur/domain/CharacterisationResultGateway.java +++ b/core/src/main/java/rocks/artur/domain/CharacterisationResultGateway.java @@ -18,14 +18,14 @@ public interface CharacterisationResultGateway { * * @param characterisationResult */ - void addCharacterisationResult(CharacterisationResult characterisationResult); + void addCharacterisationResult(CharacterisationResult characterisationResult, String datasetName); /** * gets all characterisation results * * @return an iterable of all results stored in the DB. */ - List getCharacterisationResults(FilterCriteria filter); + List getCharacterisationResults(FilterCriteria filter, String datasetName); /** * gets a distribution of all properties that match the given filter criteria. @@ -33,58 +33,60 @@ public interface CharacterisationResultGateway { * @param filter a filter criteria * @return a list of property statistics */ - List getPropertyDistribution(FilterCriteria filter); + List getPropertyDistribution(FilterCriteria filter, String datasetName); /** * gets characterisation results describing a digital object identified by the given file path. * * @return an iterable of characterisation results. */ - List getCharacterisationResultsByFilepath(String filePath); + List getCharacterisationResultsByFilepath(String filePath, String datasetName); - List getCharacterisationResultsByEntry(Entry entry); + List getCharacterisationResultsByEntry(Entry entry, String datasetName); - List getConflictEntries(); + List getConflictEntries(String datasetName); - List getEntries(); + List getEntries(String datasetName); /** * gets a list of characterisation results with conflicts for a given digital object. * * @return an iterable of characterisation results. */ - List getConflictsByFilepath(String filepath); + List getConflictsByFilepath(String filepath, String datasetName); - Map getCollectionStatistics(FilterCriteria filterCriteria); + Map getCollectionStatistics(FilterCriteria filterCriteria, String datasetName); - List getPropertyValueDistribution(Property property, FilterCriteria filter); + List getPropertyValueDistribution(Property property, FilterCriteria filter, String datasetName); /** * gets a list of sources of characterisation results. * * @return an iterable of characterisation result sources. */ - List getSources(); + List getSources(String datasetName); /** * gets a list of objects. * * @return an iterable of PropertiesPerObjectStatistic. */ - List getObjects(FilterCriteria filterCriteria); + List getObjects(FilterCriteria filterCriteria, String datasetName); /** * gets a list of samples. * * @return an iterable of PropertiesPerObjectStatistic. */ - List getSamples(FilterCriteria filterCriteria, SamplingAlgorithms algorithm, List properties); + List getSamples(FilterCriteria filterCriteria, SamplingAlgorithms algorithm, List properties, String datasetName); - void addCharacterisationResults(List characterisationResults); + void addCharacterisationResults(List characterisationResults, String datasetName); - double getConflictRate(); + double getConflictRate(String datasetName); - void delete(CharacterisationResult characterisationResult); + void delete(CharacterisationResult characterisationResult, String datasetName); - void resolveConflictsNative(); + void resolveConflictsNative(String datasetName); + + List listDatasets(); } diff --git a/infra-persistence/src/main/java/rocks/artur/clickhouse/CharacterisationResultClickhouseRepository.java b/infra-persistence/src/main/java/rocks/artur/clickhouse/CharacterisationResultClickhouseRepository.java index 1747e62..a6d8353 100644 --- a/infra-persistence/src/main/java/rocks/artur/clickhouse/CharacterisationResultClickhouseRepository.java +++ b/infra-persistence/src/main/java/rocks/artur/clickhouse/CharacterisationResultClickhouseRepository.java @@ -16,6 +16,7 @@ import java.sql.PreparedStatement; import java.sql.SQLException; +import java.util.Arrays; import java.util.List; import java.util.stream.Collectors; @@ -34,10 +35,10 @@ public CharacterisationResultClickhouseRepository(JdbcTemplate template) { this.template = template; } - public void save(CharacterisationResult characterisationResult) { - - int rowsInserted = template.update("insert into characterisationresult (file_path,property, source, property_value, value_type)" + - " values (?,?,?,?,?)", + public void save(CharacterisationResult characterisationResult, String datasetName) { + this.createDb(datasetName); + int rowsInserted = template.update(String.format("insert into %s.characterisationresult (file_path,property, source, property_value, value_type)" + + " values (?,?,?,?,?)", datasetName), characterisationResult.getFilePath(), characterisationResult.getProperty().name(), characterisationResult.getSource(), @@ -47,11 +48,11 @@ public void save(CharacterisationResult characterisationResult) { System.out.println("Number of rows updated = " + rowsInserted); } - public List getPropertyDistribution() { + public List getPropertyDistribution(String datasetName) { String sql = String.format( "select property, count(property_value) as number " + - "from characterisationresultaggregated " + - "group by property ORDER BY number desc LIMIT 200"); + "from %s.characterisationresultaggregated " + + "group by property ORDER BY number desc LIMIT 200", datasetName); List result = template.query(sql, (rs, rowNum) -> { PropertyStatistic propstat = new PropertyStatistic(rs.getLong("number"), Property.valueOf(rs.getString("property"))); @@ -60,17 +61,17 @@ public List getPropertyDistribution() { return result; } - public List getPropertyValueDistribution(String property, FilterCriteria filter) { + public List getPropertyValueDistribution(String property, FilterCriteria filter, String datasetName) { String subquery = ""; if (filter != null) { - subquery = convert(filter); + subquery = convert(filter, datasetName); subquery = String.format(" file_path in (%s) and ", subquery); } String sql = String.format( "select property, property_value, count(property_value) as number " + - "from characterisationresultaggregated " + - "where %s property = '%s' group by property, property_value ORDER BY number desc LIMIT 200", subquery, property); + "from %s.characterisationresultaggregated " + + "where %s property = '%s' group by property, property_value ORDER BY number desc LIMIT 200", datasetName, subquery, property); LOG.info(sql); List result = template.query(sql, (rs, rowNum) -> { Object[] item = new Object[3]; @@ -83,10 +84,10 @@ public List getPropertyValueDistribution(String property, FilterCriter } - public List getPropertyValueTimeStampDistribution(String property, FilterCriteria filter) { + public List getPropertyValueTimeStampDistribution(String property, FilterCriteria filter, String datasetName) { String subquery = ""; if (filter != null) { - subquery = convert(filter); + subquery = convert(filter, datasetName); subquery = String.format(" file_path in (%s) and ", subquery); } @@ -95,11 +96,11 @@ public List getPropertyValueTimeStampDistribution(String property, Fil "WHEN property_value = 'CONFLICT' THEN property_value " + "ELSE SUBSTRING(property_value,1,4) " + "END as value, count(property) as number " + - "from characterisationresultaggregated " + + "from %s.characterisationresultaggregated " + "where %s property = '%s' group by property, CASE " + "WHEN property_value = 'CONFLICT' THEN property_value " + "ELSE SUBSTRING(property_value,1,4) " + - "END ORDER BY number desc LIMIT 200", subquery, property); + "END ORDER BY number desc LIMIT 200", datasetName, subquery, property); List result = template.query(sql, (rs, rowNum) -> { Object[] item = new Object[3]; @@ -112,7 +113,7 @@ public List getPropertyValueTimeStampDistribution(String property, Fil } - public String convert(FilterCriteria filter) { + public String convert(FilterCriteria filter, String datasetName) { if (filter instanceof SingleFilterCriteria) { Property property = ((SingleFilterCriteria) filter).getSearchKey(); String operator = ((SingleFilterCriteria) filter).getOperation().getValue(); @@ -121,20 +122,20 @@ public String convert(FilterCriteria filter) { switch (property.getValueType()) { case TIMESTAMP: if (!value.equals("CONFLICT")) { - result = String.format("select file_path from characterisationresult where property = '%s' and cast(property_value as DATETIME) %s cast('%s' as DATE)", property, operator, value); + result = String.format("select file_path from %s.characterisationresult where property = '%s' and cast(property_value as DATETIME) %s cast('%s' as DATE)", datasetName, property, operator, value); } else { - result = String.format("select file_path from characterisationresultaggregated where property = '%s' and property_value %s '%s'", property, operator, value); + result = String.format("select file_path from %s.characterisationresultaggregated where property = '%s' and property_value %s '%s'", datasetName, property, operator, value); } break; default: - result = String.format("select file_path from characterisationresultaggregated where property = '%s' and property_value %s '%s'", property, operator, value); + result = String.format("select file_path from %s.characterisationresultaggregated where property = '%s' and property_value %s '%s'", datasetName, property, operator, value); } return result; } else if (filter instanceof AndFilterCriteria) { AndFilterCriteria andFilter = (AndFilterCriteria) filter; - String whereStatement1 = convert(andFilter.getCriteria()); - String whereStatement2 = convert(andFilter.getOtherCriteria()); + String whereStatement1 = convert(andFilter.getCriteria(), datasetName); + String whereStatement2 = convert(andFilter.getOtherCriteria(), datasetName); String result = String.format("( (%s) INTERSECT (%s) )", whereStatement1, whereStatement2); return result; @@ -142,8 +143,8 @@ public String convert(FilterCriteria filter) { } else if (filter instanceof OrFilterCriteria) { OrFilterCriteria orFilter = (OrFilterCriteria) filter; - String whereStatement1 = convert(orFilter.getCriteria()); - String whereStatement2 = convert(orFilter.getOtherCriteria()); + String whereStatement1 = convert(orFilter.getCriteria(), datasetName); + String whereStatement2 = convert(orFilter.getOtherCriteria(), datasetName); String result = String.format("( (%s) UNION ALL (%s) )", whereStatement1, whereStatement2); return result; @@ -152,14 +153,14 @@ public String convert(FilterCriteria filter) { } } - public void saveAll(List characterisationResults) { - + public void saveAll(List characterisationResults, String datasetName) { + this.createDb(datasetName); List filtered = characterisationResults.stream() .filter(item -> item.getFilePath() != null) .filter(item -> item.getValue() != null && item.getValue().length() < 300).collect(Collectors.toList()); - template.batchUpdate("insert into characterisationresult (file_path,property, source, property_value, value_type)" + - " values (?,?,?,?,?)", + template.batchUpdate(String.format("insert into %s.characterisationresult (file_path,property, source, property_value, value_type)" + + " values (?,?,?,?,?)", datasetName), filtered, 10000, new ParameterizedPreparedStatementSetter() { @@ -175,18 +176,18 @@ public void setValues(PreparedStatement ps, CharacterisationResult cResult) thro } - public List getCharacterisationResults(FilterCriteria filter) { + public List getCharacterisationResults(FilterCriteria filter, String datasetName) { String subquery = ""; if (filter != null) { - subquery = convert(filter); + subquery = convert(filter, datasetName); subquery = String.format("where file_path in (%s) ", subquery); } String sql = String.format( "select file_path,property, source, property_value, value_type " + - "from characterisationresult " + - "%s", subquery); + "from %s.characterisationresult " + + "%s", datasetName, subquery); List result = template.query(sql, (rs, rowNum) -> { CharacterisationResult item = new CharacterisationResult(); @@ -200,25 +201,25 @@ public List getCharacterisationResults(FilterCriteria getSources() { + public List getSources(String datasetName) { String sql = String.format( - "select distinct source from characterisationresult "); + "select distinct source from %s.characterisationresult ", datasetName); List result = template.query(sql, (rs, rowNum) -> { return rs.getString(1); @@ -226,11 +227,11 @@ public List getSources() { return result; } - public List getCharacterisationResultsByFilepath(String filePath) { + public List getCharacterisationResultsByFilepath(String filePath, String datasetName) { String sql = String.format( "select file_path, property, source, property_value, value_type " + - "from characterisationresult " + - "where file_path='%s' ", filePath); + "from %s.characterisationresult " + + "where file_path='%s' ", datasetName, filePath); List result = template.query(sql, (rs, rowNum) -> { CharacterisationResult item = new CharacterisationResult(); @@ -244,10 +245,10 @@ public List getCharacterisationResultsByFilepath(String return result; } - public double[] getSizeStatistics(FilterCriteria filter) { + public double[] getSizeStatistics(FilterCriteria filter, String datasetName) { String subquery = ""; if (filter != null) { - subquery = convert(filter); + subquery = convert(filter, datasetName); subquery = String.format(" file_path in (%s) and ", subquery); } @@ -257,8 +258,8 @@ public double[] getSizeStatistics(FilterCriteria filter) { "max(toInt32(property_value)) as maxsize, " + "avg(toInt32(property_value)) as avgsize, " + "count(property_value) as count " + - "from characterisationresultaggregated " + - "where %s property='SIZE'", subquery); + "from %s.characterisationresultaggregated " + + "where %s property='SIZE'", datasetName, subquery); List result = template.query(sql, (rs, rowNum) -> { double sum = rs.getDouble(1); @@ -273,31 +274,31 @@ public double[] getSizeStatistics(FilterCriteria filter) { } - public double[] getConflictStatistics(FilterCriteria filter) { + public double[] getConflictStatistics(FilterCriteria filter, String datasetName) { String subquery = ""; if (filter != null) { - subquery = convert(filter); + subquery = convert(filter, datasetName); subquery = String.format(" file_path in (%s) and ", subquery); } String sql = String.format( "select count(distinct file_path) as count " + - "from characterisationresultaggregated " + - "where %s property_value='CONFLICT'", subquery); + "from %s.characterisationresultaggregated " + + "where %s property_value='CONFLICT'", datasetName, subquery); Long conflictsCount = template.queryForObject(sql, Long.class); String subquery2 = ""; if (filter != null) { - subquery2 = convert(filter); + subquery2 = convert(filter, datasetName); subquery2 = String.format("where file_path in (%s) ", subquery2); } String sql2 = String.format( "select count(distinct file_path) as count " + - "from characterisationresultaggregated " + - "%s", subquery2); + "from %s.characterisationresultaggregated " + + "%s", datasetName, subquery2); Long totalCount = template.queryForObject(sql2, Long.class); @@ -309,18 +310,18 @@ public double[] getConflictStatistics(FilterCriteria filter) { return result; } - public List getObjects(FilterCriteria filter) { + public List getObjects(FilterCriteria filter, String datasetName) { String subquery = ""; if (filter != null) { - subquery = convert(filter); + subquery = convert(filter, datasetName); subquery = String.format(" where file_path in (%s) ", subquery); } String sql = String.format( "select file_path, count(*) " + - "from characterisationresultaggregated " + + "from %s.characterisationresultaggregated " + " %s" + - "group by file_path", subquery); + "group by file_path", datasetName, subquery); List result = template.query(sql, (rs, rowNum) -> { PropertiesPerObjectStatistic statistic = new PropertiesPerObjectStatistic(rs.getLong(2), rs.getString(1)); @@ -331,18 +332,18 @@ public List getObjects(FilterCriteria filter) { return result; } - public List getRandomSamples(FilterCriteria filterCriteria, int sampleSize) { + public List getRandomSamples(FilterCriteria filterCriteria, int sampleSize, String datasetName) { String subquery = ""; if (filterCriteria != null) { - subquery = convert(filterCriteria); + subquery = convert(filterCriteria, datasetName); subquery = String.format(" where file_path in (%s) ", subquery); } String sql = String.format( "select file_path " + - "from characterisationresultaggregated " + + "from %s.characterisationresultaggregated " + " %s" + - "group by file_path ORDER BY RAND() LIMIT %d ", subquery, sampleSize); + "group by file_path ORDER BY RAND() LIMIT %d ", datasetName, subquery, sampleSize); List resultList = template.query(sql, (rs, rowNum) -> rs.getString(1)); List collect = resultList.stream().map(item -> new String[]{"1", item}).collect(Collectors.toList()); @@ -351,10 +352,10 @@ public List getRandomSamples(FilterCriteria filterCriteria, int sample } - public List getSelectiveFeatureDistributionSamples(FilterCriteria filterCriteria, List properties) { + public List getSelectiveFeatureDistributionSamples(FilterCriteria filterCriteria, List properties, String datasetName) { String subquery = ""; if (filterCriteria != null) { - subquery = convert(filterCriteria); + subquery = convert(filterCriteria, datasetName); subquery = String.format(" where file_path in (%s) ", subquery); } @@ -377,11 +378,11 @@ public List getSelectiveFeatureDistributionSamples(FilterCriteria filt String currProperty = properties.get(i).name(); if (i == 0) { - from.append(String.format(" (SELECT v.property_value, v.file_path FROM characterisationresultaggregated v\n" + - "where %s v.property='%s' ) as %s ", subquery, currProperty, currProperty)); + from.append(String.format(" (SELECT v.property_value, v.file_path FROM %s.characterisationresultaggregated v\n" + + "where %s v.property='%s' ) as %s ", datasetName, subquery, currProperty, currProperty)); } else { - from.append(String.format(" join (SELECT v.property_value, v.file_path FROM characterisationresultaggregated v\n" + - "where %s v.property='%s') as %s on %s.file_path=%s.file_path ", subquery, currProperty, currProperty, properties.get(0).name(), currProperty)); + from.append(String.format(" join (SELECT v.property_value, v.file_path FROM %s.characterisationresultaggregated v\n" + + "where %s v.property='%s') as %s on %s.file_path=%s.file_path ", datasetName, subquery, currProperty, currProperty, properties.get(0).name(), currProperty)); } //TODO: Probably, the join is not required. Check if it is true. } @@ -412,7 +413,7 @@ public List getSelectiveFeatureDistributionSamples(FilterCriteria filt } - public void resolveConflictsSimple(){ + public void resolveConflictsSimple(String datasetName){ /* DROP TABLE IF EXISTS to_delete; @@ -449,51 +450,51 @@ tmp_table as ( */ - String sql = String.format("DROP TABLE IF EXISTS to_delete;"); + String sql = String.format("DROP TABLE IF EXISTS %s.to_delete;", datasetName); int update = template.update(sql); sql = String.format("" + - " CREATE TABLE to_delete\n" + + " CREATE TABLE %s.to_delete\n" + " (\n" + " file_path String,\n" + " property String,\n" + " source String\n" + - " ) ENGINE = Memory;"); + " ) ENGINE = Memory;", datasetName); update = template.update(sql); sql = String.format("" + - " insert into to_delete\n" + + " insert into %s.to_delete\n" + " with weights as (\n" + " SELECT source,\n" + " property,\n" + " COUNT(property_value) as count,\n" + - " COUNT(property_value) * 1.0/ (SELECT count(property_value) FROM characterisationresultaggregated\n" + + " COUNT(property_value) * 1.0/ (SELECT count(property_value) FROM %s.characterisationresultaggregated\n" + " WHERE property_value != 'CONFLICT' ) as weight\n" + - " FROM characterisationresult\n" + - " WHERE file_path in (SELECT file_path FROM characterisationresultaggregated WHERE property_value != 'CONFLICT' )\n" + + " FROM %s.characterisationresult\n" + + " WHERE file_path in (SELECT file_path FROM %s.characterisationresultaggregated WHERE property_value != 'CONFLICT' )\n" + " GROUP BY source, property\n" + " ),\n" + " tmp_table as (\n" + - " SELECT file_path, property, source, property_value, weight FROM characterisationresult\n" + - " JOIN weights on characterisationresult.property == weights.property and characterisationresult.source == weights.source\n" + - " WHERE (file_path, property) in (SELECT file_path, property from characterisationresultaggregated WHERE property_value == 'CONFLICT')\n" + + " SELECT file_path, property, source, property_value, weight FROM %s.characterisationresult\n" + + " JOIN weights on %s.characterisationresult.property == weights.property and %s.characterisationresult.source == weights.source\n" + + " WHERE (file_path, property) in (SELECT file_path, property from %s.characterisationresultaggregated WHERE property_value == 'CONFLICT')\n" + " )\n" + " SELECT file_path,property,source FROM tmp_table\n" + - " WHERE (file_path, property, weight) not in (SELECT file_path, property, MAX(weight) FROM tmp_table GROUP BY file_path, property);"); + " WHERE (file_path, property, weight) not in (SELECT file_path, property, MAX(weight) FROM tmp_table GROUP BY file_path, property);", datasetName, datasetName, datasetName, datasetName, datasetName, datasetName, datasetName, datasetName); update = template.update(sql); sql = String.format("" + - " delete from characterisationresult\n" + - " where (file_path, property, source) in (select file_path,property,source from to_delete);"); + " delete from %s.characterisationresult\n" + + " where (file_path, property, source) in (select file_path,property,source from %s.to_delete);", datasetName, datasetName); update = template.update(sql); - this.cleanAggregation(); + this.cleanAggregation(datasetName); } - void aggregateResults(){ + void aggregateResults(String datasetName){ /* CREATE TABLE IF NOT EXISTS characterisationresultaggregated ENGINE = AggregatingMergeTree @@ -507,7 +508,7 @@ WHEN COUNT(distinct property_value) = 1 THEN MIN(property_value) GROUP BY property, file_path; */ String sql = String.format("" + - "CREATE TABLE IF NOT EXISTS characterisationresultaggregated\n" + + "CREATE TABLE IF NOT EXISTS %s.characterisationresultaggregated\n" + "ENGINE = AggregatingMergeTree\n" + " ORDER BY (property, file_path) AS\n" + "SELECT file_path, property,\n" + @@ -515,15 +516,57 @@ WHEN COUNT(distinct property_value) = 1 THEN MIN(property_value) " WHEN COUNT(distinct property_value) = 1 THEN MIN(property_value)\n" + " ELSE 'CONFLICT'\n" + " END AS property_value\n" + - "FROM characterisationresult\n" + - "GROUP BY property, file_path;" + "FROM %s.characterisationresult\n" + + "GROUP BY property, file_path;", datasetName, datasetName ); template.update(sql); } - void cleanAggregation(){ - String sql = String.format("drop table IF EXISTS characterisationresultaggregated"); + void cleanAggregation(String datasetName){ + String sql = String.format("drop table IF EXISTS %s.characterisationresultaggregated", datasetName); int update = template.update(sql); } + + void createDb(String datasetName) { + String sql = String.format("create database if not exists %s", datasetName); + int update = template.update(sql); + + + + /* + + CREATE TABLE newdb.characterisationresult + ( + file_path String, + property String, + source String, + property_value String, + value_type String + ) ENGINE = ReplacingMergeTree + PRIMARY KEY (source, property, file_path) + ORDER BY (source, property, file_path); + + */ + sql = String.format("CREATE TABLE IF NOT EXISTS %s.characterisationresult\n" + + "(\n" + + " file_path String,\n" + + " property String,\n" + + " source String,\n" + + " property_value String,\n" + + " value_type String\n" + + ") ENGINE = ReplacingMergeTree\n" + + " PRIMARY KEY (source, property, file_path)\n" + + " ORDER BY (source, property, file_path);", datasetName); + update = template.update(sql); + } + + public List listDatasets() { + String sql = String.format("SELECT name FROM system.databases"); + + List resultList = template.query(sql, (rs, rowNum) -> rs.getString(1)); + List to_remove = Arrays.asList("system", "information_schema", "INFORMATION_SCHEMA"); + resultList.removeAll(to_remove); + return resultList; + } } diff --git a/infra-persistence/src/main/java/rocks/artur/clickhouse/CharacterisationResultGatewayClickhouseImpl.java b/infra-persistence/src/main/java/rocks/artur/clickhouse/CharacterisationResultGatewayClickhouseImpl.java index dc35f24..9f2559e 100644 --- a/infra-persistence/src/main/java/rocks/artur/clickhouse/CharacterisationResultGatewayClickhouseImpl.java +++ b/infra-persistence/src/main/java/rocks/artur/clickhouse/CharacterisationResultGatewayClickhouseImpl.java @@ -23,46 +23,47 @@ public CharacterisationResultGatewayClickhouseImpl(CharacterisationResultClickho @Override - public void addCharacterisationResult(CharacterisationResult characterisationResult) { - repository.save(characterisationResult); - repository.cleanAggregation(); + public void addCharacterisationResult(CharacterisationResult characterisationResult, String datasetName) { + repository.save(characterisationResult, datasetName); + repository.cleanAggregation(datasetName); } @Override - public List getCharacterisationResults(FilterCriteria filter) { - return repository.getCharacterisationResults(filter); + public List getCharacterisationResults(FilterCriteria filter, String datasetName) { + return repository.getCharacterisationResults(filter, datasetName); } @Override - public List getPropertyDistribution(FilterCriteria filter) { - return repository.getPropertyDistribution(); + public List getPropertyDistribution(FilterCriteria filter, String datasetName) { + repository.aggregateResults(datasetName); + return repository.getPropertyDistribution(datasetName); } @Override - public List getCharacterisationResultsByFilepath(String filePath) { - return repository.getCharacterisationResultsByFilepath(filePath); + public List getCharacterisationResultsByFilepath(String filePath, String datasetName) { + return repository.getCharacterisationResultsByFilepath(filePath,datasetName); } @Override - public List getCharacterisationResultsByEntry(Entry entry) { + public List getCharacterisationResultsByEntry(Entry entry, String datasetName) { return null; } @Override - public List getConflictEntries() { + public List getConflictEntries(String datasetName) { return null; } @Override - public List getEntries() { + public List getEntries(String datasetName) { return null; } @Override - public List getConflictsByFilepath(String filepath) { - repository.aggregateResults(); + public List getConflictsByFilepath(String filepath, String datasetName) { + repository.aggregateResults(datasetName); List results = new ArrayList<>(); - List allJPAByFilePath = getCharacterisationResultsByFilepath(filepath); + List allJPAByFilePath = getCharacterisationResultsByFilepath(filepath, datasetName); List properties = allJPAByFilePath.stream().map(item -> item.getProperty()).collect(Collectors.toList()); for (Property property : properties) { @@ -75,31 +76,32 @@ public List getConflictsByFilepath(String filepath) { } @Override - public Map getCollectionStatistics(FilterCriteria filterCriteria) { - repository.aggregateResults(); + public Map getCollectionStatistics(FilterCriteria filterCriteria, String datasetName) { + + repository.aggregateResults(datasetName); Map result = new HashMap<>(); - double[] sizeStatistics = repository.getSizeStatistics(filterCriteria); + double[] sizeStatistics = repository.getSizeStatistics(filterCriteria, datasetName); result.put("totalSize", sizeStatistics[0]); result.put("minSize", sizeStatistics[1]); result.put("maxSize", sizeStatistics[2]); result.put("avgSize", sizeStatistics[3]); result.put("totalCount", sizeStatistics[4]); - double[] conflictStatistics = repository.getConflictStatistics(filterCriteria); + double[] conflictStatistics = repository.getConflictStatistics(filterCriteria, datasetName); result.put("conflictRate", conflictStatistics[1]); result.put("conflictCount", conflictStatistics[0]); return result; } @Override - public List getPropertyValueDistribution(Property property, FilterCriteria filter) { - repository.aggregateResults(); + public List getPropertyValueDistribution(Property property, FilterCriteria filter, String datasetName) { + repository.aggregateResults(datasetName); switch (property.getValueType()) { case TIMESTAMP: { List collect = null; List propertyValueDistribution = - repository.getPropertyValueTimeStampDistribution(property.name(), filter); + repository.getPropertyValueTimeStampDistribution(property.name(), filter, datasetName); collect = propertyValueDistribution.stream().filter(stat -> property.name().equalsIgnoreCase((String) stat[0])) .map(stat -> new PropertyValueStatistic((Long) stat[2], (String) stat[1])) .collect(Collectors.toList()); @@ -109,7 +111,7 @@ public List getPropertyValueDistribution(Property proper case INTEGER: case FLOAT: { List propertyValueDistribution = - repository.getPropertyValueDistribution(property.name(), filter); + repository.getPropertyValueDistribution(property.name(), filter, datasetName); List floats = propertyValueDistribution.stream().filter(stat -> property.name().equalsIgnoreCase((String) stat[0]) && !(stat[1].equals("CONFLICT"))) .map(stat -> { @@ -137,7 +139,7 @@ public List getPropertyValueDistribution(Property proper default: List collect = null; List propertyValueDistribution = - repository.getPropertyValueDistribution(property.name(), filter); + repository.getPropertyValueDistribution(property.name(), filter, datasetName); collect = propertyValueDistribution.stream().filter(stat -> property.name().equalsIgnoreCase((String) stat[0])) .map(stat -> new PropertyValueStatistic((Long) stat[2], (String) stat[1])) .collect(Collectors.toList()); @@ -147,25 +149,25 @@ public List getPropertyValueDistribution(Property proper } @Override - public List getSources() { - return repository.getSources(); + public List getSources(String datasetName) { + return repository.getSources(datasetName); } @Override - public List getObjects(FilterCriteria filterCriteria) { - return repository.getObjects(filterCriteria); + public List getObjects(FilterCriteria filterCriteria, String datasetName) { + return repository.getObjects(filterCriteria, datasetName); } @Override - public List getSamples(FilterCriteria filterCriteria, SamplingAlgorithms algorithm, List properties) { - repository.aggregateResults(); + public List getSamples(FilterCriteria filterCriteria, SamplingAlgorithms algorithm, List properties, String datasetName) { + repository.aggregateResults(datasetName); switch (algorithm) { case RANDOM -> { - List samples = repository.getRandomSamples(filterCriteria, 10); + List samples = repository.getRandomSamples(filterCriteria, 10, datasetName); return samples; } case SELECTIVE_FEATURE_DISTRIBUTION -> { - List selectiveFeatureDistributionSamples = repository.getSelectiveFeatureDistributionSamples(filterCriteria, properties); + List selectiveFeatureDistributionSamples = repository.getSelectiveFeatureDistributionSamples(filterCriteria, properties, datasetName); //List examples = selectiveFeatureDistributionSamples.stream().map(arr -> arr[1]).collect(Collectors.toList()); return selectiveFeatureDistributionSamples; } @@ -174,27 +176,32 @@ public List getSamples(FilterCriteria filterCriteria, SamplingAlgorith } @Override - public void addCharacterisationResults(List characterisationResults) { - repository.saveAll(characterisationResults); - repository.cleanAggregation(); + public void addCharacterisationResults(List characterisationResults, String datasetName) { + repository.saveAll(characterisationResults, datasetName); + repository.cleanAggregation(datasetName); } @Override - public double getConflictRate() { - repository.aggregateResults(); - Long totalCount = repository.getDigitalObjectCount(); - Long conflictCount = repository.getConflictCount(); + public double getConflictRate(String datasetName) { + repository.aggregateResults(datasetName); + Long totalCount = repository.getDigitalObjectCount(datasetName); + Long conflictCount = repository.getConflictCount(datasetName); return conflictCount / (double) totalCount; } @Override - public void delete(CharacterisationResult characterisationResult) { + public void delete(CharacterisationResult characterisationResult, String datasetName) { + + } + @Override + public void resolveConflictsNative(String datasetName) { + repository.resolveConflictsSimple(datasetName); + repository.aggregateResults(datasetName); } @Override - public void resolveConflictsNative() { - repository.resolveConflictsSimple(); - repository.aggregateResults(); + public List listDatasets() { + return repository.listDatasets(); } } diff --git a/infra-persistence/src/main/java/rocks/artur/jpa/CharacterisationResultGatewayJpaImpl.java b/infra-persistence/src/main/java/rocks/artur/jpa/CharacterisationResultGatewayJpaImpl.java index 180b8dd..5b5c0fb 100644 --- a/infra-persistence/src/main/java/rocks/artur/jpa/CharacterisationResultGatewayJpaImpl.java +++ b/infra-persistence/src/main/java/rocks/artur/jpa/CharacterisationResultGatewayJpaImpl.java @@ -4,6 +4,7 @@ import jakarta.transaction.Transactional; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.springframework.web.servlet.tags.EditorAwareTag; import rocks.artur.domain.*; import rocks.artur.domain.statistics.BinningAlgorithms; import rocks.artur.domain.statistics.PropertiesPerObjectStatistic; @@ -32,14 +33,14 @@ public class CharacterisationResultGatewayJpaImpl implements CharacterisationRes @Override @Transactional - public void addCharacterisationResult(CharacterisationResult characterisationResult) { + public void addCharacterisationResult(CharacterisationResult characterisationResult, String datasetName) { CharacterisationResultJPA toSave = new CharacterisationResultJPA(characterisationResult); LOG.debug("saving " + toSave.toString()); characterisationResultRepository.save(toSave); } @Override - public List getCharacterisationResults(FilterCriteria filter) { + public List getCharacterisationResults(FilterCriteria filter, String datasetName) { List all = characterisationResultRepository.findAll(); List result = all.stream().map(item -> new CharacterisationResult(Property.valueOf(item.getProperty()), item.getValue(), ValueType.valueOf(item.getValueType()), item.getSource(), item.getFilePath())).collect(Collectors.toList()); @@ -47,7 +48,7 @@ public List getCharacterisationResults(FilterCriteria getPropertyDistribution(FilterCriteria filter) { + public List getPropertyDistribution(FilterCriteria filter, String datasetName) { //Specification convert = convert(filter); //final Map result = @@ -64,8 +65,8 @@ public List getPropertyDistribution(FilterCriteria filter) { return collect; } - - public List getPropertyValueDistribution(Property property, FilterCriteria filter) { + @Override + public List getPropertyValueDistribution(Property property, FilterCriteria filter, String datasetName) { switch (property.getValueType()) { case TIMESTAMP: { @@ -120,7 +121,7 @@ public List getPropertyValueDistribution(Property proper } @Override - public List getCharacterisationResultsByFilepath(String filepath) { + public List getCharacterisationResultsByFilepath(String filepath, String datasetName) { List allJPAByFilePath = characterisationResultRepository.findAllByFilePath(filepath); List result = allJPAByFilePath.stream().map(item -> new CharacterisationResult(Property.valueOf(item.getProperty()), item.getValue(), ValueType.valueOf(item.getValueType()), item.getSource(), item.getFilePath())).collect(Collectors.toList()); @@ -128,7 +129,7 @@ public List getCharacterisationResultsByFilepath(String } @Override - public List getCharacterisationResultsByEntry(Entry entry) { + public List getCharacterisationResultsByEntry(Entry entry, String datasetName) { List allJPAByFilePath = characterisationResultRepository.findAllByFilePath(entry.getFilepath()); List result = allJPAByFilePath.stream().filter(item -> item.getProperty().equals(entry.getProperty().toString())).map(item -> new CharacterisationResult(Property.valueOf(item.getProperty()), item.getValue(), ValueType.valueOf(item.getValueType()), item.getSource(), item.getFilePath())).collect(Collectors.toList()); @@ -136,21 +137,21 @@ public List getCharacterisationResultsByEntry(Entry entr } @Override - public List getConflictEntries() { + public List getConflictEntries(String datasetName) { List conflictEntries = characterisationResultViewRepository.getConflictEntries(); List result = conflictEntries.stream().map(item -> new Entry(item[0], item[1])).collect(Collectors.toList()); return result; } @Override - public List getEntries() { + public List getEntries(String datasetName) { List filepathProperty = characterisationResultRepository.getFilepathProperty(); List result = filepathProperty.stream().map(item -> new Entry(item[0].toString(), item[1].toString())).collect(Collectors.toList()); return result; } - - public List getConflictsByFilepath(String filepath) { + @Override + public List getConflictsByFilepath(String filepath, String datasetName) { List allJPAByFilePath = characterisationResultViewRepository.findAllByFilePath(filepath); List result = allJPAByFilePath.stream().filter(item -> item.getValue().equals("CONFLICT")).map(item -> new CharacterisationResult(Property.valueOf(item.getProperty()), item.getValue(), ValueType.valueOf(item.getValueType()), null, item.getFilePath())).collect(Collectors.toList()); @@ -158,7 +159,7 @@ public List getConflictsByFilepath(String filepath) { } @Override - public Map getCollectionStatistics(FilterCriteria filterCriteria) { + public Map getCollectionStatistics(FilterCriteria filterCriteria, String datasetName) { Map result = new HashMap<>(); double[] sizeStatistics = characterisationResultViewRepository.getSizeStatistics(filterCriteria); @@ -175,13 +176,13 @@ public Map getCollectionStatistics(FilterCriteria filterCriteria } @Override - public List getSources() { + public List getSources(String datasetName) { List sources = characterisationResultViewRepository.getSources(); return sources; } @Override - public List getObjects(FilterCriteria filterCriteria) { + public List getObjects(FilterCriteria filterCriteria, String datasetName) { List propertyValueDistribution = characterisationResultViewRepository.getObjects(filterCriteria); @@ -194,7 +195,7 @@ public List getObjects(FilterCriteria filterCriter } @Override - public List getSamples(FilterCriteria filterCriteria, SamplingAlgorithms algorithm, List properties) { + public List getSamples(FilterCriteria filterCriteria, SamplingAlgorithms algorithm, List properties, String datasetName) { switch (algorithm) { case RANDOM -> { List samples = characterisationResultViewRepository.getRandomSamples(filterCriteria, 10); @@ -210,7 +211,7 @@ public List getSamples(FilterCriteria filterCriteria, SamplingAlgorith } @Override - public void addCharacterisationResults(List characterisationResults) { + public void addCharacterisationResults(List characterisationResults, String datasetName) { List tmp = new ArrayList<>(); characterisationResults.stream().forEach(item -> { if (null == item) { @@ -239,20 +240,25 @@ public void addCharacterisationResults(List characterisa } @Override - public double getConflictRate() { + public double getConflictRate(String datasetName) { Long totalCount = characterisationResultViewRepository.getTotalCount(); Long conflictCount = characterisationResultViewRepository.getConflictCount(); return conflictCount / (double) totalCount; } @Override - public void delete(CharacterisationResult characterisationResult) { + public void delete(CharacterisationResult characterisationResult, String datasetName) { characterisationResultRepository.delete(new CharacterisationResultJPA(characterisationResult)); } @Override - public void resolveConflictsNative() { + public void resolveConflictsNative(String datasetName) { } + @Override + public List listDatasets() { + return List.of(); + } + } diff --git a/infra-rest/src/main/java/rocks/artur/endpoints/RestService.java b/infra-rest/src/main/java/rocks/artur/endpoints/RestService.java index e99a608..28d0cec 100644 --- a/infra-rest/src/main/java/rocks/artur/endpoints/RestService.java +++ b/infra-rest/src/main/java/rocks/artur/endpoints/RestService.java @@ -38,6 +38,7 @@ public class RestService { GetPropertyValueDistribution getPropertyValueDistribution; AnalyzePersistFile analyzePersistFile; GetCollectionStatistics getCollectionStatistics; + GetDatasetInfo getDatasetInfo; ResolveConflicts resolveConflicts; @@ -45,7 +46,7 @@ public RestService(GetProperties getProperties, GetPropertyValueDistribution getPropertyValueDistribution, AnalyzePersistFile analyzePersistFile, GetObjects getObjects, GetCollectionStatistics getCollectionStatistics, - GetSources getSources, GetSamples getSamples, ResolveConflicts resolveConflicts) { + GetSources getSources, GetSamples getSamples, ResolveConflicts resolveConflicts, GetDatasetInfo getDatasetInfo) { this.getProperties = getProperties; this.getObjects = getObjects; this.getPropertyValueDistribution = getPropertyValueDistribution; @@ -54,6 +55,7 @@ public RestService(GetProperties getProperties, this.getSources = getSources; this.getSamples = getSamples; this.resolveConflicts = resolveConflicts; + this.getDatasetInfo = getDatasetInfo; } @RequestMapping(method = RequestMethod.GET, value = "/health") @@ -62,8 +64,9 @@ public String getHealth() { } @RequestMapping(method = RequestMethod.GET, value = "/sources") - public List getSources() { - List sources = getSources.getSources(); + public List getSources( + @RequestParam(name = "datasetName", required = true, defaultValue = "default") @Parameter(name = "datasetName", description = "dataset name", example = "default") String datasetName) { + List sources = getSources.getSources(datasetName); return sources; } @@ -74,10 +77,11 @@ public String[] getOperators() { } @RequestMapping(method = RequestMethod.GET, value = "/properties") - public List getProperties(@RequestParam(name = "filter", required = false) @Parameter(name = "filter", description = "Filter", example = "FORMAT=\"Portable Document Format\"") String filter) throws ParseException { + public List getProperties(@RequestParam(name = "filter", required = false) @Parameter(name = "filter", description = "Filter", example = "FORMAT=\"Portable Document Format\"") String filter, + @RequestParam(name = "datasetName", required = true, defaultValue = "default") @Parameter(name = "datasetName", description = "dataset name", example = "default") String datasetName) throws ParseException { CriteriaParser parser = new CriteriaParser(); FilterCriteria filterCriteria = parser.parse(filter); - List propertyDistribution = getProperties.getProperties(filterCriteria); + List propertyDistribution = getProperties.getProperties(filterCriteria, datasetName); return propertyDistribution; } @@ -85,16 +89,18 @@ public List getProperties(@RequestParam(name = "filter", requ @RequestMapping(method = RequestMethod.POST, value = "/object") @Consumes(MediaType.APPLICATION_JSON) public Iterable getObject( - @RequestParam(name = "filepath", required = true) @Parameter(name = "filepath", description = "Filepath of a digital object", example = "/home/user/file1") String filepath) { - Iterable objects = getObjects.getObject(filepath); + @RequestParam(name = "filepath", required = true) @Parameter(name = "filepath", description = "Filepath of a digital object", example = "/home/user/file1") String filepath, + @RequestParam(name = "datasetName", required = true, defaultValue = "default") @Parameter(name = "datasetName", description = "dataset name", example = "default") String datasetName) { + Iterable objects = getObjects.getObject(filepath, datasetName); return objects; } @RequestMapping(method = RequestMethod.POST, value = "/objectconflicts") @Consumes(MediaType.APPLICATION_JSON) public List getConflictsPerObject( - @RequestParam(name = "filepath", required = true) @Parameter(name = "filepath", description = "Filepath of a digital object", example = "/home/user/file1") String filepath) { - List objects = getObjects.getConflictsFromObject(filepath); + @RequestParam(name = "filepath", required = true) @Parameter(name = "filepath", description = "Filepath of a digital object", example = "/home/user/file1") String filepath, + @RequestParam(name = "datasetName", required = true, defaultValue = "default") @Parameter(name = "datasetName", description = "dataset name", example = "default") String datasetName) { + List objects = getObjects.getConflictsFromObject(filepath, datasetName); List collect = objects.stream().map(item -> item.getProperty()).collect(Collectors.toList()); return collect; } @@ -102,20 +108,22 @@ public List getConflictsPerObject( @RequestMapping(method = RequestMethod.POST, value = "/statistics") @Consumes(MediaType.APPLICATION_JSON) - public Map getCollectionStatistics(@RequestParam(name = "filter", required = false) @Parameter(name = "filter", description = "Filter", example = "FORMAT=\"Portable Document Format\"") String filter) throws ParseException { + public Map getCollectionStatistics(@RequestParam(name = "filter", required = false) @Parameter(name = "filter", description = "Filter", example = "FORMAT=\"Portable Document Format\"") String filter, + @RequestParam(name = "datasetName", required = true, defaultValue = "default") @Parameter(name = "datasetName", description = "dataset name", example = "default") String datasetName) throws ParseException { CriteriaParser parser = new CriteriaParser(); FilterCriteria filterCriteria = parser.parse(filter); - Map sizeStatistics = getCollectionStatistics.getStatistics(filterCriteria); + Map sizeStatistics = getCollectionStatistics.getStatistics(filterCriteria, datasetName); return sizeStatistics; } @RequestMapping(method = RequestMethod.POST, value = "/objects") @Consumes(MediaType.APPLICATION_JSON) - public List getObjects(@RequestParam(name = "filter", required = false) @Parameter(name = "filter", description = "Filter", example = "FORMAT=\"Portable Document Format\"") String filter) throws ParseException { + public List getObjects(@RequestParam(name = "filter", required = false) @Parameter(name = "filter", description = "Filter", example = "FORMAT=\"Portable Document Format\"") String filter, + @RequestParam(name = "datasetName", required = true, defaultValue = "default") @Parameter(name = "datasetName", description = "dataset name", example = "default") String datasetName) throws ParseException { CriteriaParser parser = new CriteriaParser(); FilterCriteria filterCriteria = parser.parse(filter); - List objects = getObjects.getObjects(filterCriteria); + List objects = getObjects.getObjects(filterCriteria, datasetName); return objects; } @@ -124,7 +132,8 @@ public List getObjects(@RequestParam(name = "filte @Consumes(MediaType.APPLICATION_JSON) public List getPropertyValueDistribution( @RequestParam(name = "property", required = true) @Parameter(name = "property", description = "Property of a digital object", example = "FORMAT") Property property, - @RequestParam(name = "filter", required = false) @Parameter(name = "filter", description = "Filter", example = "FORMAT=\"Portable Document Format\"") String filter) throws ParseException { + @RequestParam(name = "filter", required = false) @Parameter(name = "filter", description = "Filter", example = "FORMAT=\"Portable Document Format\"") String filter, + @RequestParam(name = "datasetName", required = true, defaultValue = "default") @Parameter(name = "datasetName", description = "dataset name", example = "default") String datasetName) throws ParseException { LOG.debug("filter: " + filter); @@ -132,7 +141,7 @@ public List getPropertyValueDistribution( FilterCriteria filterCriteria = parser.parse(filter); List valueDistributionByProperty = - getPropertyValueDistribution.getPropertyValueDistribution(property, filterCriteria); + getPropertyValueDistribution.getPropertyValueDistribution(property, filterCriteria, datasetName); return valueDistributionByProperty; @@ -144,7 +153,8 @@ public List getPropertyValueDistribution( public Iterable getSamples( @RequestParam(name = "algorithm", required = true) @Parameter(name = "algorithm", description = "Sampling algorithm", example = "RANDOM") SamplingAlgorithms algorithm, @RequestParam(name = "properties", required = false) @Parameter(name = "properties", description = "A list of properties") List properties, - @RequestParam(name = "filter", required = false) @Parameter(name = "filter", description = "Filter", example = "FORMAT=\"Portable Document Format\"") String filter) throws ParseException { + @RequestParam(name = "filter", required = false) @Parameter(name = "filter", description = "Filter", example = "FORMAT=\"Portable Document Format\"") String filter, + @RequestParam(name = "datasetName", required = true, defaultValue = "default") @Parameter(name = "datasetName", description = "dataset name", example = "default") String datasetName) throws ParseException { CriteriaParser parser = new CriteriaParser(); FilterCriteria filterCriteria = parser.parse(filter); @@ -152,7 +162,7 @@ public Iterable getSamples( getSamples.setAlgorithm(algorithm); getSamples.setProperties(properties); - Iterable objects = getSamples.getObjects(filterCriteria); + Iterable objects = getSamples.getObjects(filterCriteria, datasetName); return objects; } @@ -161,7 +171,8 @@ public Iterable getSamples( public List getSamplingInfo( @RequestParam(name = "algorithm", required = true) @Parameter(name = "algorithm", description = "Sampling algorithm", example = "RANDOM") SamplingAlgorithms algorithm, @RequestParam(name = "properties", required = false) @Parameter(name = "properties", description = "A list of properties") List properties, - @RequestParam(name = "filter", required = false) @Parameter(name = "filter", description = "Filter", example = "FORMAT=\"Portable Document Format\"") String filter) throws ParseException { + @RequestParam(name = "filter", required = false) @Parameter(name = "filter", description = "Filter", example = "FORMAT=\"Portable Document Format\"") String filter, + @RequestParam(name = "datasetName", required = true, defaultValue = "default") @Parameter(name = "datasetName", description = "dataset name", example = "default") String datasetName) throws ParseException { CriteriaParser parser = new CriteriaParser(); FilterCriteria filterCriteria = parser.parse(filter); @@ -169,7 +180,7 @@ public List getSamplingInfo( getSamples.setAlgorithm(algorithm); getSamples.setProperties(properties); - List samplingInfo = getSamples.getSamplingInfo(filterCriteria); + List samplingInfo = getSamples.getSamplingInfo(filterCriteria, datasetName); return samplingInfo; } @@ -177,12 +188,13 @@ public List getSamplingInfo( @RequestMapping(method = RequestMethod.POST, value = "/upload", consumes = { "multipart/form-data"}) public Response ProcessFile( - @RequestParam(name = "file", required = true) @Parameter(name = "file", description = "Please select a digital object to upload") MultipartFile file) throws IOException { + @RequestParam(name = "file", required = true) @Parameter(name = "file", description = "Please select a digital object to upload") MultipartFile file, + @RequestParam(name = "datasetName", required = true, defaultValue = "default") @Parameter(name = "datasetName", description = "dataset name", example = "default") String datasetName) throws IOException { String filename = file.getOriginalFilename(); byte[] bytes = file.getBytes(); LOG.debug(String.format("Processing file { %s }", file.getOriginalFilename())); ByteFile byteFile = new ByteFile(bytes, filename); - Long totalCount = analyzePersistFile.uploadCharacterisationResults(byteFile); + Long totalCount = analyzePersistFile.uploadCharacterisationResults(byteFile, datasetName); Response response = Response.ok(totalCount).build(); @@ -193,7 +205,8 @@ public Response ProcessFile( @RequestMapping(method = RequestMethod.POST, value = "/multipleupload", consumes = { "multipart/form-data"}) - public Response ProcessFiles(@RequestPart(name = "files", required = true) @Parameter(name = "files", description = "A list of digital objects to upload") MultipartFile[] files) throws IOException { + public Response ProcessFiles(@RequestPart(name = "files", required = true) @Parameter(name = "files", description = "A list of digital objects to upload") MultipartFile[] files, + @RequestParam(name = "datasetName", required = true, defaultValue = "default") @Parameter(name = "datasetName", description = "dataset name", example = "default") String datasetName) throws IOException { Long totalCount = 0L; List byteFiles = new ArrayList<>(); for (MultipartFile file : files) { @@ -201,7 +214,7 @@ public Response ProcessFiles(@RequestPart(name = "files", required = true) @Para ByteFile byteFile = new ByteFile(file.getBytes(), file.getOriginalFilename()); byteFiles.add(byteFile); } - analyzePersistFile.uploadCharacterisationResults(byteFiles); + analyzePersistFile.uploadCharacterisationResults(byteFiles, datasetName); Response response = Response.ok(totalCount).build(); return response; } @@ -209,7 +222,13 @@ public Response ProcessFiles(@RequestPart(name = "files", required = true) @Para @RequestMapping(method = RequestMethod.POST, value = "/resolveconflicts") @Consumes(MediaType.APPLICATION_JSON) - public void resolveConflicts() throws ParseException { - resolveConflicts.run(); + public void resolveConflicts(@RequestParam(name = "datasetName", required = true, defaultValue = "default") @Parameter(name = "datasetName", description = "dataset name", example = "default") String datasetName) throws ParseException { + resolveConflicts.run(datasetName); + } + + @RequestMapping(method = RequestMethod.GET, value = "/datasets") + @Consumes(MediaType.APPLICATION_JSON) + public List listDatasets() { + return getDatasetInfo.listDatasets(); } } diff --git a/main/src/main/java/rocks/artur/WebConfig.java b/main/src/main/java/rocks/artur/WebConfig.java new file mode 100644 index 0000000..afb3fc7 --- /dev/null +++ b/main/src/main/java/rocks/artur/WebConfig.java @@ -0,0 +1,17 @@ +package rocks.artur; + +import org.springframework.context.annotation.Configuration; +import org.springframework.web.servlet.config.annotation.CorsRegistry; +import org.springframework.web.servlet.config.annotation.WebMvcConfigurer; + +//@Configuration +public class WebConfig implements WebMvcConfigurer { + + @Override + public void addCorsMappings(CorsRegistry registry) { + registry.addMapping("/**") + .allowedOrigins("*") + .allowedMethods("GET", "POST", "PUT", "DELETE") + .allowedHeaders("*"); + } +} \ No newline at end of file diff --git a/main/src/test/java/rocks/artur/CRHResolveConflictsImplTest.java b/main/src/test/java/rocks/artur/CRHResolveConflictsImplTest.java index 7b86e91..320ce82 100644 --- a/main/src/test/java/rocks/artur/CRHResolveConflictsImplTest.java +++ b/main/src/test/java/rocks/artur/CRHResolveConflictsImplTest.java @@ -28,15 +28,15 @@ class CRHResolveConflictsImplTest { void getAllTest() { Iterable characterisationResults = - characterisationResultGatewaySqlImpl.getCharacterisationResults(null); - double conflictRateBefore = characterisationResultGatewaySqlImpl.getConflictRate(); + characterisationResultGatewaySqlImpl.getCharacterisationResults(null, ""); + double conflictRateBefore = characterisationResultGatewaySqlImpl.getConflictRate(""); List list = new ArrayList<>(); characterisationResults.forEach(list::add); //Assert.assertEquals(10, list.size()); - resolveConflicts.run(); + //resolveConflicts.run(); - double conflictRateAfter = characterisationResultGatewaySqlImpl.getConflictRate(); + double conflictRateAfter = characterisationResultGatewaySqlImpl.getConflictRate(""); System.out.println(String.format("Conflict rate: before - %4.3f, after - %4.3f", conflictRateBefore, conflictRateAfter)); } diff --git a/main/src/test/java/rocks/artur/CharacterisationResultGatewayImplTest.java b/main/src/test/java/rocks/artur/CharacterisationResultGatewayImplTest.java index fbba02f..2f5cfa2 100644 --- a/main/src/test/java/rocks/artur/CharacterisationResultGatewayImplTest.java +++ b/main/src/test/java/rocks/artur/CharacterisationResultGatewayImplTest.java @@ -35,7 +35,7 @@ class CharacterisationResultGatewayImplTest { void getAllTest() { Iterable characterisationResults = - characterisationResultGatewaySqlImpl.getCharacterisationResults(null); + characterisationResultGatewaySqlImpl.getCharacterisationResults(null, ""); List list = new ArrayList<>(); characterisationResults.forEach(list::add); @@ -45,7 +45,7 @@ void getAllTest() { @Test void getPropertyDistributionTest() { - List propertyDistribution = characterisationResultGatewaySqlImpl.getPropertyDistribution(null); + List propertyDistribution = characterisationResultGatewaySqlImpl.getPropertyDistribution(null, ""); Assert.assertEquals(4, propertyDistribution.size()); } @@ -54,7 +54,7 @@ void getPropertyValueDistributionWithFilterTest() throws ParseException { String typeFilter = "FORMAT=\"Portable Document Format\""; CriteriaParser parser = new CriteriaParser(); FilterCriteria parse = parser.parse(typeFilter); - List propertyValueDistribution = characterisationResultGatewaySqlImpl.getPropertyValueDistribution(Property.FORMAT, parse); + List propertyValueDistribution = characterisationResultGatewaySqlImpl.getPropertyValueDistribution(Property.FORMAT, parse, ""); System.out.println(propertyValueDistribution); Assert.assertEquals(1, propertyValueDistribution.size()); } @@ -62,9 +62,9 @@ void getPropertyValueDistributionWithFilterTest() throws ParseException { @Test void getPropertyValueDistributionWithoutFilterTest() { - List propertyValueDistribution = characterisationResultGatewaySqlImpl.getPropertyValueDistribution(Property.FORMAT, null); + List propertyValueDistribution = characterisationResultGatewaySqlImpl.getPropertyValueDistribution(Property.FORMAT, null, ""); System.out.println(propertyValueDistribution); - List characterisationResults = characterisationResultGatewaySqlImpl.getCharacterisationResults(null); + List characterisationResults = characterisationResultGatewaySqlImpl.getCharacterisationResults(null, ""); System.out.println(characterisationResults); Assert.assertEquals(3, propertyValueDistribution.size()); } @@ -73,14 +73,14 @@ void getPropertyValueDistributionWithoutFilterTest() { @Test void getPropertyValueFloatDistributionWithoutFilterTest() { - List propertyValueDistribution = characterisationResultGatewaySqlImpl.getPropertyValueDistribution(Property.SIZE, null); + List propertyValueDistribution = characterisationResultGatewaySqlImpl.getPropertyValueDistribution(Property.SIZE, null, ""); Assert.assertEquals(2, propertyValueDistribution.size()); } @Test void getPropertyValueDistributionWithoutFilterCONFLICTTest() { - List propertyValueDistribution = characterisationResultGatewaySqlImpl.getPropertyValueDistribution(Property.MIMETYPE, null); + List propertyValueDistribution = characterisationResultGatewaySqlImpl.getPropertyValueDistribution(Property.MIMETYPE, null, ""); Assert.assertEquals(2, propertyValueDistribution.size()); boolean conflict = propertyValueDistribution.stream().anyMatch(propertyValueStatistic -> propertyValueStatistic.getValue().equals("CONFLICT")); Assert.assertFalse(conflict); @@ -89,7 +89,7 @@ void getPropertyValueDistributionWithoutFilterCONFLICTTest() { @Test void getCharacterisationResultsByFilepathTest() { Iterable propertyValueStatistics = - characterisationResultGatewaySqlImpl.getCharacterisationResultsByFilepath("/home/artur"); + characterisationResultGatewaySqlImpl.getCharacterisationResultsByFilepath("/home/artur", ""); List list = new ArrayList<>(); propertyValueStatistics.forEach(list::add); @@ -102,7 +102,7 @@ void getCollectionStatisticsWithoutFilterTest() throws ParseException { String typeFilter = "FORMAT=\"Portable Document Format\""; CriteriaParser parser = new CriteriaParser(); FilterCriteria parse = parser.parse(typeFilter); - Map sizeStatistics = characterisationResultGatewaySqlImpl.getCollectionStatistics(null); + Map sizeStatistics = characterisationResultGatewaySqlImpl.getCollectionStatistics(null, ""); Assert.assertEquals(10047.0, sizeStatistics.get("totalSize"), 0.1); System.out.println(sizeStatistics); } @@ -112,7 +112,7 @@ void getCollectionStatisticsWithFilterTest() throws ParseException { String typeFilter = "FORMAT=\"Portable Document Format\""; CriteriaParser parser = new CriteriaParser(); FilterCriteria parse = parser.parse(typeFilter); - Map sizeStatistics = characterisationResultGatewaySqlImpl.getCollectionStatistics(parse); + Map sizeStatistics = characterisationResultGatewaySqlImpl.getCollectionStatistics(parse, ""); Assert.assertEquals(4.0, sizeStatistics.get("totalSize"), 0.1); System.out.println(sizeStatistics); } @@ -122,7 +122,7 @@ void getCollectionStatisticsWithFilterTest() throws ParseException { void getRandomSamplesTest() { List properties = new ArrayList<>(); properties.add(Property.FORMAT); - List samples = characterisationResultGatewaySqlImpl.getSamples(null, SamplingAlgorithms.RANDOM, properties); + List samples = characterisationResultGatewaySqlImpl.getSamples(null, SamplingAlgorithms.RANDOM, properties, ""); Assert.assertEquals(5, samples.size()); } @@ -130,33 +130,33 @@ void getRandomSamplesTest() { void getSFDSamplesTest() { List properties = new ArrayList<>(); properties.add(Property.FORMAT); - List samples = characterisationResultGatewaySqlImpl.getSamples(null, SamplingAlgorithms.SELECTIVE_FEATURE_DISTRIBUTION, properties); + List samples = characterisationResultGatewaySqlImpl.getSamples(null, SamplingAlgorithms.SELECTIVE_FEATURE_DISTRIBUTION, properties, ""); Assert.assertEquals(3, samples.size()); } @Test void getConflictRateTest() { - double conflictRate = characterisationResultGatewaySqlImpl.getConflictRate(); + double conflictRate = characterisationResultGatewaySqlImpl.getConflictRate(""); Assert.assertEquals(0.4,conflictRate, 0.01); } @Test void getConflictsByFilepathTest() { - List filepathProperty = characterisationResultGatewaySqlImpl.getConflictEntries(); + List filepathProperty = characterisationResultGatewaySqlImpl.getConflictEntries(""); Assert.assertEquals(2,filepathProperty.size()); } @Test void getCharacterisationResultsByFilepathPropertyTest() { - List filepathProperty = characterisationResultGatewaySqlImpl.getEntries(); + List filepathProperty = characterisationResultGatewaySqlImpl.getEntries(""); List results = new ArrayList<>(); for (Entry strings : filepathProperty) { - List characterisationResultsByFilepathProperty = characterisationResultGatewaySqlImpl.getCharacterisationResultsByEntry(strings); + List characterisationResultsByFilepathProperty = characterisationResultGatewaySqlImpl.getCharacterisationResultsByEntry(strings, ""); results.addAll(characterisationResultsByFilepathProperty); } diff --git a/main/src/test/java/rocks/artur/RestServiceTest.java b/main/src/test/java/rocks/artur/RestServiceTest.java index b853247..f7f574a 100644 --- a/main/src/test/java/rocks/artur/RestServiceTest.java +++ b/main/src/test/java/rocks/artur/RestServiceTest.java @@ -102,6 +102,7 @@ void emptyTest() { @Test void getCollectionStatisticsTest() { String str = given().port(port) + .param("datasetName", "default") .when().post("/statistics") .then() .statusCode(200).extract().asString(); @@ -112,6 +113,7 @@ void getCollectionStatisticsTest() { @Test void getPropertiesTest() { String str = given().port(port) + .param("datasetName", "default") .when().get("/properties") .then() .statusCode(200).extract().asString(); @@ -122,6 +124,7 @@ void getPropertiesTest() { @Test void getSourcesTest() { String str = given().port(port) + .param("datasetName", "default") .when().get("/sources") .then() .statusCode(200).extract().asString(); @@ -139,6 +142,7 @@ void getOperatorsTest() { @Test void getObjectTest() { String str = given().port(port).param("filepath","/home/artur/file1") + .param("datasetName", "default") .when().post("/object") .then() .statusCode(200).extract().asString(); @@ -148,6 +152,7 @@ void getObjectTest() { @Test void getObjectsTest() { String str = given().port(port).param("filter", " format='docx' OR format='pdf'") + .param("datasetName", "default") .when().post("/objects") .then() .statusCode(200).extract().asString(); @@ -157,6 +162,7 @@ void getObjectsTest() { @Test void getObjectConflictsTest() { String str = given().port(port).param("filepath","/home/artur/file1") + .param("datasetName", "default") .when().post("/objectconflicts") .then() .statusCode(200).extract().asString(); @@ -169,6 +175,7 @@ void getPropertyDistributionWithFilterTest() { String str = given().port(port) .param("filter", "FORMAT=\"Portable Document Format\"") .param("property", "FORMAT") + .param("datasetName", "default") .when().post("/propertyvalues") .then() .statusCode(200).extract().asString(); @@ -179,6 +186,7 @@ void getPropertyDistributionWithFilterTest() { void getPropertyDistributionWithoutFilterTest() { String str = given().port(port) .param("property", "FORMAT") + .param("datasetName", "default") .when().post("/propertyvalues") .then() .statusCode(200).extract().asString(); @@ -211,7 +219,7 @@ void uploadFileTest() { //Then, I call my /upload endpoint, where a FITS XML is generated and the char results uploaded into DB - given().port(port).multiPart("file",file) + given().port(port).param("datasetName", "default").multiPart("file",file) .when().post("/upload") .then() .statusCode(200).extract().asString(); @@ -223,6 +231,7 @@ void uploadFileTest() { given().port(port) .param("filepath", "/usr/local/tomcat/webapps/fits/upload/1582118786085/README.md") + .param("datasetName", "default") .when().post("/object") .then() .statusCode(200).extract().asString(); @@ -234,6 +243,7 @@ void uploadFileTest() { @Test void resolveConflictsTest() { String str = given().port(port) + .param("datasetName", "default") .when().post("/resolveconflicts") .then() .statusCode(200).extract().asString(); diff --git a/web/frontend/src/AppConfig.jsx b/web/frontend/src/AppConfig.jsx index d59ecef..bc2613d 100644 --- a/web/frontend/src/AppConfig.jsx +++ b/web/frontend/src/AppConfig.jsx @@ -20,6 +20,8 @@ const AppConfig = () => { [] ); + + var myHeaders = new Headers(); myHeaders.append("Content-Type", "application/json"); @@ -29,16 +31,30 @@ const AppConfig = () => { redirect: "follow", }; + const [dataset, setDataset] = useSessionStorage( + "dataset", + "" + ); + const fetchGlobalProperties = async () => { - await fetch(BACKEND_URL + "/statistics?", requestOptions); - const response = await fetch(BACKEND_URL + "/properties"); + await fetch(BACKEND_URL + "/statistics?" + + new URLSearchParams({ + datasetName: dataset, + }), requestOptions); + const response = await fetch(BACKEND_URL + "/properties?" + + new URLSearchParams({ + datasetName: "default", + })); let data = await response.json(); let properties = data.map((prop) => prop.property); setGlobalProperties(properties); }; const fetchGlobalStatistics = async () => { - const response = await fetch(BACKEND_URL + "/statistics?", requestOptions); + const response = await fetch(BACKEND_URL + "/statistics?" + + new URLSearchParams({ + datasetName: dataset, + }), requestOptions); let data = await response.json(); setGlobalStatistics(data); }; @@ -51,7 +67,9 @@ const AppConfig = () => { console.log(error); setErrorMessage("REST API is not accessible!"); } - }; + } + + const fetchInitialData = async () => { await fetchHealth(); diff --git a/web/frontend/src/components/Upload.jsx b/web/frontend/src/components/Upload.jsx index fbe600e..5429826 100644 --- a/web/frontend/src/components/Upload.jsx +++ b/web/frontend/src/components/Upload.jsx @@ -7,12 +7,16 @@ import "@uppy/dashboard/dist/style.css"; import XHRUpload from "@uppy/xhr-upload"; import { BACKEND_URL } from "../AppConfig"; -const Upload = () => { +const Upload = ({dataset}) => { const theme = useTheme(); const colors = tokens(theme.palette.mode); - const uppy = new Uppy().use(XHRUpload, { + const uppy = new Uppy({ + debug: true, + meta: { datasetName: dataset }, + }) .use(XHRUpload, { endpoint: BACKEND_URL + "/upload", + allowedMetaFields: ['datasetName'] }); return ( { const [filter, setFilter] = useSessionStorage("filterString", ""); - + const [dataset, setDataset] = useSessionStorage( + "dataset", + "" + ); const [data, setData] = useState([]); // GET with fetch API useEffect(() => { @@ -34,6 +37,7 @@ const PropertyValueDistribution = (payload) => { new URLSearchParams({ property: payload["property"], filter: filter, + datasetName: dataset }), requestOptions ); @@ -60,7 +64,7 @@ const PropertyValueDistribution = (payload) => { } }; fetchPost(); - }, [filter]); + }, [filter, dataset]); let filterClick = (property, event) => { if (event.indexValue == ".etc") { diff --git a/web/frontend/src/scenes/dashboard/index.jsx b/web/frontend/src/scenes/dashboard/index.jsx index db97df2..d106200 100644 --- a/web/frontend/src/scenes/dashboard/index.jsx +++ b/web/frontend/src/scenes/dashboard/index.jsx @@ -18,7 +18,10 @@ const Dashboard = () => { const [filter, setFilter] = useSessionStorage("filterString", ""); - + const [dataset, setDataset] = useSessionStorage( + "dataset", + "" + ); const [globalStatistics, setGlobalStatistics] = useSessionStorage( "globalStatistics", [ @@ -45,7 +48,10 @@ const Dashboard = () => { ); const fetchGlobalProperties = async () => { - const response = await fetch(BACKEND_URL + "/properties"); + const response = await fetch(BACKEND_URL + "/properties?" + + new URLSearchParams({ + datasetName: "default", + })); let data = await response.json(); let properties = data.map((prop) => prop.property); setGlobalProperties(properties); @@ -66,6 +72,7 @@ const Dashboard = () => { "/statistics?" + new URLSearchParams({ filter: filter, + datasetName: dataset }), requestOptions ); @@ -81,7 +88,7 @@ const Dashboard = () => { useEffect(() => { console.log("loading the dashboard"); fetchData(); - }, [filter]); + }, [filter, dataset]); const handleClick = () => { console.log("Conflict resolution started"); diff --git a/web/frontend/src/scenes/global/Topbar.jsx b/web/frontend/src/scenes/global/Topbar.jsx index f6718e2..a43f8fb 100644 --- a/web/frontend/src/scenes/global/Topbar.jsx +++ b/web/frontend/src/scenes/global/Topbar.jsx @@ -1,17 +1,81 @@ -import { Box, IconButton, useTheme } from "@mui/material"; -import { useContext } from "react"; - -import { ColorModeContext, tokens } from "../../theme"; +import {Box, useTheme} from "@mui/material"; +import React, {useContext, useEffect} from "react"; +import InputLabel from '@mui/material/InputLabel'; +import MenuItem from '@mui/material/MenuItem'; +import FormControl from '@mui/material/FormControl'; +import Select from '@mui/material/Select'; +import {ColorModeContext, tokens} from "../../theme"; import Filter from "../../components/Filter"; +import {BACKEND_URL} from "../../AppConfig"; +import {useSessionStorage} from "@uidotdev/usehooks"; + const Topbar = () => { const theme = useTheme(); const colors = tokens(theme.palette.mode); - const colorMode = useContext(ColorModeContext); - return ( - - - + const [datasets, setDatasets] = useSessionStorage( + "datasets", + [] + ); + + const [dataset, setDataset] = useSessionStorage( + "dataset", + "" + ); + + var myHeaders = new Headers(); + myHeaders.append("Content-Type", "application/json"); + var requestGETOptions = { + method: "GET", + headers: myHeaders, + redirect: "follow", + }; + + const fetchDatasets = async () => { + const response = await fetch(BACKEND_URL + "/datasets", requestGETOptions); + let data = await response.json(); + setDatasets(data); + }; + + + + const colorMode = useContext(ColorModeContext); + + const fetchData = async () => { + await fetchDatasets(); + }; + + useEffect(() => { + fetchDatasets(); + }, []); + + + + const handleChange = (event) => { + setDataset(event.target.value); + }; + + const handleClick = (event) => { + fetchDatasets(); + }; + + + return ( + + + + + + + Dataset + + + + ); }; diff --git a/web/frontend/src/scenes/objectDetails/index.jsx b/web/frontend/src/scenes/objectDetails/index.jsx index a2e4159..2b820cf 100644 --- a/web/frontend/src/scenes/objectDetails/index.jsx +++ b/web/frontend/src/scenes/objectDetails/index.jsx @@ -25,6 +25,12 @@ const ObjectDetails = () => { "selectedObject", "" ); + + const [dataset, setDataset] = useSessionStorage( + "dataset", + "" + ); + useEffect(() => { console.log("loading the object details list"); var myHeaders = new Headers(); @@ -48,6 +54,7 @@ const ObjectDetails = () => { "/object?" + new URLSearchParams({ filepath: selectedObject, + datasetName: dataset }), requestOptions ); @@ -58,6 +65,7 @@ const ObjectDetails = () => { "/objectconflicts?" + new URLSearchParams({ filepath: selectedObject, + datasetName: dataset }), requestOptions ); diff --git a/web/frontend/src/scenes/objects/index.jsx b/web/frontend/src/scenes/objects/index.jsx index 5847c21..779c98c 100644 --- a/web/frontend/src/scenes/objects/index.jsx +++ b/web/frontend/src/scenes/objects/index.jsx @@ -19,6 +19,10 @@ const Objects = () => { "selectedObject", "" ); + const [dataset, setDataset] = useSessionStorage( + "dataset", + "" + ); const [filter, setFilter] = useSessionStorage("filterString", ""); const navigate = useNavigate(); useEffect(() => { @@ -44,6 +48,7 @@ const Objects = () => { "/objects?" + new URLSearchParams({ filter: filter, + datasetName: dataset }), requestOptions ); diff --git a/web/frontend/src/scenes/uploadForm/index.jsx b/web/frontend/src/scenes/uploadForm/index.jsx index a6390f7..de13a50 100644 --- a/web/frontend/src/scenes/uploadForm/index.jsx +++ b/web/frontend/src/scenes/uploadForm/index.jsx @@ -1,16 +1,36 @@ -import { Box } from "@mui/material"; +import {Box, Button} from "@mui/material"; import Header from "../../components/Header"; import Upload from "../../components/Upload"; +import TextField from "@mui/material/TextField"; +import React from "react"; +import {useContext, useState} from "react"; + const UploadForm = () => { + + const [newDataset, setNewDataset] = useState(''); + + const handleTextInputChange = event => { + setNewDataset(event.target.value); + }; + + + + return (
+ + + + + - +
);