Skip to content

Commit

Permalink
Feature/db boost (#10)
Browse files Browse the repository at this point in the history
* add docker-compose with mysql cluster

* add docker-compose with mysql cluster

* add docker-compose with mysql cluster

* add docker-compose with mysql cluster

* add property to index

* fix typo

* wip

* wip

* wip

* improve date parsing

* wip

* wip

* wip

* added xml parsing using stax

* minor fix

* Removed property MESSAGE

* Clean up

* Clean up

* Works

* Extend index

* Add loadbalancing

* Add fileupload.py

* Improve db IO

* minor

* Fix NPE

* renamed columns

* works

* improving mysql cluster

* Add auto.sh

* redoing the queries

* redoing the queries. add caching

* redoing the queries. add caching

* indices look good

* indices look good

---------

Co-authored-by: artur <[email protected]>
  • Loading branch information
artourkin and artur authored Mar 13, 2024
1 parent 420ed35 commit 4daf252
Show file tree
Hide file tree
Showing 53 changed files with 1,213 additions and 485 deletions.
Binary file added .mvn/wrapper/maven-wrapper.jar
Binary file not shown.
18 changes: 18 additions & 0 deletions .mvn/wrapper/maven-wrapper.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
distributionUrl=https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.8.7/apache-maven-3.8.7-bin.zip
wrapperUrl=https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
FROM maven:3.9.0 as builder
COPY . /app
WORKDIR /app
RUN --mount=type=cache,target=/root/.m2 mvn -pl !web clean install -Pdocker
RUN --mount=type=cache,target=/root/.m2 mvn -pl -web clean install -Pdocker

FROM openjdk:21-jdk-slim
WORKDIR /app
Expand Down
16 changes: 16 additions & 0 deletions Dockerfile.dev
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Build stage: compiles the project with Maven inside the "builder" image.
FROM maven:3.9.0 as builder
COPY . /app
WORKDIR /app
# /root/.m2 is mounted as a build cache so downloaded dependencies survive between builds.
# "-pl -web" leaves the web module out of the reactor; tests are skipped for this dev image.
RUN --mount=type=cache,target=/root/.m2 mvn -pl -web clean install -DskipTests

# Runtime stage: only the built jar is copied over from the builder stage.
FROM openjdk:21-jdk-slim
WORKDIR /app
# NOTE(review): dumps the build-time environment into the build log; looks like a debugging aid, confirm it is still wanted.
RUN printenv
COPY --from=builder /app/main/target/fitsinn-main-*.jar ./app.jar

# Give ownership of the jar to UID 1001, the user the container runs as below.
RUN chown 1001 ./app.jar \
&& chmod "g+rwX" ./app.jar

USER 1001
EXPOSE 8080
ENTRYPOINT ["java", "-jar", "app.jar"]
11 changes: 8 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,20 @@ Installation of FITSInn to Docker Swarm or K8S is possible, but is not currently

### Local build

Building the Docker images from scratch and starting FITSInn is executed via:

Building the Docker images from scratch and starting FITSInn is executed via:
```
docker-compose -f docker-compose.dev.yaml up --build
```

File uploading using bash:

```
bash fileupload.sh http://localhost:8082 ~/rnd/data/govdocs_fits/govdocs1/000/
```

File uploading using python (pip package requests is necessary):
```
python fileupload.py http://localhost:8082/multipleupload ~/rnd/data/govdocs_fits/govdocs1/000/ 100 3
```

## Issues

Expand Down
Empty file added auto.post
Empty file.
19 changes: 19 additions & 0 deletions auto.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash
# Local benchmark helper: builds the project, starts the main module in the
# background, uploads a sample data set and then load-tests the
# /propertyvalues endpoint with ApacheBench before stopping the application.

# NOTE(review): machine-specific virtualenv path; adjust to your checkout.
source /home/artur/rnd/git/fitsinn/.venv/bin/activate
# Build everything except the web module, skipping tests.
./mvnw -pl -web -DskipTests clean install

# Start the application in the background so the script can continue.
./mvnw spring-boot:run -f main/pom.xml &

# Remember the PID of the background job so it can be stopped at the end.
proc_id=$!
echo "proc_id is $proc_id"

# Fixed pause to give the application time to start before sending requests.
sleep 10

python fileupload.py http://localhost:8080/multipleupload ~/rnd/data/subset_govdocs/ 100 2


#time curl -X 'POST' 'http://localhost:8080/propertyvalues?property=FORMAT' -H 'accept: */*' -d ''
# 50 sequential POST requests; auto.post supplies the request body.
ab -n 50 -c 1 -p auto.post -T 'text/plain' 'http://localhost:8080/propertyvalues?property=FORMAT'

# Print the actual PID (the original echoed the literal text "proc_id").
echo "killing $proc_id"
kill "$proc_id"
7 changes: 7 additions & 0 deletions cassandra/docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Compose file that starts a single Cassandra container for local use.
version: '3.9'

services:
cassandra:
image: cassandra
# Publish port 9042 of the container on the same host port.
ports:
- 9042:9042
11 changes: 8 additions & 3 deletions core/src/main/java/rocks/artur/api/AnalyzePersistFile.java
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
package rocks.artur.api;

import java.io.File;
import rocks.artur.api_impl.utils.ByteFile;

import java.util.List;

/**
* This interface enables the following actions:
* - to analyze a digital object using a characterisation tool,
* - to persist a characterisation result in a db.
*/
public interface AnalyzePersistFile {
Long uploadCharacterisationResults(File file);
Long uploadCharacterisationResults(byte[] file, String filename);

Long uploadCharacterisationResults(ByteFile file);

Long uploadCharacterisationResults(List<ByteFile> files);

}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package rocks.artur.api;

import rocks.artur.api_impl.utils.ByteFile;
import rocks.artur.domain.CharacterisationResult;

import java.io.File;
Expand All @@ -15,7 +16,7 @@ public interface CharacterisationResultProducer {
* @return A version of the tool
* @throws IOException
*/
String getVersion() throws IOException;
String getVersion();

/***
*
Expand All @@ -25,17 +26,16 @@ public interface CharacterisationResultProducer {
* @return A list of @CharacterisationResult
* @throws IOException
*/
List<CharacterisationResult> processFile(File file) throws IOException;
List<CharacterisationResult> processFile(File file);


/***
*
* This method extracts metadata properties from a given digital object passed as a byte array.
*
* @param file Input File
* @param filename
* @return A list of @CharacterisationResult
* @throws IOException
*/
List<CharacterisationResult> processFile(byte[] file, String filename) throws IOException;
List<CharacterisationResult> processFile(ByteFile file);
}
33 changes: 15 additions & 18 deletions core/src/main/java/rocks/artur/api_impl/AnalyzePersistFileImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@

import rocks.artur.api.AnalyzePersistFile;
import rocks.artur.api.CharacterisationResultProducer;
import rocks.artur.api_impl.utils.ByteFile;
import rocks.artur.domain.CharacterisationResult;
import rocks.artur.domain.CharacterisationResultGateway;

import java.io.File;
import java.io.IOException;

import java.util.ArrayList;
import java.util.List;

public class AnalyzePersistFileImpl implements AnalyzePersistFile {
Expand All @@ -20,24 +21,20 @@ public AnalyzePersistFileImpl(CharacterisationResultProducer characterisationRes
}

@Override
public Long uploadCharacterisationResults(File file) {
try {
List<CharacterisationResult> characterisationResults = characterisationResultProducer.processFile(file);
characterisationResults.forEach(item -> characterisationResultGateway.addCharacterisationResult(item));
return Long.valueOf(characterisationResults.size());
} catch (IOException e) {
throw new RuntimeException(e);
}
public Long uploadCharacterisationResults(ByteFile file) {
    // Characterise the single file, then pass the whole batch to the gateway in one call.
    List<CharacterisationResult> produced = characterisationResultProducer.processFile(file);
    characterisationResultGateway.addCharacterisationResults(produced);

    // The return value is the number of results produced for this file.
    long producedCount = produced.size();
    return producedCount;
}

@Override
public Long uploadCharacterisationResults(byte[] file, String filename) {
try {
List<CharacterisationResult> characterisationResults = characterisationResultProducer.processFile(file, filename);
characterisationResultGateway.addCharacterisationResults(characterisationResults);
return Long.valueOf(characterisationResults.size());
} catch (IOException e) {
throw new RuntimeException(e);
}
public Long uploadCharacterisationResults(List<ByteFile> files) {
    // Gather the results of every file first so the gateway is called only once.
    List<CharacterisationResult> collected = new ArrayList<>();
    for (ByteFile current : files) {
        collected.addAll(characterisationResultProducer.processFile(current));
    }

    characterisationResultGateway.addCharacterisationResults(collected);

    // The return value is the total number of results across all files.
    return Long.valueOf(collected.size());
}
}
27 changes: 27 additions & 0 deletions core/src/main/java/rocks/artur/api_impl/utils/ByteFile.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package rocks.artur.api_impl.utils;

/**
 * Mutable holder that pairs the raw bytes of a file with the file's name.
 *
 * <p>The byte array is stored and returned by reference; no copy is made.
 */
public class ByteFile {
    String filename;
    byte[] file;

    /**
     * Creates a holder for the given content and name.
     *
     * @param file     raw content of the file
     * @param filename name associated with the content
     */
    public ByteFile(byte[] file, String filename) {
        this.filename = filename;
        this.file = file;
    }

    /**
     * @return the name associated with the content
     */
    public String getFilename() {
        return this.filename;
    }

    /**
     * @param filename new name to associate with the content
     */
    public void setFilename(String filename) {
        this.filename = filename;
    }

    /**
     * @return the stored byte array (the same instance that was supplied)
     */
    public byte[] getFile() {
        return this.file;
    }

    /**
     * @param file new raw content to store
     */
    public void setFile(byte[] file) {
        this.file = file;
    }
}
7 changes: 0 additions & 7 deletions core/src/main/java/rocks/artur/domain/Property.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,9 @@ public enum Property {
FORMAT(ValueType.STRING),
FORMAT_VERSION(ValueType.STRING),
MIMETYPE(ValueType.STRING),
FILENAME(ValueType.STRING),
AUTHOR(ValueType.STRING),
EXTERNALIDENTIFIER(ValueType.STRING),
SIZE(ValueType.INTEGER),
MD5CHECKSUM(ValueType.STRING),
FSLASTMODIFIED(ValueType.TIMESTAMP),
FILEPATH(ValueType.STRING),
CREATED(ValueType.TIMESTAMP),
LASTMODIFIED(ValueType.TIMESTAMP),
CREATINGAPPLICATIONVERSION(ValueType.STRING),
Expand All @@ -26,15 +22,12 @@ public enum Property {

WELLFORMED(ValueType.STRING),

MESSAGE(ValueType.STRING),

LINEBREAK(ValueType.STRING),
CHARSET(ValueType.STRING),
PAGECOUNT(ValueType.INTEGER),
WORDCOUNT(ValueType.INTEGER),
CHARACTERCOUNT(ValueType.INTEGER),
HASANNOTATIONS(ValueType.STRING),
TITLE(ValueType.STRING),
ISTAGGED(ValueType.STRING),
HASFORMS(ValueType.STRING),
HASOUTLINE(ValueType.STRING),
Expand Down
Loading

0 comments on commit 4daf252

Please sign in to comment.