Skip to content

Commit

Permalink
Bug fix/fix tutorials (#24)
Browse files Browse the repository at this point in the history
* fix: Fix and refactor tutorials

* chore: Add icons

* fix: Fix split tables by replacing recursion with a queue

---------

Co-authored-by: Romuald Rousseau <[email protected]>
  • Loading branch information
RomualdRousseau and Romuald Rousseau authored Oct 17, 2024
1 parent 6eb68a6 commit 069b877
Show file tree
Hide file tree
Showing 18 changed files with 270 additions and 149 deletions.
Binary file added archery-documents/icons/archery-open-graph.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
6 changes: 6 additions & 0 deletions archery-examples/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@
<artifactId>archery-pdf</artifactId>
<version>${project.version}</version>
</dependency>
<!-- Log4j Framework -->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j2-impl</artifactId>
<version>${log4j.version}</version>
</dependency>
</dependencies>

<build>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

public class Common {
private static final Logger LOGGER = LoggerFactory.getLogger(Common.class);
private static final String REPO_BASE_URL = "https://raw.githubusercontent.com/RomualdRousseau/Archery/Archery-Models/main";
private static final String REPO_BASE_URL = "https://raw.githubusercontent.com/romualdrousseau/archery/main/archery-models";

public static <T> JsonModelBuilder loadModelBuilder(final String modelName, final Class<T> clazz) {
return new JsonModelBuilder().fromPath(Common.getResourcePath(String.format("/models/%s.json", modelName), clazz));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,29 +1,30 @@
package com.github.romualdrousseau.archery.examples;

import java.util.ArrayList;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.github.romualdrousseau.archery.Document;
import com.github.romualdrousseau.archery.DocumentFactory;
import com.github.romualdrousseau.archery.parser.LayexTableParser;

public class Tutorial2 implements Runnable {

public Tutorial2() {
/**
 * Command-line entry point: instantiates {@link Tutorial2} and invokes its {@code run()} method.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(final String[] args) {
    final Runnable tutorial = new Tutorial2();
    tutorial.run();
}

@Override
public void run() {
final var tableParser = new LayexTableParser(
List.of("(v.$)+"),
List.of("(()(S+$))(()([/^TOTAL/|v].+$)())+(/TOTAL/.+$)"));

final var builder = Common.loadModelBuilderFromGitHub("sales-english");
builder.setTableParser(tableParser);
builder.getEntityList().add("PRODUCTNAME");
builder.getPatternMap().put("\\D+\\dml", "PRODUCTNAME");
builder.getPatternMap().values().remove("PACKAGE");
final var model = builder.build();

final var model = builder
.setTableParser(this.customTableParser())
.setEntityList(this.customEntities(builder.getEntityList()))
.setPatternMap(this.customPatternMap(builder.getPatternMap()))
.build();

final var file = Common.loadData("document with multiple tables.xlsx", this.getClass());
try (final var doc = DocumentFactory.createInstance(file, "UTF-8")
Expand All @@ -37,7 +38,22 @@ public void run() {
}
}

public static void main(final String[] args) {
new Tutorial2().run();
/**
 * Builds the {@link LayexTableParser} used by this tutorial from two single-element layex lists.
 *
 * <p>NOTE(review): the first list is presumably the meta layexes and the second the data
 * layexes; confirm against the {@code LayexTableParser} constructor.
 *
 * @return a new parser configured with the two layex expressions below
 */
private LayexTableParser customTableParser() {
    final List<String> metaLayexes = List.of("(v.$)+");
    final List<String> dataLayexes = List.of("(()(S+$))(()([/^TOTAL/|v].+$)())+(/TOTAL/.+$)");
    return new LayexTableParser(metaLayexes, dataLayexes);
}

/**
 * Returns a new mutable list derived from {@code entities}: the first occurrence of
 * {@code "PACKAGE"} (if any) is dropped and {@code "PRODUCTNAME"} is appended at the end.
 * The list passed in is left untouched.
 *
 * @param entities the entity names to start from; must not be {@code null}
 * @return a fresh {@link ArrayList} holding the customized entity names
 */
private List<String> customEntities(final List<String> entities) {
    final List<String> customized = new ArrayList<>(entities);
    customized.remove("PACKAGE");
    customized.add("PRODUCTNAME");
    return customized;
}

/**
 * Returns a new {@link HashMap} containing every entry of {@code patterns} plus a mapping from
 * the regular expression {@code \D+\dml} to the {@code "PRODUCTNAME"} entity. An existing entry
 * with the same key is overwritten; the map passed in is left untouched.
 *
 * @param patterns the pattern-to-entity mappings to start from; must not be {@code null}
 * @return a fresh mutable map holding the customized mappings
 */
private Map<String, String> customPatternMap(final Map<String, String> patterns) {
    final Map<String, String> customized = new HashMap<>(patterns);
    customized.put("\\D+\\dml", "PRODUCTNAME");
    return customized;
}
}
Original file line number Diff line number Diff line change
@@ -1,28 +1,30 @@
package com.github.romualdrousseau.archery.examples;

import java.util.ArrayList;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.github.romualdrousseau.archery.Document;
import com.github.romualdrousseau.archery.DocumentFactory;
import com.github.romualdrousseau.archery.parser.LayexTableParser;

public class Tutorial3 implements Runnable {

public Tutorial3() {
/**
 * Command-line entry point: instantiates {@link Tutorial3} and invokes its {@code run()} method.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(final String[] args) {
    final Runnable tutorial = new Tutorial3();
    tutorial.run();
}

@Override
public void run() {
final var tableParser = new LayexTableParser(
List.of("(v.$)+"),
List.of("(()(S+$))(()([/^TOTAL/|v].+$)())+(/TOTAL/.+$)"));

final var builder = Common.loadModelBuilderFromGitHub("sales-english");
builder.setTableParser(tableParser);
builder.getEntityList().add("PRODUCTNAME");
builder.getPatternMap().put("\\D+\\dml", "PRODUCTNAME");
final var model = builder.build();

final var model = builder
.setTableParser(this.customTableParser())
.setEntityList(this.customEntities(builder.getEntityList()))
.setPatternMap(this.customPatternMap(builder.getPatternMap()))
.build();

final var file = Common.loadData("document with defect.xlsx", this.getClass());
try (final var doc = DocumentFactory.createInstance(file, "UTF-8")
Expand All @@ -37,7 +39,22 @@ public void run() {
}
}

public static void main(final String[] args) {
new Tutorial3().run();
/**
 * Builds the {@link LayexTableParser} used by this tutorial from two single-element layex lists.
 *
 * <p>NOTE(review): the first list is presumably the meta layexes and the second the data
 * layexes; confirm against the {@code LayexTableParser} constructor.
 *
 * @return a new parser configured with the two layex expressions below
 */
private LayexTableParser customTableParser() {
    final List<String> metaLayexes = List.of("(v.$)+");
    final List<String> dataLayexes = List.of("(()(S+$))(()([/^TOTAL/|v].+$)())+(/TOTAL/.+$)");
    return new LayexTableParser(metaLayexes, dataLayexes);
}

/**
 * Returns a new mutable list derived from {@code entities}: the first occurrence of
 * {@code "PACKAGE"} (if any) is dropped and {@code "PRODUCTNAME"} is appended at the end.
 * The list passed in is left untouched.
 *
 * @param entities the entity names to start from; must not be {@code null}
 * @return a fresh {@link ArrayList} holding the customized entity names
 */
private List<String> customEntities(final List<String> entities) {
    final List<String> customized = new ArrayList<>(entities);
    customized.remove("PACKAGE");
    customized.add("PRODUCTNAME");
    return customized;
}

/**
 * Returns a new {@link HashMap} containing every entry of {@code patterns} plus a mapping from
 * the regular expression {@code \D+\dml} to the {@code "PRODUCTNAME"} entity. An existing entry
 * with the same key is overwritten; the map passed in is left untouched.
 *
 * @param patterns the pattern-to-entity mappings to start from; must not be {@code null}
 * @return a fresh mutable map holding the customized mappings
 */
private Map<String, String> customPatternMap(final Map<String, String> patterns) {
    final Map<String, String> customized = new HashMap<>(patterns);
    customized.put("\\D+\\dml", "PRODUCTNAME");
    return customized;
}
}
Original file line number Diff line number Diff line change
@@ -1,43 +1,59 @@
package com.github.romualdrousseau.archery.examples;

import java.util.ArrayList;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.github.romualdrousseau.archery.Document;
import com.github.romualdrousseau.archery.DocumentFactory;
import com.github.romualdrousseau.archery.parser.LayexTableParser;

public class Tutorial4 implements Runnable {

public Tutorial4() {
/**
 * Command-line entry point: instantiates {@link Tutorial4} and invokes its {@code run()} method.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(final String[] args) {
    final Runnable tutorial = new Tutorial4();
    tutorial.run();
}

/**
 * Runs the tutorial end to end.
 *
 * <p>Obtains the "sales-english" model builder through
 * {@code Common.loadModelBuilderFromGitHub}, configures it with a Layex table parser, the
 * PRODUCTNAME entity and the {@code \D+\dml} pattern, and builds the model. It then opens
 * "document with multiple tables.xlsx" with that model, the INTELLI_LAYOUT and INTELLI_TAG
 * hints and a recipe that sets the capillarity threshold to 0. For every sheet that yields a
 * table, the table's headers and rows are handed to {@code Common.printTags} and
 * {@code Common.printRows}.
 *
 * <p>NOTE(review): this span declares {@code model} twice and configures the builder in two
 * different ways; it looks like the pre-change and post-change lines of a diff rendered
 * together. Confirm which variant is live before editing.
 */
@Override
public void run() {
final var tableParser = new LayexTableParser(
List.of("(v.$)+"),
List.of("(()(S+$))(()([/^TOTAL/|v].+$)())+(/TOTAL/.+$)"));

final var builder = Common.loadModelBuilderFromGitHub("sales-english");
builder.setTableParser(tableParser);
builder.getEntityList().add("PRODUCTNAME");
builder.getPatternMap().put("\\D+\\dml", "PRODUCTNAME");
final var model = builder.build();

final var model = builder
.setTableParser(this.customTableParser())
.setEntityList(this.customEntities(builder.getEntityList()))
.setPatternMap(this.customPatternMap(builder.getPatternMap()))
.build();

final var file = Common.loadData("document with multiple tables.xlsx", this.getClass());
// try-with-resources: the document is closed once every sheet has been processed.
try (final var doc = DocumentFactory.createInstance(file, "UTF-8")
.setModel(model)
.setHints(EnumSet.of(Document.Hint.INTELLI_LAYOUT, Document.Hint.INTELLI_TAG))
.setRecipe("sheet.setCapillarityThreshold(0)")) {

// Sheets for which getTable() is empty are silently skipped.
doc.sheets().forEach(s -> Common.addSheetDebugger(s).getTable().ifPresent(t -> {
Common.printTags(t.headers());
Common.printRows(t.rows());
}));
}
}

public static void main(final String[] args) {
new Tutorial4().run();
/**
 * Builds the {@link LayexTableParser} used by this tutorial from two single-element layex lists.
 *
 * <p>NOTE(review): the first list is presumably the meta layexes and the second the data
 * layexes; confirm against the {@code LayexTableParser} constructor.
 *
 * @return a new parser configured with the two layex expressions below
 */
private LayexTableParser customTableParser() {
    final List<String> metaLayexes = List.of("(v.$)+");
    final List<String> dataLayexes = List.of("(()(S+$))(()([/^TOTAL/|v].+$)())+(/TOTAL/.+$)");
    return new LayexTableParser(metaLayexes, dataLayexes);
}

/**
 * Returns a new mutable list derived from {@code entities}: the first occurrence of
 * {@code "PACKAGE"} (if any) is dropped and {@code "PRODUCTNAME"} is appended at the end.
 * The list passed in is left untouched.
 *
 * @param entities the entity names to start from; must not be {@code null}
 * @return a fresh {@link ArrayList} holding the customized entity names
 */
private List<String> customEntities(final List<String> entities) {
    final List<String> customized = new ArrayList<>(entities);
    customized.remove("PACKAGE");
    customized.add("PRODUCTNAME");
    return customized;
}

/**
 * Returns a new {@link HashMap} containing every entry of {@code patterns} plus a mapping from
 * the regular expression {@code \D+\dml} to the {@code "PRODUCTNAME"} entity. An existing entry
 * with the same key is overwritten; the map passed in is left untouched.
 *
 * @param patterns the pattern-to-entity mappings to start from; must not be {@code null}
 * @return a fresh mutable map holding the customized mappings
 */
private Map<String, String> customPatternMap(final Map<String, String> patterns) {
    final Map<String, String> customized = new HashMap<>(patterns);
    customized.put("\\D+\\dml", "PRODUCTNAME");
    return customized;
}
}
Original file line number Diff line number Diff line change
@@ -1,29 +1,30 @@
package com.github.romualdrousseau.archery.examples;

import java.util.ArrayList;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.github.romualdrousseau.archery.Document;
import com.github.romualdrousseau.archery.DocumentFactory;
import com.github.romualdrousseau.archery.parser.LayexTableParser;

public class Tutorial5 implements Runnable {

public Tutorial5() {
/**
 * Command-line entry point: instantiates {@link Tutorial5} and invokes its {@code run()} method.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(final String[] args) {
    final Runnable tutorial = new Tutorial5();
    tutorial.run();
}

@Override
public void run() {
final var tableParser = new LayexTableParser(
List.of("(v.$)+"),
List.of("(()(S+$S+$))(()([/^TOTAL/|v].+$)())+(/TOTAL/.+$)"));

final var builder = Common.loadModelBuilderFromGitHub("sales-english");
builder.setTableParser(tableParser);
builder.getEntityList().add("PRODUCTNAME");
builder.getPatternMap().put("\\D+\\dml", "PRODUCTNAME");
builder.getPatternMap().put("(?i)((20|19)\\d{2}-(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)-\\d{2})", "DATE");
final var model = builder.build();

final var model = builder
.setTableParser(this.customTableParser())
.setEntityList(this.customEntities(builder.getEntityList()))
.setPatternMap(this.customPatternMap(builder.getPatternMap()))
.build();

final var file = Common.loadData("document with pivot.xlsx", this.getClass());
try (final var doc = DocumentFactory.createInstance(file, "UTF-8")
Expand All @@ -33,15 +34,30 @@ public void run() {
"sheet.setCapillarityThreshold(0)",
"sheet.setPivotOption(\"WITH_TYPE_AND_VALUE\")",
"sheet.setPivotTypeFormat(\"%s\")")) {

doc.sheets().forEach(s -> Common.addSheetDebugger(s).getTable().ifPresent(t -> {
Common.printTags(t.headers());
Common.printRows(t.rows());
}));
}
}

public static void main(final String[] args) {
new Tutorial5().run();
/**
 * Builds the {@link LayexTableParser} used by this tutorial from two single-element layex lists.
 *
 * <p>NOTE(review): the first list is presumably the meta layexes and the second the data
 * layexes; confirm against the {@code LayexTableParser} constructor.
 *
 * @return a new parser configured with the two layex expressions below
 */
private LayexTableParser customTableParser() {
    final List<String> metaLayexes = List.of("(v.$)+");
    final List<String> dataLayexes = List.of("(()(S+$S+$))(()([/^TOTAL/|v].+$)())+(/TOTAL/.+$)");
    return new LayexTableParser(metaLayexes, dataLayexes);
}

/**
 * Returns a new mutable list derived from {@code entities}: the first occurrence of
 * {@code "PACKAGE"} (if any) is dropped and {@code "PRODUCTNAME"} is appended at the end.
 * The list passed in is left untouched.
 *
 * @param entities the entity names to start from; must not be {@code null}
 * @return a fresh {@link ArrayList} holding the customized entity names
 */
private List<String> customEntities(final List<String> entities) {
    final List<String> customized = new ArrayList<>(entities);
    customized.remove("PACKAGE");
    customized.add("PRODUCTNAME");
    return customized;
}

/**
 * Returns a new {@link HashMap} containing every entry of {@code patterns} plus two extra
 * mappings: the regular expression {@code \D+\dml} to {@code "PRODUCTNAME"}, and a
 * case-insensitive {@code yyyy-mmm-dd} style expression (year starting with 19 or 20, three-letter
 * English month abbreviation, two-digit day) to {@code "DATE"}. Existing entries with the same
 * keys are overwritten; the map passed in is left untouched.
 *
 * @param patterns the pattern-to-entity mappings to start from; must not be {@code null}
 * @return a fresh mutable map holding the customized mappings
 */
private Map<String, String> customPatternMap(final Map<String, String> patterns) {
    final Map<String, String> customized = new HashMap<>(patterns);
    customized.put("\\D+\\dml", "PRODUCTNAME");
    customized.put("(?i)((20|19)\\d{2}-(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)-\\d{2})", "DATE");
    return customized;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,25 +9,24 @@

public class Tutorial6 implements Runnable {

public Tutorial6() {
/**
 * Command-line entry point: instantiates {@link Tutorial6} and invokes its {@code run()} method.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(final String[] args) {
    final Runnable tutorial = new Tutorial6();
    tutorial.run();
}

@Override
public void run() {
final var tableParser = new LayexTableParser(
List.of("(v.$)+"),
List.of("(()(E+$E+$))(()(/^PRODUCTCODE/.+$)*(/PRODUCTCODE/.+$))+()"));

final var builder = Common.loadModelBuilderFromGitHub("sales-english");
builder.setTableParser(tableParser);
final var model = builder.build();

final var model = builder
.setTableParser(this.customTableParser())
.build();

final var file = Common.loadData("document with noises.xls", this.getClass());
try (final var doc = DocumentFactory.createInstance(file, "UTF-8")
.setModel(model)
.setHints(EnumSet.of(Document.Hint.INTELLI_LAYOUT))
.setRecipe(
"sheet.setCapillarityThreshold(1.5)",
"sheet.setCapillarityThreshold(1.5)",
"sheet.setDataTableParserFactory(\"DataTableGroupSubFooterParserFactory\")",
"sheet.dropRowsWhenFillRatioLessThan(0.2)")) {

Expand All @@ -38,7 +37,9 @@ public void run() {
}
}

public static void main(final String[] args) {
new Tutorial6().run();
/**
 * Builds the {@link LayexTableParser} used by this tutorial from two single-element layex lists.
 *
 * <p>NOTE(review): the first list is presumably the meta layexes and the second the data
 * layexes; confirm against the {@code LayexTableParser} constructor.
 *
 * @return a new parser configured with the two layex expressions below
 */
private LayexTableParser customTableParser() {
    final List<String> metaLayexes = List.of("(v.$)+");
    final List<String> dataLayexes = List.of("(()(E+$E+$))(()(/^PRODUCTCODE/.+$)*(/PRODUCTCODE/.+$))+()");
    return new LayexTableParser(metaLayexes, dataLayexes);
}
}
Loading

0 comments on commit 069b877

Please sign in to comment.