Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#442 - Upgrade to UIMAv3 #443

Merged
merged 12 commits into from
Nov 3, 2022
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.SofaCapability;
import org.apache.uima.fit.util.FSCollectionFactory;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.jcas.tcas.Annotation;
Expand All @@ -43,16 +47,13 @@
import org.cleartk.srl.type.Chunk;
import org.cleartk.srl.type.Predicate;
import org.cleartk.srl.type.SemanticArgument;
import org.cleartk.syntax.constituent.type.TerminalTreebankNode;
import org.cleartk.syntax.constituent.type.TopTreebankNode;
import org.cleartk.syntax.constituent.type.TreebankNode;
import org.cleartk.syntax.constituent.type.TreebankNodeUtil;
import org.cleartk.token.type.Sentence;
import org.cleartk.token.type.Token;
import org.cleartk.util.AnnotationUtil;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.SofaCapability;
import org.apache.uima.fit.util.FSCollectionFactory;

/**
* <br>
Expand Down Expand Up @@ -98,14 +99,17 @@ public void process(JCas jCas) throws AnalysisEngineProcessException {
CharniakParseParser parser = new CharniakParseParser(initView);

int numberOfPredicates = 0;
for (Conll2005Line line : conll2005Lines)
if (!line.targetVerb.equals("-"))
for (Conll2005Line line : conll2005Lines) {
if (!line.targetVerb.equals("-")) {
numberOfPredicates += 1;
}
}

int currentPredicate = 0;
PredicateParser predicateParsers[] = new PredicateParser[numberOfPredicates];
for (int i = 0; i < numberOfPredicates; i++)
for (int i = 0; i < numberOfPredicates; i++) {
predicateParsers[i] = new PredicateParser(initView);
}

NamedEntityParser namedEntityParser = new NamedEntityParser(initView);

Expand All @@ -128,7 +132,7 @@ public void process(JCas jCas) throws AnalysisEngineProcessException {
token.setPos(line.pos);
token.addToIndexes();

TreebankNode terminal = new TreebankNode(initView, startIndex, endIndex);
TerminalTreebankNode terminal = new TerminalTreebankNode(initView, startIndex, endIndex);
terminal.setNodeType(line.pos);
terminal.setNodeValue(line.word);
terminal.setChildren(new FSArray(jCas, 0));
Expand Down Expand Up @@ -162,8 +166,9 @@ public void process(JCas jCas) throws AnalysisEngineProcessException {

parser.makeParse();

for (PredicateParser predicateParser : predicateParsers)
for (PredicateParser predicateParser : predicateParsers) {
predicateParser.makePredicate();
}

} catch (CASException e) {
throw new AnalysisEngineProcessException(e);
Expand Down Expand Up @@ -237,8 +242,9 @@ public TreebankNode makeTreebankNode(JCas jCas) {
node.setNodeType(this.type);
node.setChildren(new FSArray(jCas, this.children.size()));
FSCollectionFactory.fillArrayFS(node.getChildren(), this.children);
for (TreebankNode child : this.children)
for (TreebankNode child : this.children) {
child.setParent(node);
}
node.addToIndexes();
return node;
}
Expand All @@ -248,18 +254,18 @@ public TreebankNode makeTreebankNode(JCas jCas) {
private static class CharniakParseParser {
Stack<Constituent> parseStack;

List<TreebankNode> terminals;
List<TerminalTreebankNode> terminals;

JCas jCas;

CharniakParseParser(JCas jCas) {
parseStack = new Stack<Constituent>();
parseStack.push(new Constituent("TOP"));
terminals = new ArrayList<TreebankNode>();
terminals = new ArrayList<>();
this.jCas = jCas;
}

void feed(String segment, TreebankNode terminal) throws IOException {
void feed(String segment, TerminalTreebankNode terminal) throws IOException {
BufferedReader r = new BufferedReader(new StringReader(segment));

terminals.add(terminal);
Expand Down Expand Up @@ -291,8 +297,9 @@ public TopTreebankNode makeParse() {
List<TreebankNode> children = parseStack.peek().children;
node.setChildren(new FSArray(jCas, children.size()));
FSCollectionFactory.fillArrayFS(node.getChildren(), children);
for (TreebankNode child : parseStack.peek().children)
for (TreebankNode child : parseStack.peek().children) {
child.setParent(node);
}
node.setTerminals(new FSArray(jCas, this.terminals.size()));
FSCollectionFactory.fillArrayFS(node.getTerminals(), this.terminals);
node.addToIndexes();
Expand Down Expand Up @@ -376,8 +383,9 @@ void feed(String segment, Token token) throws IOException {
this.argumentTokens = null;
break;
case '*':
if (this.argumentTokens != null)
if (this.argumentTokens != null) {
this.argumentTokens.add(token);
}
break;
default:
throw new IOException("unexpected character in string: " + String.valueOf(c) + " ("
Expand Down Expand Up @@ -409,8 +417,9 @@ private static String readArgumentType(BufferedReader r) throws IOException {
while (true) {
r.mark(1);
int i = r.read();
if (i == -1)
if (i == -1) {
break;
}

char c = (char) i;
if (c == '(' || c == ')' || c == '*') {
Expand Down Expand Up @@ -476,8 +485,9 @@ private static String readName(BufferedReader r) throws IOException {
while (true) {
r.mark(1);
int i = r.read();
if (i == -1)
if (i == -1) {
break;
}

char c = (char) i;
if (c == '*') {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import java.util.Collections;
import java.util.List;
import java.util.TreeMap;
import java.util.function.IntPredicate;

import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
Expand Down Expand Up @@ -490,5 +491,35 @@ public int getEnd() {
public String getCoveredText() {
throw new UnsupportedOperationException();
}

@Override
public int _id() {
throw new UnsupportedOperationException();
}

@Override
public int _getTypeCode() {
throw new UnsupportedOperationException();
}

@Override
public int getAddress() {
throw new UnsupportedOperationException();
}

@Override
public void setBegin(int aBegin) {
throw new UnsupportedOperationException();
}

@Override
public void setEnd(int aEnd) {
throw new UnsupportedOperationException();
}

@Override
public void trim(IntPredicate aPredicate) {
throw new UnsupportedOperationException();
}
}
}
20 changes: 17 additions & 3 deletions cleartk-ml-crfsuite/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,23 @@
<artifactId>cleartk-ml</artifactId>
</dependency>
<dependency>
<groupId>de.tudarmstadt.ukp.dkpro.core</groupId>
<artifactId>de.tudarmstadt.ukp.dkpro.core.api.resources-asl</artifactId>
<version>1.6.0</version>
<groupId>org.dkpro.core</groupId>
<artifactId>dkpro-core-api-resources-asl</artifactId>
<version>2.2.0</version>
<exclusions>
<exclusion>
<groupId>org.springframework</groupId>
<artifactId>spring-core</artifactId>
</exclusion>
<exclusion>
<groupId>org.springframework</groupId>
<artifactId>spring-context</artifactId>
</exclusion>
<exclusion>
<groupId>org.springframework</groupId>
<artifactId>spring-beans</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.cleartk</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
import org.cleartk.util.InputStreamHandler;
import org.cleartk.util.PlatformDetection;

import de.tudarmstadt.ukp.dkpro.core.api.resources.ResourceUtils;
import org.dkpro.core.api.resources.ResourceUtils;

/**
* <br>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,12 @@
import java.util.List;
import java.util.Map;

import org.apache.commons.lang.SerializationUtils;
import org.apache.commons.lang3.SerializationUtils;
import org.cleartk.ml.Feature;
import org.cleartk.ml.Instance;
import org.cleartk.ml.jar.DefaultDataWriterFactory;
import org.cleartk.ml.jar.DirectoryDataWriterFactory;
import org.cleartk.ml.jar.Train;
import org.cleartk.ml.liblinear.LibLinearBooleanOutcomeClassifier;
import org.cleartk.ml.liblinear.LibLinearBooleanOutcomeClassifierBuilder;
import org.cleartk.ml.liblinear.LibLinearBooleanOutcomeDataWriter;
import org.cleartk.ml.liblinear.LibLinearStringOutcomeClassifier;
import org.cleartk.ml.liblinear.LibLinearStringOutcomeClassifierBuilder;
import org.cleartk.ml.liblinear.LibLinearStringOutcomeDataWriter;
import org.cleartk.ml.liblinear.ExampleInstanceFactory.BooleanAnnotator;
import org.cleartk.ml.liblinear.ExampleInstanceFactory.StringAnnotator;
import org.cleartk.ml.liblinear.encoder.FeatureNodeArrayEncoder;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang.NotImplementedException;
import org.apache.commons.lang3.NotImplementedException;
import org.cleartk.ml.tksvmlight.TreeFeature;
import org.cleartk.ml.tksvmlight.TreeFeatureVector;
/**
Expand All @@ -46,6 +46,7 @@ public abstract class TreeKernel_ImplBase implements ComposableTreeKernel {

protected ForestSumMethod sumMethod = ForestSumMethod.SEQUENTIAL;

@Override
public double evaluate(TreeFeatureVector fv1, TreeFeatureVector fv2) {
double sim = 0.0;
if (sumMethod == ForestSumMethod.SEQUENTIAL) {
Expand All @@ -62,6 +63,7 @@ public double evaluate(TreeFeatureVector fv1, TreeFeatureVector fv2) {
return sim;
}

@Override
public abstract double evaluate(TreeFeature tf1, TreeFeature tf2);

}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import java.util.List;
import java.util.Map;

import org.apache.commons.lang.NotImplementedException;
import org.apache.commons.lang3.NotImplementedException;
import org.cleartk.ml.CleartkProcessingException;
import org.cleartk.ml.Feature;
import org.cleartk.ml.encoder.features.FeaturesEncoder;
Expand Down
8 changes: 7 additions & 1 deletion cleartk-ml/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
<groupId>org.cleartk</groupId>
<artifactId>cleartk-util</artifactId>
</dependency>
<!-- test only -->

<!-- test only -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
Expand All @@ -38,5 +39,10 @@
<artifactId>uimaj-document-annotation</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,15 @@
*/
package org.cleartk.ml.jar;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.contentOf;
import static org.junit.Assert.assertTrue;

import java.io.File;

import org.apache.uima.UimaContext;
import org.apache.uima.fit.factory.UimaContextFactory;
import org.apache.uima.util.FileUtils;
import org.cleartk.ml.DataWriter;
import org.cleartk.ml.encoder.features.FeaturesEncoder_ImplBase;
import org.cleartk.ml.encoder.features.NameNumberFeaturesEncoder;
Expand Down Expand Up @@ -59,10 +60,9 @@ public void testManifest() throws Throwable {
dataWriter.setFeaturesEncoder(new NameNumberFeaturesEncoder(false, false));
dataWriter.setOutcomeEncoder(new StringToStringOutcomeEncoder());
dataWriter.finish();
File manifestFile = new File(outputDirectory, "MANIFEST.MF");
String actualManifest = FileUtils.file2String(manifestFile);

assertThat(actualManifest).isEqualToIgnoringWhitespace(expectedManifest);

assertThat(contentOf(new File(outputDirectory, "MANIFEST.MF"), UTF_8)) //
.isEqualToIgnoringWhitespace(expectedManifest);
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,15 +118,13 @@ public void testConsumeAll() throws Exception {
String expectedManifest = "Manifest-Version: 1.0\n"
+ "classifierBuilderClass: org.cleartk.ml.viterbi.ViterbiClassifierBuilde\n" + " r";

File manifestFile = new File(outputDirectoryName, "MANIFEST.MF");
String actualManifest = contentOf(manifestFile, UTF_8);
assertThat(actualManifest).isEqualToIgnoringWhitespace(expectedManifest);
assertThat(contentOf(new File(outputDirectoryName, "MANIFEST.MF"), UTF_8)) //
.isEqualToIgnoringWhitespace(expectedManifest);

ViterbiClassifierBuilder<String> builder = new ViterbiClassifierBuilder<String>();
File delegatedOutputDirectory = builder.getDelegatedModelDirectory(outputDirectory);
String[] trainingData = FileUtil.loadListOfStrings(new File(
delegatedOutputDirectory,
"training-data.test"));
String[] trainingData = FileUtil.loadListOfStrings(
new File(delegatedOutputDirectory, "training-data.test"));
testFeatures(trainingData[1], "PreviousOutcome_L1_D");
testFeatures(
trainingData[2],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@ public static AnalysisEngineDescription getDescription(String languageCode)
return AnalysisEngineFactory.createEngineDescription(
opennlp.uima.postag.POSTagger.class,
opennlp.uima.util.UimaUtil.MODEL_PARAMETER,
ExternalResourceFactory.createExternalResourceDescription(
POSModelResourceImpl.class,
PosTagger.class.getResource(modelPath).toString()),
ExternalResourceFactory.createSharedResourceDescription(
PosTagger.class.getResource(modelPath).toString(),
POSModelResourceImpl.class),
opennlp.uima.util.UimaUtil.SENTENCE_TYPE_PARAMETER,
Sentence.class.getName(),
opennlp.uima.util.UimaUtil.TOKEN_TYPE_PARAMETER,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@ public static AnalysisEngineDescription getDescription(String languageCode)
return AnalysisEngineFactory.createEngineDescription(
opennlp.uima.tokenize.Tokenizer.class,
opennlp.uima.util.UimaUtil.MODEL_PARAMETER,
ExternalResourceFactory.createExternalResourceDescription(
TokenizerModelResourceImpl.class,
Tokenizer.class.getResource(modelPath).toString()),
ExternalResourceFactory.createSharedResourceDescription(
Tokenizer.class.getResource(modelPath).toString(),
TokenizerModelResourceImpl.class),
opennlp.uima.util.UimaUtil.SENTENCE_TYPE_PARAMETER,
Sentence.class.getName(),
opennlp.uima.util.UimaUtil.TOKEN_TYPE_PARAMETER,
Expand Down
Loading