diff --git a/.gitignore b/.gitignore index 0777b1ce..5bf8edd3 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,6 @@ common/bin/ lap/bin/ uima.log +.metadata +lap/src/null223365410 +.gitignore diff --git a/alignmentedas/pom.xml b/alignmentedas/pom.xml new file mode 100644 index 00000000..b2f9d456 --- /dev/null +++ b/alignmentedas/pom.xml @@ -0,0 +1,28 @@ + + + 4.0.0 + + eu.excitementproject + eop + 1.1.4 + + alignmentedas + alignmentedas + http://maven.apache.org + + UTF-8 + + + + + eu.excitementproject + core + 1.1.4 + + + eu.excitementproject + lap + 1.1.4 + + + diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/P1EdaRTERunner.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/P1EdaRTERunner.java new file mode 100644 index 00000000..72d2de7f --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/P1EdaRTERunner.java @@ -0,0 +1,126 @@ +package eu.excitementproject.eop.alignmentedas; + +import java.io.File; +import java.io.IOException; +import java.util.List; + +import org.apache.commons.io.FileUtils; +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.uima.jcas.JCas; + +import eu.excitementproject.eop.alignmentedas.p1eda.P1EDATemplate; +import eu.excitementproject.eop.alignmentedas.p1eda.TEDecisionWithAlignment; +import eu.excitementproject.eop.alignmentedas.p1eda.instances.SimpleWordCoverageDE; +import eu.excitementproject.eop.alignmentedas.p1eda.instances.SimpleWordCoverageEN; +import eu.excitementproject.eop.alignmentedas.p1eda.instances.SimpleWordCoverageIT; +import eu.excitementproject.eop.alignmentedas.p1eda.sandbox.WNVOMT; +import eu.excitementproject.eop.alignmentedas.p1eda.sandbox.WithVO; +import eu.excitementproject.eop.alignmentedas.p1eda.sandbox.WithoutVO; +import eu.excitementproject.eop.common.EDAException; +import eu.excitementproject.eop.lap.LAPException; +import 
eu.excitementproject.eop.lap.dkpro.TreeTaggerDE; +import eu.excitementproject.eop.lap.dkpro.TreeTaggerEN; +import eu.excitementproject.eop.lap.dkpro.TreeTaggerIT; +import eu.excitementproject.eop.lap.implbase.LAP_ImplBase; + +/** + * A simple (EOP)-RTE XML data runner for P1EDA configurations + * + */ +@SuppressWarnings("unused") +public class P1EdaRTERunner +{ + public static void main( String[] args ) + { + BasicConfigurator.configure(); + Logger.getRootLogger().setLevel(Level.INFO); // set INFO to hide Debug info. + + try + { + // Prepare LAP and EDA (here, both for English) and eval on RTE3 (again, EN) + LAP_ImplBase lapEN = new TreeTaggerEN(); + P1EDATemplate p1edaEN = new SimpleWordCoverageEN(); // Put your (configured, instance) P1EDA here... + evaluateOnRTE3EN(lapEN, p1edaEN, false); // set final argument true, if lap has not been changed from last call. (to reuse saved XMI files) + + // use evaluateOnRTE3DE for German +// LAP_ImplBase lapDE = new TreeTaggerDE(); +// P1EDATemplate p1edaDE = new SimpleWordCoverageDE(); +// evaluateOnRTE3DE(lapDE, p1edaDE, false); + + // use evaluateOnRTE3IT for Italian +// LAP_ImplBase lapIT = new TreeTaggerIT(); +// P1EDATemplate p1edaIT = new SimpleWordCoverageIT(); +// evaluateOnRTE3IT(lapIT, p1edaIT, false); + + } + catch(Exception e) + { + System.err.println("Run stopped with an exception: " + e.getMessage()); + } + + } + + public static void evaluateOnRTE3EN(LAP_ImplBase lap, P1EDATemplate p1eda, boolean isXmiAlreadyPreprocessed) throws LAPException, EDAException, IOException + { + File rteTrainingXML = new File("../core/src/main/resources/data-set/English_dev.xml"); + File rteTestingXML = new File("../core/src/main/resources/data-set/English_test.xml"); + + evaluateOnRTEData(lap, p1eda, rteTrainingXML, rteTestingXML, isXmiAlreadyPreprocessed); + } + + public static void evaluateOnRTE3DE(LAP_ImplBase lap, P1EDATemplate p1eda, boolean isXmiAlreadyPreprocessed) throws LAPException, EDAException, IOException + { + File 
rteTrainingXML = new File("../core/src/main/resources/data-set/German_dev.xml"); + File rteTestingXML = new File("../core/src/main/resources/data-set/German_test.xml"); + + evaluateOnRTEData(lap, p1eda, rteTrainingXML, rteTestingXML, isXmiAlreadyPreprocessed); + } + + public static void evaluateOnRTE3IT(LAP_ImplBase lap, P1EDATemplate p1eda, boolean isXmiAlreadyPreprocessed) throws LAPException, EDAException, IOException + { + File rteTrainingXML = new File("../core/src/main/resources/data-set/Italian_dev.xml"); + File rteTestingXML = new File("../core/src/main/resources/data-set/Italian_test.xml"); + + evaluateOnRTEData(lap, p1eda, rteTrainingXML, rteTestingXML, isXmiAlreadyPreprocessed); + } + + public static void evaluateOnRTEData(LAP_ImplBase lap, P1EDATemplate p1eda, File trainXML, File testXML, boolean xmiAlreadyPreprocessed) throws LAPException, EDAException, IOException + { + File trainXmiDir = new File("target/trainingXmis/"); + File evalXmiDir = new File("target/testingXmis/"); + + if (!xmiAlreadyPreprocessed) + { + runLAPForXmis(lap, trainXML, trainXmiDir); + runLAPForXmis(lap, testXML, evalXmiDir); + } + + // Train the instance, and save model. 
+ //File classifierModel = new File ("target/cModel.model"); + File classifierModel = new File (CLASSIFIER_MODEL_NAME); + p1eda.startTraining(trainXmiDir, classifierModel); + + // evaluate with test(eval) data + List evalResult = p1eda.evaluateModelWithGoldXmis(evalXmiDir); + + System.out.println("(accuracy, f1, prec, recall, true positive ratio, true negative ratio)"); + System.out.println(evalResult.toString()); + } + + public static void runLAPForXmis(LAP_ImplBase lap, File rteInputXML, File xmiDir) throws LAPException, IOException + { + + if (xmiDir.exists()) { + // delete all contents + FileUtils.deleteDirectory(xmiDir); + } + xmiDir.mkdirs(); + + lap.processRawInputFormat(rteInputXML, xmiDir); + } + + static int unused; + static final String CLASSIFIER_MODEL_NAME = "target/temp.cmodel"; +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/P1EDAFullTemplate.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/P1EDAFullTemplate.java new file mode 100644 index 00000000..6fec54be --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/P1EDAFullTemplate.java @@ -0,0 +1,660 @@ +/** + * + */ +package eu.excitementproject.eop.alignmentedas.p1eda; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.Vector; + +import org.apache.log4j.Logger; +import org.apache.uima.cas.CASException; +import org.apache.uima.cas.FSIterator; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.cas.TOP; +import org.apache.uima.jcas.tcas.Annotation; +import org.uimafit.util.JCasUtil; + +import eu.excitement.type.entailment.Pair; +import eu.excitementproject.eop.alignmentedas.p1eda.classifiers.EDABinaryClassifierFromWeka; +import 
eu.excitementproject.eop.alignmentedas.p1eda.subs.ClassifierException; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.DecisionLabelWithConfidence; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.EDAClassifierAbstraction; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.FeatureValue; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.LabeledInstance; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ParameterValue; +import eu.excitementproject.eop.common.DecisionLabel; +//import eu.excitementproject.eop.alignmentedas.p1eda.subs.ParameterValue; +import eu.excitementproject.eop.common.EDABasic; +import eu.excitementproject.eop.common.EDAException; +import eu.excitementproject.eop.common.configuration.CommonConfig; +import eu.excitementproject.eop.lap.LAPException; +import eu.excitementproject.eop.lap.PlatformCASProber; +import eu.excitementproject.eop.lap.implbase.LAP_ImplBase; +import static eu.excitementproject.eop.lap.PlatformCASProber.probeCas; + +// WARNING --- A work in progress. don't try to run it yet. +// TODO: Make this as an extension of P1EDASimpleTemplate (hmm.?) + +/** + * This is a template, abstract class for P1 EDA. + * + *

What this template class is for?

+ *

+ * It provides basic "flow" for process and training of alignment-based EDA. + * The template is an "abstract" class, and supposed to be extended to become + * an actual Entailment Decision Algorithm (EDA) class. + * + *

+ * The following two methods *must* be extended (override) + *

+ * + *

+ * The following methods are optional to be extended (EDA can work and classify + * Entailment, even though they are not overridden). They provide optional capabilities. + * + *

+ * + *

It is recommended to check actual example codes to see how you make + * an alignment-based EDA by extending this abstract class. Please check the following + * classes: {@link ClassName} TODO update + * + *

Classifier capability is embedded within the template

+ *

+ * Classification capability of this template is provided by using Weka + * classifier. Changing the classifier and option within Weka is simple: + * TODO change this, and this. + * + * If you want to use some other classifiers; + * TODO check and write this. (e.g. one with Rui's classifier) + * + * Please see the following document for more info: + * TODO fill in URL + * + * @author Tae-Gil Noh + * + */ +public abstract class P1EDAFullTemplate implements EDABasic { + + /** + * The default, no argument constructor for this abstract class. Does nothing + * but initializing two mandatory final fields. They are: logger and classifier. + * + * This constructor does not set evaluateAlignmentParameters. (it will be set as null) + * If your evaluateAlignment override *does not* require parameters (e.g. simple feature + * extractors that does not require parameters); then using this constructor is enough. + * (evaluateAlignments() will be always called with null). + * + * For example, see SimpleWordCoverageP1EDA. + * + */ + public P1EDAFullTemplate() throws EDAException + { + this(null); + } + + + /** + * + * The main constructor for this abstract class. + * + * It does two things: initializing logger + classifier, and store Parameter value + * + * The constructor gets one Vector of parameters: This parameter vector is so called + * "Feature Extractor parameters" or "Evaluate Alignment parameters" -- and the value + * will be passed to evaluateAlignments(). + * + * If your evaluateAlignment override *require* parameters (e.g. weights for each + * aligner, etc), then you have to use this constructor. 
+ * + * @param evaluateAlignmentParameter + * @throws EDAException + */ + public P1EDAFullTemplate(Vector evaluateAlignmentParameter) throws EDAException + { + this.logger = Logger.getLogger(getClass()); + this.classifier = prepareClassifier(); + this.evaluateAlignmentParameters = evaluateAlignmentParameter; + } + + public TEDecisionWithAlignment process(JCas eopJCas) throws EDAException + { + // Here's the basic step of P1 EDA's process(), outlined by the template. + // Note that, the template assumes that you override each of the step-methods. + // (although you are free to override any, including this process()). + + logger.debug("process() has been called with CAS " + eopJCas); + + // Step 0. check JCas: a correct one with all needed annotations? + checkInputJCas(eopJCas); + String pairID = getTEPairID(eopJCas); + logger.info("processing pair with ID: " + pairID); + + // Step 1. add alignments. The method will add various alignment.Link instances + // Once this step is properly called, the JCas holds alignment.Link data in it. + logger.debug("calling addAlignments"); + addAlignments(eopJCas); + + // Step 2. (this is an optional step.) The method will interact / output + // the added alignment links for debug / analysis purpose. (for Tracer) + logger.debug("calling visualizeAlignments"); + visualizeAlignments(eopJCas); + + // Step 3. + logger.debug("calling evaluateAlignments"); + Vector featureValues = evaluateAlignments(eopJCas, evaluateAlignmentParameters); + logger.debug("evaluateAlignments returned feature vector as of; "); + logger.debug(featureValues.toString()); + + // Step 4. (this is also an optional step.) + logger.debug("calling evaluateAlignments"); + visualizeEdaInternals(); + + // Step 5. + // Classification. 
+ logger.debug("calling classifyEntailment"); + DecisionLabelWithConfidence result = classifyEntailment(featureValues); + + // Finally, return a TEDecision object with CAS (which holds alignments) + logger.debug("TEDecision object generated and being returned: " + result.getLabel() + ", " + result.getConfidence()); + return new TEDecisionWithAlignment(result.getLabel(), result.getConfidence(), pairID, eopJCas); + } + + public void initialize(CommonConfig conf) throws EDAException + { + // TODO read from common config table, and call argument version + } + + public void initialize(File modelToLoadBaseName) throws EDAException + { + File classifierModelFile = new File(modelToLoadBaseName.getAbsolutePath() + CLASSIFIER_MODEL_POSTFIX); + File paramSerFile = new File(modelToLoadBaseName.getAbsolutePath() + PARAMETER_SER_POSTFIX); + + try + { + classifier.loadClassifierModel(classifierModelFile); + loadEDAStates(paramSerFile); + } + catch (ClassifierException ce) + { + throw new EDAException("Loading classifier model and/or parameter failed: ", ce); + } + } + + public void startTraining(CommonConfig conf) + { + // TODO read from common config, and call argument version, + } + + + // TODO: for parameter optimization. Once parameter optimization comes in, + // training-sub methods will be provided. + // "Training Classifier" + // "Train Classifier With Parameter Optimizations" + + public void startTraining(File dirTrainingDataXMIFiles, File modelToStoreBaseName) throws EDAException + { + + // This work method will read Xmi files and convert them to labeled feature vectors + // what we call as "LabeledInstance": + // The method does so, by calling "addAlignments", "evaluateAlignments" on + // each of the annotated training(gold) data. 
+ List trainingSet = makeLabeledInstancesFromXmiFiles(dirTrainingDataXMIFiles); + + // finally, calling classifier abstract to train a model + try + { + classifier.createClassifierModel(trainingSet); + } + catch (ClassifierException ce) + { + throw new EDAException("Underlying classifier thrown exception while training a model", ce); + } + + // and store the model and parameters + try + { + classifier.storeClassifierModel(new File(modelToStoreBaseName.getAbsolutePath() + CLASSIFIER_MODEL_POSTFIX)); + this.storeEDAStates(new File(modelToStoreBaseName.getAbsolutePath() + PARAMETER_SER_POSTFIX)); + } + catch (ClassifierException ce) + { + throw new EDAException("Underlying classifier thrown exception while deserializing a model", ce); + } + } + + /** + * A method to be used for evaluation. The method reads XMI files (with labels) and + * use the current model (loaded or trained) and evaluate it over the give XMI files. + * + * returns a List of double values. They are: (accuracy, f1, prec, recall, true positive ratio, true negative ratio) + * + * TODO: CONSIDER: this needs to be broken down into two methods.... or not? + * (decision - for now, as is) + * + * Hmm. Let's say, what is the common step in "optimize" + * + * [PRE] + * 1) Load XMIS (ONCE) + * 2) Add alignments (also ONCE) + * + * [MAIN-LOOP] while exploring (search) best parameters (parameter population, one individual parameter) + * 3) Make a list of labeled instances, with one-individual parameter, and calling evaluateAlignments (MULTIPLE TIMES) + * 4) Train a model with the set, evaluate it. + * + * [POST] + * 5) report the best result, with best individual parameter + * + * Okay, can we reuse, share more from the above? (maybe not. let's worry later. This method itself is almost free.) 
+ * + * @return a double list: (accuracy, f1, prec, recall, true positive ratio, true negative ratio) + + */ + public List evaluateModelWithGoldXmis(File annotatedXmiFiles) throws EDAException + { + // read annotatedXmiFiles, and make a set of labeled instances, + // by calling the utility method (the same one used in startTraining()) + List goldData = makeLabeledInstancesFromXmiFiles(annotatedXmiFiles); + + + // ask the classifier to evaluate it (with current loaded/trained model) on the provided labeled data + List evaluationResult = null; + try { + evaluationResult = classifier.evaluateClassifier(goldData); + } + catch (ClassifierException ce) + { + throw new EDAException ("The classifier was not ready for evalute (make sure a compatible model properly trained and loaded..)", ce); + } + + return evaluationResult; + } + + public void shutdown() + { + // This template itself has nothing to close down. + } + + + /* + * Mandatory methods (steps) that should be overridden. + */ + + /** + * @param input + * @throws EDAException + */ + protected abstract void addAlignments(JCas input) throws EDAException; + + /** + * @param aJCas + * @return + * @throws EDAException + */ + protected abstract Vector evaluateAlignments(JCas aJCas, Vector featureExtractorParameters) throws EDAException; + + +// /** +// * +// * @return +// */ +// protected abstract Vector prepareEvaluateAlignmentParameters(); + + + /* + * Optional methods (steps) that can be overridden. + */ + + protected void visualizeAlignments(JCas CASWithAlignments) + { + // Template default is doing nothing. + } + + protected void visualizeEdaInternals() + { + // Template default is doing nothing. + } + + /** + * Optional methods (steps) that can be overridden. --- but these + * methods provide default functionalities. You can extend if you want. + * But default implementations would also work as well. 
+ */ + + protected EDAClassifierAbstraction prepareClassifier() throws EDAException + { + try { + return new EDABinaryClassifierFromWeka(); + } + catch (ClassifierException ce) + { + throw new EDAException("Preparing an instance of Classifier for EDA failed: underlying Classifier raised an exception: ", ce); + } + } + + + /** + * The method stores EDA-level states and parameters that are set via training to + * a File, so that can be restored when initializing a new EDA instance. + * + * This method is called within the training step (startTraining()) of the EDA. + * + * If you add some additional "internal parameters" (or states) that can affect + * EDA (e.g. things that would affect addAlignments(), evaluateAlignments() and so on.) + * You will also need to save them, in addition to default EDAStates which are defined + * in the methods. Override this method if you have such a case. + * + * @param paramSerFile + * @throws EDAException + */ + protected void storeEDAStates(File paramSerFile) throws EDAException + { + ArrayList> twoParamVectors = new ArrayList>(); + twoParamVectors.add(evaluateAlignmentParameters); + twoParamVectors.add(internalParameters); + + try + { + FileOutputStream os = new FileOutputStream(paramSerFile); + ObjectOutputStream out = new ObjectOutputStream(os); + out.writeObject(twoParamVectors); + out.close(); + os.close(); + logger.info("Parameters stored in " + paramSerFile.getName()); + } + catch(IOException io) + { + throw new EDAException("IO operation to store / serialize parameters failed", io); + } + } + /** + * The method loads EDA-level states and parameters from a file, that has been + * stored in previous training run. + * + * This method is called within the initialization step (initialize()) of the EDA. + * (Roughly the same instance when it loads Classifier model) + * + * If you add some additional "internal parameters" (or states) that can affect + * EDA (e.g. 
things that would affect addAlignments(), evaluateAlignments() and so on.) + * You will also need to save (after training) and load (when init) them, + * in addition to default EDAStates which are defined in the methods. + * Override this method if you have such a case. + * + * @param paramSerFile + * @throws EDAException + */ + + protected void loadEDAStates(File paramSerFile) throws EDAException + { + if (!paramSerFile.exists()) + { + throw new EDAException("File not found: "+ paramSerFile.getAbsolutePath()); + } + try + { + FileInputStream is = new FileInputStream(paramSerFile); + ObjectInputStream in = new ObjectInputStream(is); + @SuppressWarnings("unchecked") + ArrayList> twoParamVectors = (ArrayList>) in.readObject(); + in.close(); + is.close(); + evaluateAlignmentParameters = twoParamVectors.get(0); + internalParameters = twoParamVectors.get(1); + } + catch(IOException i) + { + throw new EDAException("Reading and deserilaizing the file failed: "+ paramSerFile.getAbsolutePath(), i); + } + catch(ClassNotFoundException c) + { + throw new EDAException("Integrity failure --- serialization file is of an unknown class type that is not parameters "+ paramSerFile.getAbsolutePath(), c); + } + } + + + /** + * This method will be used to check input CAS for P1EDA flow. + * As is, it will do the basic check of CAS as EOP CAS input, via PlatformCASProber.probeCAS(). + * + * If you want to do additional checks, such as existence of specific LAP annotations, + * You can extend this class to do the checks. + * + * EXTENSION of this method is optional, and not mandatory. 
+ * + * @param input JCas that is given to your EDA + * @throws EDAException If the given JCas was not well-formed for your EDA, you should throw this exception + */ + protected void checkInputJCas(JCas input) throws EDAException + { + try { + probeCas(input, null); + } + catch (LAPException e) + { + throw new EDAException("Input CAS is not well-formed CAS as EOP EDA input.", e); + } + } + + protected DecisionLabelWithConfidence classifyEntailment(Vector fValues) throws EDAException + { + DecisionLabelWithConfidence dl = null; + try { + dl = classifier.classifyInstance(fValues); + } + catch (ClassifierException ce) + { + throw new EDAException("underlying classifier throw exception", ce); + } + + return dl; + } + + // + // utility methods + + protected List makeLabeledInstancesFromXmiFiles(File xmiDir) throws EDAException + { + List labeledData = new ArrayList(); + + // walk each XMI files in the Directory ... + File[] files = xmiDir.listFiles(); + if (files == null) + { + throw new EDAException("Path " + xmiDir.getAbsolutePath() + " does not hold XMI files"); + } + + for (File f : files) + { + // is it a XMI file? + // + logger.info("Working with file " + f.getName()); + if(!f.isFile()) + { // no ... + logger.warn(f.toString() + " is not a file... ignore this"); + continue; + } + if(!f.getName().toLowerCase().endsWith("xmi")) // let's trust name, if it does not end with XMI, pass + { + logger.warn(f.toString() + " is not a XMI file... ignoring this"); + continue; + } + + // So, we have an XMI file. 
Load in to CAS + JCas aTrainingPair = null; + try { + aTrainingPair = PlatformCASProber.probeXmi(f, null); + } + catch (LAPException le) + { + logger.warn("File " + f.toString() + " looks like XMI file, but its contents are *not* proper EOP EDA JCas"); + throw new EDAException("failed to read XMI file into a JCas", le); + } + String pairID = getTEPairID(aTrainingPair); + logger.info("processing pair with ID: " + pairID); + + // convert it into one LabeledInstance by calling + // addAlignments and evaluateAlignments on each of them + logger.debug("adding alignments..."); + addAlignments(aTrainingPair); + + logger.debug("evaluating alignments..."); + Vector fv = evaluateAlignments(aTrainingPair, evaluateAlignmentParameters); + DecisionLabel l = getGoldLabel(aTrainingPair); + if (l == null) + { + throw new EDAException("Gold data has been given to be used as a Labeled Instance: However, the CAS holds no Gold Label!"); + } + + LabeledInstance ins = new LabeledInstance(l, fv); + + logger.debug("a labeled instance has been generated from XMI file " + f.getName()); + logger.debug(fv.toString() + ", " + l.toString()); + labeledData.add(ins); + } + + return labeledData; + } + + + + /** + * + * Get Pair ID from a JCas + * + * @param aJCas + * @return + * @throws EDAException + */ + protected String getTEPairID(JCas aJCas) throws EDAException + { + String id = null; + + // check entailment pair, + FSIterator iter = aJCas.getJFSIndexRepository().getAllIndexedFS(Pair.type); + if (iter.hasNext()) + { + Pair p = (Pair) iter.next(); + id = p.getPairID(); + + if (iter.hasNext()) + { + logger.warn("This JCas has more than one TE Pairs: This P1EDA template only processes single-pair inputs. 
Any additional pairs are being ignored, and only the first Pair will be processed."); + } + return id; + } + else + { + throw new EDAException("Input CAS is not well-formed CAS as EOP EDA input: missing TE pair"); + } + } + + /** + * get Gold Label from an annotated JCas with Entailment.Pair + * + * @param aJCas + * @return + * @throws EDAException + */ + protected DecisionLabel getGoldLabel(JCas aJCas) throws EDAException + { + String labelString; + DecisionLabel labelEnum; + + FSIterator iter = aJCas.getJFSIndexRepository().getAllIndexedFS(Pair.type); + if (iter.hasNext()) + { + Pair p = (Pair) iter.next(); + labelString = p.getGoldAnswer(); + + if (labelString == null) // there is no gold answer annotated in this Pair + return null; + + labelEnum = DecisionLabel.getLabelFor(labelString); + + if (iter.hasNext()) + { + logger.warn("This JCas has more than one TE Pairs: This P1EDA template only processes single-pair inputs. Any additional pairs are being ignored, and only the first Pair will be processed."); + } + return labelEnum; + } + else + { + throw new EDAException("Input CAS is not well-formed CAS as EOP EDA input: missing TE pair"); + } + } + + /** + * A check utility method. + * A static utility method for checking, if or not, the CAS has a specific + * type of annotation instances, or not. + * + * e.g. haveCASAnnotationType(aJCas, + * + * @param aJCas + * @param annot + * @return + */ + protected static boolean haveCASAnnotationType(JCas aJCas, Class type) throws CASException + { + // returns false, if both of the views do not have the requested type. 
+ JCas hypoView = aJCas.getView(LAP_ImplBase.HYPOTHESISVIEW); + + if (JCasUtil.select(hypoView, type).size() > 0) + return true; + + JCas textView = aJCas.getView(LAP_ImplBase.TEXTVIEW); + + if (JCasUtil.select(textView, type).size() > 0) + return true; + + return false; + } + + + + /** + * This is the vector of parameters, that will be stored at the init time + * (at initialize() for processing, and start_training() time for training) and + * that will be passed to internal method evaluateAlignments(). + * + * Within this P1EDA Template, this value is a fixed value, where it is set as + * a configuration. For "Optimizing" this value ... TODO see this extended template + * + */ + protected Vector evaluateAlignmentParameters; + + + /** + * This is a vector of parameters, that has been "trained" in one startTraining() session. + * This is internal parameters that and external "parameter" optimizer + * + */ + protected Vector internalParameters = null; + + + + + // + // private final fields... 
+ // + private final EDAClassifierAbstraction classifier; + protected final Logger logger; + + // some constants + public final String CLASSIFIER_MODEL_POSTFIX = ".classifier.model"; + public final String PARAMETER_SER_POSTFIX = ".parameters.ser"; + + +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/P1EDATemplate.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/P1EDATemplate.java new file mode 100644 index 00000000..71301977 --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/P1EDATemplate.java @@ -0,0 +1,616 @@ +/** + * + */ +package eu.excitementproject.eop.alignmentedas.p1eda; + +import java.io.File; +//import java.io.FileInputStream; +//import java.io.FileOutputStream; +//import java.io.IOException; +//import java.io.ObjectInputStream; +//import java.io.ObjectOutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Vector; + +import org.apache.log4j.Logger; +import org.apache.uima.cas.CASException; +import org.apache.uima.cas.FSIterator; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.cas.TOP; +import org.apache.uima.jcas.tcas.Annotation; +import org.uimafit.util.JCasUtil; + +import eu.excitement.type.entailment.Pair; +import eu.excitementproject.eop.alignmentedas.p1eda.classifiers.EDABinaryClassifierFromWeka; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ClassifierException; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.DecisionLabelWithConfidence; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.EDAClassifierAbstraction; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.FeatureValue; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.LabeledInstance; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ParameterValue; +import eu.excitementproject.eop.common.DecisionLabel; +//import 
eu.excitementproject.eop.alignmentedas.p1eda.subs.ParameterValue; +import eu.excitementproject.eop.common.EDABasic; +import eu.excitementproject.eop.common.EDAException; +import eu.excitementproject.eop.common.configuration.CommonConfig; +import eu.excitementproject.eop.common.configuration.NameValueTable; +import eu.excitementproject.eop.common.exception.ConfigurationException; +import eu.excitementproject.eop.lap.LAPException; +import eu.excitementproject.eop.lap.PlatformCASProber; +import eu.excitementproject.eop.lap.implbase.LAP_ImplBase; +import static eu.excitementproject.eop.lap.PlatformCASProber.probeCas; + +/** + * This is a template, abstract class for P1 EDA. + * + *

What this template class is for?

+ *

+ * It provides basic "flow" for process and training of alignment-based EDA. + * The template is an "abstract" class, and supposed to be extended to become + * an actual Entailment Decision Algorithm (EDA) class. + * + *

+ * The following two methods *must* be extended (override) + *

+ * + *

+ * The following methods are optional to be extended (EDA can work and classify + * Entailment, even though they are not overridden). They provide optional capabilities. + * + *

+ * + *

It is recommended to check actual example codes to see how you make + * an alignment-based EDA by extending this abstract class. Please check the following + * classes: {@link ClassName} TODO update + * + *

Classifier capability is embedded within the template

+ *

+ * Classification capability of this template is provided by using Weka + * classifier. Changing the classifier and option within Weka is simple: + * TODO change this, and this. + * + * If you want to use some other classifiers; + * TODO check and write this. (e.g. one with Rui's classifier) + * + * Please see the following document for more info: + * TODO fill in URL + * + * @author Tae-Gil Noh + * + */ + +public abstract class P1EDATemplate implements EDABasic { + + /** + * the language + */ + protected String language; + + /** + * the training data directory + */ + protected String trainDIR = null; + + /** + * the test data directory + */ + protected String testDIR = null; + + + /** + * the model file + */ + protected String modelFile = null; + + /** + * The default, no argument constructor for this abstract class. Does nothing + * but initializing two mandatory final fields. They are: logger and classifier. + * + * This constructor does not set evaluateAlignmentParameters. (it will be set as null) + * If your evaluateAlignment override *does not* require parameters (e.g. simple feature + * extractors that does not require parameters); then using this constructor is enough. + * (evaluateAlignments() will be always called with null). + * + * For example, see SimpleWordCoverageP1EDA. + * + */ + public P1EDATemplate() throws EDAException + { + this(null); + } + + + /** + * + * The main constructor for this abstract class. + * + * It does two things: initializing logger + classifier, and store Parameter value + * + * The constructor gets one Vector of parameters: This parameter vector is so called + * "Feature Extractor parameters" or "Evaluate Alignment parameters" -- and the value + * will be passed to evaluateAlignments(). + * + * If your evaluateAlignment override *require* parameters (e.g. weights for each + * aligner, etc), then you have to use this constructor. 
+ * + * @param evaluateAlignmentParameter + * @throws EDAException + */ + public P1EDATemplate(Vector evaluateAlignmentParameter) throws EDAException + { + this.logger = Logger.getLogger(getClass()); + this.classifier = prepareClassifier(); + this.evaluateAlignmentParameters = evaluateAlignmentParameter; + } + + public TEDecisionWithAlignment process(JCas eopJCas) throws EDAException + { + // Here's the basic step of P1 EDA's process(), outlined by the template. + // Note that, the template assumes that you override each of the step-methods. + // (although you are free to override any, including this process()). + + logger.debug("process() has been called with CAS " + eopJCas); + + // Step 0. check JCas: a correct one with all needed annotations? + checkInputJCas(eopJCas); + String pairID = getTEPairID(eopJCas); + logger.info("processing pair with ID: " + pairID); + + // Step 1. add alignments. The method will add various alignment.Link instances + // Once this step is properly called, the JCas holds alignment.Link data in it. + logger.debug("calling addAlignments"); + addAlignments(eopJCas); + + // Step 2. (this is an optional step.) The method will interact / output + // the added alignment links for debug / analysis purpose. (for Tracer) + logger.debug("calling visualizeAlignments"); + visualizeAlignments(eopJCas); + + // Step 3. + logger.debug("calling evaluateAlignments"); + Vector featureValues = evaluateAlignments(eopJCas, evaluateAlignmentParameters); + logger.debug("evaluateAlignments returned feature vector as of; "); + logger.debug(featureValues.toString()); + + // Step 4. (this is also an optional step.) + logger.debug("calling evaluateAlignments"); + visualizeEdaInternals(); + + // Step 5. + // Classification. 
+ logger.debug("calling classifyEntailment"); + DecisionLabelWithConfidence result = classifyEntailment(featureValues); + + // Finally, return a TEDecision object with CAS (which holds alignments) + logger.debug("TEDecision object generated and being returned: " + result.getLabel() + ", " + result.getConfidence()); + return new TEDecisionWithAlignment(result.getLabel(), result.getConfidence(), pairID, eopJCas); + } + + public void initialize(CommonConfig conf) throws EDAException + { + + NameValueTable nameValueTable; + try { + nameValueTable = conf.getSection(this.getClass().getCanonicalName()); + File modelFile = nameValueTable.getFile("modelFile"); + initialize(modelFile); + } catch (ConfigurationException e) { + throw new EDAException ("Reading configuration data failed: " + e.getMessage(), e); + } + } + + public void initialize(File classifierModelFile) throws EDAException + { + try + { + classifier.loadClassifierModel(classifierModelFile); + } + catch (ClassifierException ce) + { + throw new EDAException("Loading classifier model and/or parameter failed: ", ce); + } + } + + public void startTraining(CommonConfig conf) throws EDAException + { + // read from common config, and call argument version, + NameValueTable nameValueTable; + try { + nameValueTable = conf.getSection(this.getClass().getCanonicalName()); + + File trainDir = nameValueTable.getFile("trainDir"); + File modelFileToCreate = nameValueTable.getFile("modelFile"); + + startTraining(trainDir, modelFileToCreate); + } catch (ConfigurationException ce) { + throw new EDAException("Reading configuration from CommonConfig failed: " + ce.getMessage(), ce); + } + } + + + // TODO: for parameter optimization. Once parameter optimization comes in, + // training-sub methods will be provided. 
+ // "Training Classifier" + // "Train Classifier With Parameter Optimizations" + + public void startTraining(File dirTrainingDataXMIFiles, File classifierModelToStore) throws EDAException + { + + // This work method will read Xmi files and convert them to labeled feature vectors + // what we call as "LabeledInstance": + // The method does so, by calling "addAlignments", "evaluateAlignments" on + // each of the annotated training(gold) data. + + + List trainingSet = makeLabeledInstancesFromXmiFiles(dirTrainingDataXMIFiles); + + // finally, calling classifier abstract to train a model + try + { + classifier.createClassifierModel(trainingSet); + } + catch (ClassifierException ce) + { + throw new EDAException("Underlying classifier thrown exception while training a model", ce); + } + + // and store the model and parameters + try + { + classifier.storeClassifierModel(classifierModelToStore); + } + catch (ClassifierException ce) + { + throw new EDAException("Underlying classifier thrown exception while deserializing a model", ce); + } + } + + /** + * A method to be used for evaluation. The method reads XMI files (with labels) and + * use the current model (loaded or trained) and evaluate it over the give XMI files. + * + * returns a List of double values. They are: (accuracy, f1, prec, recall, true positive ratio, true negative ratio) + * + * TODO: CONSIDER: this needs to be broken down into two methods.... or not? + * (decision - for now, as is) + * + * Hmm. Let's say, what is the common step in "optimize" + * + * [PRE] + * 1) Load XMIS (ONCE) + * 2) Add alignments (also ONCE) + * + * [MAIN-LOOP] while exploring (search) best parameters (parameter population, one individual parameter) + * 3) Make a list of labeled instances, with one-individual parameter, and calling evaluateAlignments (MULTIPLE TIMES) + * 4) Train a model with the set, evaluate it. 
+ * + * [POST] + * 5) report the best result, with best individual parameter + * + * Okay, can we reuse, share more from the above? (maybe not. let's worry later. This method itself is almost free.) + * + * @return a double list: (accuracy, f1, prec, recall, true positive ratio, true negative ratio) + + */ + public List evaluateModelWithGoldXmis(File annotatedXmiFiles) throws EDAException + { + // read annotatedXmiFiles, and make a set of labeled instances, + // by calling the utility method (the same one used in startTraining()) + List goldData = makeLabeledInstancesFromXmiFiles(annotatedXmiFiles); + + + // ask the classifier to evaluate it (with current loaded/trained model) on the provided labeled data + List evaluationResult = null; + try { + evaluationResult = classifier.evaluateClassifier(goldData); + } + catch (ClassifierException ce) + { + throw new EDAException ("The classifier was not ready for evalute (make sure a compatible model properly trained and loaded..)", ce); + } + + return evaluationResult; + } + + public void shutdown() + { + // This template itself has nothing to close down. + } + + + /* + * Mandatory methods (steps) that should be overridden. + */ + + /** + * @param input + * @throws EDAException + */ + protected abstract void addAlignments(JCas input) throws EDAException; + + /** + * @param aJCas + * @return + * @throws EDAException + */ + protected abstract Vector evaluateAlignments(JCas aJCas, Vector featureExtractorParameters) throws EDAException; + + +// /** +// * +// * @return +// */ +// protected abstract Vector prepareEvaluateAlignmentParameters(); + + + /* + * Optional methods (steps) that can be overridden. + */ + + protected void visualizeAlignments(JCas CASWithAlignments) + { + // Template default is doing nothing. + } + + protected void visualizeEdaInternals() + { + // Template default is doing nothing. + } + + /** + * Optional methods (steps) that can be overridden. --- but these + * methods provide default functionalities. 
You can extend if you want. + * But default implementations would also work as well. + */ + + protected EDAClassifierAbstraction prepareClassifier() throws EDAException + { + try { + return new EDABinaryClassifierFromWeka(); + } + catch (ClassifierException ce) + { + throw new EDAException("Preparing an instance of Classifier for EDA failed: underlying Classifier raised an exception: ", ce); + } + } + + /** + * This method will be used to check input CAS for P1EDA flow. + * As is, it will do the basic check of CAS as EOP CAS input, via PlatformCASProber.probeCAS(). + * + * If you want to do additional checks, such as existence of specific LAP annotations, + * You can extend this class to do the checks. + * + * EXTENSION of this method is optional, and not mandatory. + * + * @param input JCas that is given to your EDA + * @throws EDAException If the given JCas was not well-formed for your EDA, you should throw this exception + */ + protected void checkInputJCas(JCas input) throws EDAException + { + try { + probeCas(input, null); + } + catch (LAPException e) + { + throw new EDAException("Input CAS is not well-formed CAS as EOP EDA input.", e); + } + } + + protected DecisionLabelWithConfidence classifyEntailment(Vector fValues) throws EDAException + { + DecisionLabelWithConfidence dl = null; + try { + dl = classifier.classifyInstance(fValues); + } + catch (ClassifierException ce) + { + throw new EDAException("underlying classifier throw exception", ce); + } + + return dl; + } + + // + // utility methods + + protected List makeLabeledInstancesFromXmiFiles(File xmiDir) throws EDAException + { + List labeledData = new ArrayList(); + + // walk each XMI files in the Directory ... + File[] files = xmiDir.listFiles(); + if (files == null) + { + throw new EDAException("Path " + xmiDir.getAbsolutePath() + " does not hold XMI files"); + } + Arrays.sort(files); + + for (File f : files) + { + // is it a XMI file? 
+ // + logger.info("Working with file " + f.getName()); + if(!f.isFile()) + { // no ... + logger.warn(f.toString() + " is not a file... ignore this"); + continue; + } + if(!f.getName().toLowerCase().endsWith("xmi")) // let's trust name, if it does not end with XMI, pass + { + logger.warn(f.toString() + " is not a XMI file... ignoring this"); + continue; + } + + // So, we have an XMI file. Load in to CAS + JCas aTrainingPair = null; + try { + aTrainingPair = PlatformCASProber.probeXmi(f, null); + } + catch (LAPException le) + { + logger.warn("File " + f.toString() + " looks like XMI file, but its contents are *not* proper EOP EDA JCas"); + throw new EDAException("failed to read XMI file into a JCas", le); + } + String pairID = getTEPairID(aTrainingPair); + logger.info("processing pair with ID: " + pairID); + + // convert it into one LabeledInstance by calling + // addAlignments and evaluateAlignments on each of them + logger.debug("adding alignments..."); + addAlignments(aTrainingPair); + + logger.debug("evaluating alignments..."); + Vector fv = evaluateAlignments(aTrainingPair, evaluateAlignmentParameters); + DecisionLabel l = getGoldLabel(aTrainingPair); + if (l == null) + { + throw new EDAException("Gold data has been given to be used as a Labeled Instance: However, the CAS holds no Gold Label!"); + } + + LabeledInstance ins = new LabeledInstance(l, fv); + + logger.debug("a labeled instance has been generated from XMI file " + f.getName()); + logger.debug(fv.toString() + ", " + l.toString()); + labeledData.add(ins); + } + + return labeledData; + } + + + + /** + * + * Get Pair ID from a JCas + * + * @param aJCas + * @return + * @throws EDAException + */ + protected String getTEPairID(JCas aJCas) throws EDAException + { + String id = null; + + // check entailment pair, + FSIterator iter = aJCas.getJFSIndexRepository().getAllIndexedFS(Pair.type); + if (iter.hasNext()) + { + Pair p = (Pair) iter.next(); + id = p.getPairID(); + + if (iter.hasNext()) + { + 
logger.warn("This JCas has more than one TE Pairs: This P1EDA template only processes single-pair inputs. Any additional pairs are being ignored, and only the first Pair will be processed."); + } + return id; + } + else + { + throw new EDAException("Input CAS is not well-formed CAS as EOP EDA input: missing TE pair"); + } + } + + /** + * get Gold Label from an annotated JCas with Entailment.Pair + * + * @param aJCas + * @return + * @throws EDAException + */ + protected DecisionLabel getGoldLabel(JCas aJCas) throws EDAException + { + String labelString; + DecisionLabel labelEnum; + + FSIterator iter = aJCas.getJFSIndexRepository().getAllIndexedFS(Pair.type); + if (iter.hasNext()) + { + Pair p = (Pair) iter.next(); + labelString = p.getGoldAnswer(); + + if (labelString == null) // there is no gold answer annotated in this Pair + return null; + + labelEnum = DecisionLabel.getLabelFor(labelString); + + if (iter.hasNext()) + { + logger.warn("This JCas has more than one TE Pairs: This P1EDA template only processes single-pair inputs. Any additional pairs are being ignored, and only the first Pair will be processed."); + } + return labelEnum; + } + else + { + throw new EDAException("Input CAS is not well-formed CAS as EOP EDA input: missing TE pair"); + } + } + + /** + * A check utility method. + * A static utility method for checking, if or not, the CAS has a specific + * type of annotation instances, or not. + * + * e.g. haveCASAnnotationType(aJCas, + * + * @param aJCas + * @param annot + * @return + */ + protected static boolean haveCASAnnotationType(JCas aJCas, Class type) throws CASException + { + // returns false, if both of the views do not have the requested type. 
+ JCas hypoView = aJCas.getView(LAP_ImplBase.HYPOTHESISVIEW); + + if (JCasUtil.select(hypoView, type).size() > 0) + return true; + + JCas textView = aJCas.getView(LAP_ImplBase.TEXTVIEW); + + if (JCasUtil.select(textView, type).size() > 0) + return true; + + return false; + } + + + + /** + * This is the vector of parameters, that will be stored at the init time + * (at initialize() for processing, and start_training() time for training) and + * that will be passed to internal method evaluateAlignments(). + * + * Within this P1EDA Template, this value is a fixed value, where it is set as + * a configuration. For "Optimizing" this value ... TODO see this extended template + * + */ + protected Vector evaluateAlignmentParameters; + + + /** + * This is a vector of parameters, that has been "trained" in one startTraining() session. + * This is internal parameters that and external "parameter" optimizer + * + */ + protected Vector internalParameters = null; + + // + // private final fields... + // + private final EDAClassifierAbstraction classifier; + protected final Logger logger; + + // some constants +// public final String CLASSIFIER_MODEL_POSTFIX = ".classifier.model"; +// public final String PARAMETER_SER_POSTFIX = ".parameters.ser"; + + +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/TEDecisionWithAlignment.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/TEDecisionWithAlignment.java new file mode 100644 index 00000000..4acd61c8 --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/TEDecisionWithAlignment.java @@ -0,0 +1,60 @@ +package eu.excitementproject.eop.alignmentedas.p1eda; + +import org.apache.uima.jcas.JCas; + +import eu.excitementproject.eop.common.DecisionLabel; +import eu.excitementproject.eop.common.TEDecision; + +/** + * A TEDecision implementation that holds the additional information for + * alignment. 
 * The alignment data can be accessed by getting the underlying JCas
 * that holds the alignments, which were used by the (alignment based) EDA
 * to make the entailment decision.
 *
 * The method that has been added for this alignment data is getJCasWithAlignment().
 * It returns the underlying JCas, and all its data (including alignment data) can be
 * accessed as normal JCas data.
 *
 * @author Tae-Gil Noh
 * @since July 2014
 */
public class TEDecisionWithAlignment implements TEDecision {

    // NOTE(review): 'confidence' is a boxed Double while the backing field is a
    // primitive double; passing null here would throw a NullPointerException on
    // unboxing -- confirm that no caller ever passes null.
    public TEDecisionWithAlignment(DecisionLabel label, Double confidence, String pairID, JCas casWithAlignment)
    {
        this.label = label;
        this.confidence = confidence;
        this.pairID = pairID;
        this.theJCas = casWithAlignment;
    }

    @Override
    public DecisionLabel getDecision() {
        return label;
    }

    @Override
    public double getConfidence() {
        return confidence;
    }

    @Override
    public String getPairID() {
        return pairID;
    }

    /**
     * Call this method to access the underlying JCas, which includes the alignment data, and all annotations.
     *
     * @return JCas the JCas that holds alignments and all other annotations that were used for EDA decision.
+ */ + public JCas getJCasWithAlignment() { + return theJCas; + } + + private final DecisionLabel label; + private final double confidence; + private final String pairID; + private final JCas theJCas; + +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/classifiers/EDABinaryClassifierFromWeka.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/classifiers/EDABinaryClassifierFromWeka.java new file mode 100644 index 00000000..13a1196b --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/classifiers/EDABinaryClassifierFromWeka.java @@ -0,0 +1,468 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.classifiers; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; +import java.util.Vector; + +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ClassifierException; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.DecisionLabelWithConfidence; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.EDAClassifierAbstraction; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.FeatureValue; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.LabeledInstance; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ValueException; +import eu.excitementproject.eop.common.DecisionLabel; +import weka.classifiers.Classifier; +import weka.classifiers.Evaluation; +//import weka.classifiers.bayes.NaiveBayes; +import weka.classifiers.functions.Logistic; +import weka.core.Attribute; +import weka.core.FastVector; +import weka.core.Instance; +import weka.core.Instances; + + +/** + * + * An implementation of "EDAClassifierAbstraction", based on Weka. + * + * You can use any of the Weka's Classifiers + * (e.g. Classifier classes that support Weka's Classifier interface, "distributionForInstance") + * + * You can change the underlying classifier (that is supported in Weka) + * by extend/override prepareWekaClassifierInstance(). 
+ * + * Note that, this class only supports binary classifications only; Any LabeledInstance given + * to the training step that is not DecisionLabel.Entailment DecisionLabel.NonEntailment will cause + * the implementation to throw an exception. + * + * @author Tae-Gil Noh + * + */ +public class EDABinaryClassifierFromWeka implements EDAClassifierAbstraction { + + /** + * Default (argument-less) constructor. Will initialize the instance with + * a default Weka-classifier (here, that default is, NaiveBayes with Kernel density estimation) + * + * @throws ClassifierException + */ + public EDABinaryClassifierFromWeka() throws ClassifierException { + this(new Logistic(), null); // logistic regression is generally go good in most situations. + //this(new NaiveBayes(), new String[] {"-K"}); + } + + /** + * The main constructor. + * + * Pass Weka Classifier Instance, and its options. Then the class will provide all EDAClassifierAbstraction + * methods. + * + * Some possible classes for wekaClassifierInstance. (See their Weka JavaDoc for all option stings) + * - NaiveBayes() (weka.classifiers.bayes.NaiveBayes) + * - Logistic() (weka.classifiers.functions.Logistic) (a logistic regression) + * + * TODO: write more some commonly used classifier names, and options; as we test more and more. + * + * @param wekaClassifierInstance One weka Classifier instance (e.g. new Logistic()) + * @param classifierOptions Option String[] for the give classifier instance. 
null means, no passing of options + * @throws ClassifierException + */ + public EDABinaryClassifierFromWeka(Classifier wekaClassifierInstance, String[] classifierOptions) throws ClassifierException + { + this.modelReady = false; // this will become true, only after train (or load model) + this.classifier = wekaClassifierInstance; + + if (classifierOptions != null) + { + try { + classifier.setOptions(classifierOptions); + } + catch (Exception e) + { + throw new ClassifierException("Underlying Weka classifier class failed to initialize.", e); + } + } + } + +// /** +// * Extend/Override this method to change the classifier & its option. +// * +// * @throws ClassifierException +// */ +// protected void prepareWekaClassifierInstance() throws ClassifierException +// { +// modelReady = false; +// classifier = (Classifier)new NaiveBayes(); +// String[] options = {"-K"}; +// try +// { +// classifier.setOptions(options); +// } +// catch (Exception e) +// { +// throw new ClassifierException("Unable to set classifier options", e); +// } +// } + + @Override + public DecisionLabelWithConfidence classifyInstance( + Vector featureVector) throws ClassifierException { + + if (!modelReady) + { + throw new ClassifierException("The classifier is not ready for classification; either training, or loading model should be done before calling classify"); + } + + DecisionLabelWithConfidence result = null; + + // Prepare Feature information (e.g. which index has what feature? required for Weka) + Instances attributeInfo = null; + try { + attributeInfo = buildAttributeInfo(featureVector); + } + catch (ValueException ve) + { + throw new ClassifierException("Reading a FeatureValue failed for some reason - must be a code bug! ", ve); + } + + // Make a new instance, with the given feature vector. 
+ Instance anInstance = null; + try { + anInstance = new Instance(featureVector.size()); + anInstance.setDataset(attributeInfo); + for(int i=0; i < featureVector.size(); i++) + { + FeatureValue f = featureVector.get(i); + Attribute attr = attributeInfo.attribute(i); + + switch (f.getValueType()) + { + case NOMINAL: + anInstance.setValue(attr, f.getNominalValue().toString()); + break; + case BOOLEAN: + anInstance.setValue(attr, f.getBooleanValue().toString()); + break; + case DOUBLE: + anInstance.setValue(attr, f.getDoubleValue()); + } + } + } + catch (ValueException ve) + { + throw new ClassifierException("Reading a FeatureValue failed for some reason - must be a code bug! ", ve); + } + + // Okay, classify the newly prepared instance. + double[] dist = null; + try { + dist = classifier.distributionForInstance(anInstance); + } + catch (Exception e) + { + throw new ClassifierException("Underlying Weka classifier throws an exception:" + e.getMessage(), e); + } + + if (dist[0] > dist[1]) + { + result = new DecisionLabelWithConfidence(DecisionLabel.Entailment, dist[0]); + } + else + { + result = new DecisionLabelWithConfidence(DecisionLabel.NonEntailment, dist[1]); + } + + return result; + } + + @Override + public void createClassifierModel(List goldData) + throws ClassifierException { + + // Okay; first, convert the goldData into "Weka" training data (Instances). + // So we can train a Weka Classifier instance prepared within this wrapper class. + Instances trainingData = null; + try { + trainingData = buildTrainingDataSet(goldData); + + } catch (ValueException ve) + { + throw new ClassifierException("Failed to read FeatureValue of training data. Must be a bug in code", ve); + } + + // Okay, pass trainingData to build classifier. + + try { + classifier.buildClassifier(trainingData); + } + catch (Exception e) + { + throw new ClassifierException("Underlying Weka Classifier throws exception at training time...", e); + } + + // Okay. 
training is done and the classifier is ready for classification. + modelReady = true; + } + + @Override + public void storeClassifierModel(File path) throws ClassifierException { + if (!modelReady) + { + throw new ClassifierException("The classifier is not ready for classification; either training, or loading model should be done before calling classify"); + } + + // Okay, store the model in the given path. + try { + weka.core.SerializationHelper.write(path.getAbsolutePath(), classifier); + } + catch (Exception e) + { + throw new ClassifierException("Serializing the trainined Weka classifier model failed, Weka serializationHelper raised an exception: ", e); + } + + } + + @Override + public void loadClassifierModel(File path) throws ClassifierException { + + if (!path.exists()) + { + throw new ClassifierException("Unable to load trained classifier model; Model file " + path.toString() + " does not exist"); + } + + try { + classifier = (Classifier) weka.core.SerializationHelper.read(path.getAbsolutePath()); + } + catch (Exception e) + { + + } + + modelReady = true; + } + + @Override + public List evaluateClassifier(List goldData) + throws ClassifierException { + + // DECIDED: TODOconsider: make it solely as cross-validation? or option for cross-validation? + // This code does "as-is" evaluation. + // To do cross-validation of the given goldData, support other doCrossValidationEvaluation() + + Instances trainingData = null; + Evaluation eTest = null; + try { + trainingData = buildTrainingDataSet(goldData); + + } catch (ValueException ve) + { + throw new ClassifierException("Failed to read FeatureValue of training data. 
Must be a bug in code", ve); + } + catch (Exception e) + { + throw new ClassifierException("Underlying Weka Classifier Evaluator throws an exception", e); + } + + try { + eTest = new Evaluation(trainingData); + eTest.evaluateModel(classifier, trainingData); + } + catch (Exception e) + { + throw new ClassifierException("Underlying Weka Classifier Evaluator throws an exception", e); + } + + // DCODE - as log debug? + // System.out.println(eTest.toSummaryString()); + + double tp = eTest.weightedTruePositiveRate(); + double tn = eTest.weightedTrueNegativeRate(); + double prec = eTest.weightedPrecision(); + double rec = eTest.weightedRecall(); + double f1 = eTest.weightedFMeasure(); + double accuracy = (eTest.correct()) / (eTest.incorrect() + eTest.correct()); + + List evalResult = new ArrayList(); + evalResult.add(accuracy); + evalResult.add(f1); + evalResult.add(prec); + evalResult.add(rec); + evalResult.add(tp); + evalResult.add(tn); + + return evalResult; + } + + /** + * @param vec + * @return + * @throws ValueException + */ + private Instances buildAttributeInfo(Vector vec) throws ValueException + { + + FastVector fvWekaAttributes = new FastVector(vec.size() + 1); // why +1? 
Some Weka classifier cries out loud if this (column size) is different from + + for (int i=0; i < vec.size(); i++) + { + FeatureValue f = vec.get(i); + Attribute attr = null; + + switch(f.getValueType()) + { + case BOOLEAN: + // build boolean attribute; + // we build a Weka nominal attribute with "true" and "false" + FastVector fvBooleanVal = new FastVector(2); + fvBooleanVal.addElement("true"); + fvBooleanVal.addElement("false"); + attr = new Attribute(Integer.toString(i) + "_aBoolean", fvBooleanVal); + fvWekaAttributes.addElement(attr); + break; + + case NOMINAL: + // build nominal attribute, from enums + Enum e = f.getNominalValue(); + Enum[] elist = e.getClass().getEnumConstants(); + FastVector fvNominalVal = new FastVector(elist.length); + for(int j=0; j < elist.length; j++) + { + fvNominalVal.addElement(elist[j].toString()); + } + attr = new Attribute(Integer.toString(i) + "_aNominal", fvNominalVal); + fvWekaAttributes.addElement(attr); + break; + + case DOUBLE: + // build double (numeric) attribute + attr = new Attribute(Integer.toString(i) + "_aNumeric"); + fvWekaAttributes.addElement(attr); + break; + } + } + + // Features are ready, but put class variable (although not meaningful) + // Some Weka classifiers cry out loud if the number of features (including class variable) is different... 
+ FastVector fvClassVal = new FastVector(2); + fvClassVal.addElement(DecisionLabel.Entailment.toString()); + fvClassVal.addElement(DecisionLabel.NonEntailment.toString()); + Attribute ClassAttribute = new Attribute("theClass", fvClassVal); + fvWekaAttributes.addElement(ClassAttribute); + + + Instances attributeTable = new Instances("table", fvWekaAttributes, 10); + attributeTable.setClass(ClassAttribute); + + return attributeTable; + + } + + private Instances buildTrainingDataSet(List gold) throws ValueException, ClassifierException + { + + // Let's first prepare attribute (feature) header, from the first feature vector + Vector vec = gold.get(0).getFeatureVector(); + int featureSize = vec.size(); + FastVector fvWekaAttributes = new FastVector(featureSize + 1); // + 1 for label column + + + // for each value type, prepare attribute column accordingly ... + for (int i=0; i < vec.size(); i++) + { + FeatureValue f = vec.get(i); + Attribute attr = null; + + switch(f.getValueType()) + { + case BOOLEAN: + // build boolean attribute; + // we build a Weka nominal attribute with "true" and "false" + FastVector fvBooleanVal = new FastVector(2); + fvBooleanVal.addElement("true"); + fvBooleanVal.addElement("false"); + attr = new Attribute(Integer.toString(i) + "_aBoolean", fvBooleanVal); + fvWekaAttributes.addElement(attr); + break; + + case NOMINAL: + // build nominal attribute, from enums + Enum e = f.getNominalValue(); + Enum[] elist = e.getClass().getEnumConstants(); + FastVector fvNominalVal = new FastVector(elist.length); + for(int j=0; j < elist.length; j++) + { + fvNominalVal.addElement(elist[j].toString()); + } + attr = new Attribute(Integer.toString(i) + "_aNominal", fvNominalVal); + fvWekaAttributes.addElement(attr); + break; + + case DOUBLE: + // build double (numeric) attribute + attr = new Attribute(Integer.toString(i) + "_aNumeric"); + fvWekaAttributes.addElement(attr); + break; + } + } + + // finally, add "class (decision label)" column + // This class, limits 
it as ENTAILMENT NONENTAILMENT only. + // (Binary classification only) + FastVector fvClassVal = new FastVector(2); + fvClassVal.addElement(DecisionLabel.Entailment.toString()); + fvClassVal.addElement(DecisionLabel.NonEntailment.toString()); + Attribute ClassAttribute = new Attribute("theClass", fvClassVal); + fvWekaAttributes.addElement(ClassAttribute); + + // okay, prepare an empty instances table with fvWekaAttributes + Instances trainingSet = new Instances("trainingData", fvWekaAttributes, gold.size()); + trainingSet.setClass(ClassAttribute); + + // Table Ready. now populate each and every LabeledInstance into trainingSet + for (LabeledInstance inst : gold) + { + Vector featureVector = inst.getFeatureVector(); + DecisionLabel goldLabel = inst.getLabel(); + + // two sanity checks + // if DecisionLabel is other then Entailment / NonEntailment, raise exception + if ((goldLabel != DecisionLabel.Entailment) && (goldLabel != DecisionLabel.NonEntailment)) + { + throw new ClassifierException("Sorry, this classifier abstract only treats binary classification... "); + } + + // prepare an instance with feature values ... + Instance anInstance = new Instance(featureSize + 1); + anInstance.setDataset(trainingSet); + for(int i=0; i < featureSize; i++) + { + FeatureValue f = featureVector.get(i); + Attribute attr = trainingSet.attribute(i); + + switch (f.getValueType()) + { + case NOMINAL: + anInstance.setValue(attr, f.getNominalValue().toString()); + break; + case BOOLEAN: + anInstance.setValue(attr, f.getBooleanValue().toString()); + break; + case DOUBLE: + anInstance.setValue(attr, f.getDoubleValue()); + } + } + // and finally add class label + anInstance.setValue(ClassAttribute, inst.getLabel().toString()); + + // Okay this instance is ready. Put it in the training set. 
+ trainingSet.add(anInstance); + } + return trainingSet; + } + + // private data + + private Classifier classifier; + private Boolean modelReady; +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/classifiers/package-info.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/classifiers/package-info.java new file mode 100644 index 00000000..f8af16e7 --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/classifiers/package-info.java @@ -0,0 +1,8 @@ +/** + * + * This package holds instances of EDAClassifierAbstraction interfaces. + * + * Various Classification (Machine learning) libraries mapped into EDAClassificationAbstraction. + * + */ +package eu.excitementproject.eop.alignmentedas.p1eda.classifiers; \ No newline at end of file diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/instances/MinimalP1EDA.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/instances/MinimalP1EDA.java new file mode 100644 index 00000000..6f332ef0 --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/instances/MinimalP1EDA.java @@ -0,0 +1,166 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.instances; + +import java.util.Vector; + +import org.apache.uima.jcas.JCas; + +import weka.classifiers.bayes.NaiveBayes; +import weka.classifiers.functions.Logistic; +import weka.classifiers.functions.MultilayerPerceptron; +import weka.classifiers.functions.SimpleLogistic; +import weka.classifiers.functions.VotedPerceptron; +import weka.classifiers.lazy.KStar; +import weka.classifiers.meta.LogitBoost; +import weka.classifiers.trees.J48; +import weka.classifiers.trees.RandomForest; +import eu.excitementproject.eop.alignmentedas.p1eda.P1EDATemplate; +import eu.excitementproject.eop.alignmentedas.p1eda.classifiers.EDABinaryClassifierFromWeka; +import 
eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleProperNounCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleVerbCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleWordCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ClassifierException; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.EDAClassifierAbstraction; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.FeatureValue; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ParameterValue; +import eu.excitementproject.eop.common.EDAException; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException; +import eu.excitementproject.eop.common.component.alignment.PairAnnotatorComponentException; +import eu.excitementproject.eop.common.component.scoring.ScoringComponent; +import eu.excitementproject.eop.common.component.scoring.ScoringComponentException; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.VerbOceanENLinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.WordNetENLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.IdenticalLemmaPhraseLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerDE; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerEN; + +@SuppressWarnings("unused") +public class MinimalP1EDA extends P1EDATemplate { + + public MinimalP1EDA() throws EDAException + { + try { + aligner = new IdenticalLemmaPhraseLinker(); + } + catch (AlignmentComponentException ae) + { + throw new EDAException("Initializing Alignment components failed: " + ae.getMessage(), ae); + } + + wordCoverageScorer = new SimpleWordCoverageCounter(null); + nerCoverageScorer = new SimpleProperNounCoverageCounter(); + verbCoverageScorer = 
new SimpleVerbCoverageCounter(); + } + + @Override + public void addAlignments(JCas input) throws EDAException { + + // Here, just one aligner... (same lemma linker) + try { + aligner.annotate(input); + } + catch (PairAnnotatorComponentException pe) + { + throw new EDAException("Underlying aligner raised an exception", pe); + } + + } + + @Override + public Vector evaluateAlignments(JCas aJCas, Vector param) throws EDAException { + + // The simplest possible method... that works well with simple alignment added + // on addAlignments step. + // count the "covered" ratio (== H term linked) of words in H. + // Note that this instance does not utilize param at all. + + // the feature vector that will be filled in + Vector fv = new Vector(); + try { + Vector score1 = wordCoverageScorer.calculateScores(aJCas); + // we know word Coverage scorer returns 4 numbers. + // ( count covered tokens , count all tokens, count covered content-tokens, count all content-tokens) + // Make two "coverage" ratio now. + + double ratio1 = score1.get(0) / score1.get(1); + double ratio2 = score1.get(2) / score1.get(3); + + logger.debug("Adding feature as: " + score1.get(0) + "/" + score1.get(1)); + logger.debug("Adding feature as: " + score1.get(2) + "/" + score1.get(3)); + fv.add(new FeatureValue(ratio1)); + fv.add(new FeatureValue(ratio2)); + + Vector score2 = nerCoverageScorer.calculateScores(aJCas); + // we know NER Coverage scorer returns 2 numbers. + // (number of ner words covered in H, number of all NER words in H) + // let's make one coverage ratio. + + // ratio of Proper noun coverage ... + double ratio_ner = 0; + // special case first ... + if (score2.get(1) == 0) + ratio_ner = 1.0; + else + { + ratio_ner = score2.get(0) / score2.get(1); + } + fv.add(new FeatureValue(ratio_ner)); + + + Vector score3 = verbCoverageScorer.calculateScores(aJCas); + // we know Verb Coverage counter returns 2 numbers. 
+ // (number of covered Vs in H, number of all Vs in H) + double ratio_V = 0; + // special case first... (hmm would be rare but) + if(score3.get(1) ==0) + ratio_V = 1.0; + else + { + ratio_V = score3.get(0) / score3.get(1); + } + fv.add(new FeatureValue(ratio_V)); + + } + catch (ScoringComponentException se) + { + throw new EDAException("Scoring component raised an exception", se); + } + catch (ArrayIndexOutOfBoundsException obe) + { + throw new EDAException("Integrity failure - this simply shouldn't happen", obe); + } + + return fv; + } + + @Override + protected EDAClassifierAbstraction prepareClassifier() throws EDAException + { + try { + return new EDABinaryClassifierFromWeka(new Logistic(), null); + // you can use other classifiers from Weka, such as ... + //return new EDABinaryClassifierFromWeka(new NaiveBayes(), null); + //return new EDABinaryClassifierFromWeka(new VotedPerceptron(), null); + //return new EDABinaryClassifierFromWeka(new J48(), null); + //return new EDABinaryClassifierFromWeka(new MultilayerPerceptron(), null); + //return new EDABinaryClassifierFromWeka(new KStar(), null); + //return new EDABinaryClassifierFromWeka(new SimpleLogistic(), null); + //return new EDABinaryClassifierFromWeka(new RandomForest(), null); + + } + catch (ClassifierException ce) + { + throw new EDAException("Preparing an instance of Classifier for EDA failed: underlying Classifier raised an exception: ", ce); + } + } + + + AlignmentComponent aligner; + + ScoringComponent wordCoverageScorer; + ScoringComponent nerCoverageScorer; + ScoringComponent verbCoverageScorer; + + + +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/instances/SimpleWordCoverageDE.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/instances/SimpleWordCoverageDE.java new file mode 100644 index 00000000..0793fc43 --- /dev/null +++ 
b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/instances/SimpleWordCoverageDE.java @@ -0,0 +1,193 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.instances; + +import java.util.Vector; + +import org.apache.uima.jcas.JCas; + +import weka.classifiers.bayes.NaiveBayes; +import weka.classifiers.functions.Logistic; +import weka.classifiers.functions.MultilayerPerceptron; +import weka.classifiers.functions.SimpleLogistic; +import weka.classifiers.functions.VotedPerceptron; +import weka.classifiers.lazy.KStar; +import weka.classifiers.meta.LogitBoost; +import weka.classifiers.trees.J48; +import weka.classifiers.trees.RandomForest; +import eu.excitementproject.eop.alignmentedas.p1eda.P1EDATemplate; +import eu.excitementproject.eop.alignmentedas.p1eda.classifiers.EDABinaryClassifierFromWeka; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleProperNounCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleVerbCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleWordCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ClassifierException; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.EDAClassifierAbstraction; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.FeatureValue; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ParameterValue; +import eu.excitementproject.eop.common.EDAException; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException; +import eu.excitementproject.eop.common.component.alignment.PairAnnotatorComponentException; +import eu.excitementproject.eop.common.component.scoring.ScoringComponent; +import eu.excitementproject.eop.common.component.scoring.ScoringComponentException; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.DerivBaseDELinker; +import 
eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.GermaNetDELinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.GermanTransDMDELinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.VerbOceanENLinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.WordNetENLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.IdenticalLemmaPhraseLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerDE; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerEN; + +/** + * A simple German EDA instance based on three basic (language independent) feature extractors. + * + * On this setup, the best value was 64.5% accuracy with the following two aligners. + * (identical lemma + GermaNet) + * + * @author Tae-Gil Noh + * + */ +@SuppressWarnings("unused") +public class SimpleWordCoverageDE extends P1EDATemplate { + + public SimpleWordCoverageDE() throws EDAException + { + // And let's prepare the aligner instances and scoring components... + try { + identicalLemmaLinker = new IdenticalLemmaPhraseLinker(); + meteorParaphraseLinker = new MeteorPhraseLinkerDE(); + derivBaseLinker = new DerivBaseDELinker(); + distSimLinker = new GermanTransDMDELinker(); + germaNetLinker = new GermaNetDELinker("/Users/tailblues/germanet-8.0/GN_V80_XML/"); // please provide correct path for GermaNet!! + // see GermaNetDELinker for detail ... 
+ } + catch (AlignmentComponentException ae) + { + throw new EDAException("Initializing Alignment components failed: " + ae.getMessage(), ae); + } + + wordCoverageScorer = new SimpleWordCoverageCounter(null); + nerCoverageScorer = new SimpleProperNounCoverageCounter(); + verbCoverageScorer = new SimpleVerbCoverageCounter(); + } + + @Override + public void addAlignments(JCas input) throws EDAException { + + try { + identicalLemmaLinker.annotate(input); +// meteorParaphraseLinker.annotate(input); +// derivBaseLinker.annotate(input); +// distSimLinker.annotate(input); + germaNetLinker.annotate(input); + } + catch (PairAnnotatorComponentException pe) + { + throw new EDAException("Underlying aligner raised an exception", pe); + } + + } + + @Override + public Vector evaluateAlignments(JCas aJCas, Vector param) throws EDAException { + + // The simplest possible method... that works well with simple alignment added + // on addAlignments step. + // count the "covered" ratio (== H term linked) of words in H. + // Note that this instance does not utilize param at all. + + // the feature vector that will be filled in + Vector fv = new Vector(); + try { + Vector score1 = wordCoverageScorer.calculateScores(aJCas); + // we know word Coverage scorer returns 4 numbers. + // ( count covered tokens , count all tokens, count covered content-tokens, count all content-tokens) + // Make two "coverage" ratio now. + + double ratio1 = score1.get(0) / score1.get(1); + double ratio2 = score1.get(2) / score1.get(3); + + logger.debug("Adding feature as: " + score1.get(0) + "/" + score1.get(1)); + logger.debug("Adding feature as: " + score1.get(2) + "/" + score1.get(3)); + fv.add(new FeatureValue(ratio1)); + fv.add(new FeatureValue(ratio2)); + + Vector score2 = nerCoverageScorer.calculateScores(aJCas); + // we know NER Coverage scorer returns 2 numbers. + // (number of ner words covered in H, number of all NER words in H) + // let's make one coverage ratio. 
+ + // ratio of Proper noun coverage ... + double ratio_ner = 0; + // special case first ... + if (score2.get(1) == 0) + ratio_ner = 1.0; + else + { + ratio_ner = score2.get(0) / score2.get(1); + } + fv.add(new FeatureValue(ratio_ner)); + + + // VerbCoverage scorer as-is, generally don't work well with German. + // (should use more German specific, predicate coverage approximation) +// Vector score3 = verbCoverageScorer.calculateScores(aJCas); +// // we know Verb Coverage counter returns 2 numbers. +// // (number of covered Vs in H, number of all Vs in H) +// double ratio_V = 0; +// // special case first... (hmm would be rare but) +// if(score3.get(1) ==0) +// ratio_V = 1.0; +// else +// { +// ratio_V = score3.get(0) / score3.get(1); +// } +// fv.add(new FeatureValue(ratio_V)); + + } + catch (ScoringComponentException se) + { + throw new EDAException("Scoring component raised an exception", se); + } + catch (ArrayIndexOutOfBoundsException obe) + { + throw new EDAException("Integrity failure - this simply shouldn't happen", obe); + } + + return fv; + } + + @Override + protected EDAClassifierAbstraction prepareClassifier() throws EDAException + { + try { + return new EDABinaryClassifierFromWeka(new Logistic(), null); + // you can use other classifiers from Weka, such as ... 
+ //return new EDABinaryClassifierFromWeka(new NaiveBayes(), null); + //return new EDABinaryClassifierFromWeka(new VotedPerceptron(), null); + //return new EDABinaryClassifierFromWeka(new J48(), null); + //return new EDABinaryClassifierFromWeka(new MultilayerPerceptron(), null); + //return new EDABinaryClassifierFromWeka(new KStar(), null); + //return new EDABinaryClassifierFromWeka(new SimpleLogistic(), null); + //return new EDABinaryClassifierFromWeka(new RandomForest(), null); + + } + catch (ClassifierException ce) + { + throw new EDAException("Preparing an instance of Classifier for EDA failed: underlying Classifier raised an exception: ", ce); + } + } + + + AlignmentComponent identicalLemmaLinker; + AlignmentComponent meteorParaphraseLinker; + AlignmentComponent derivBaseLinker; + AlignmentComponent distSimLinker; + AlignmentComponent germaNetLinker; + + ScoringComponent wordCoverageScorer; + ScoringComponent nerCoverageScorer; + ScoringComponent verbCoverageScorer; + + + +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/instances/SimpleWordCoverageEN.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/instances/SimpleWordCoverageEN.java new file mode 100644 index 00000000..8bbb4ad6 --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/instances/SimpleWordCoverageEN.java @@ -0,0 +1,187 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.instances; + +import java.util.Vector; + +import org.apache.uima.jcas.JCas; + +import weka.classifiers.bayes.NaiveBayes; +import weka.classifiers.functions.Logistic; +import weka.classifiers.functions.MultilayerPerceptron; +import weka.classifiers.functions.SimpleLogistic; +import weka.classifiers.functions.VotedPerceptron; +import weka.classifiers.lazy.KStar; +import weka.classifiers.meta.LogitBoost; +import weka.classifiers.trees.J48; +import weka.classifiers.trees.RandomForest; +import 
eu.excitementproject.eop.alignmentedas.p1eda.P1EDATemplate; +import eu.excitementproject.eop.alignmentedas.p1eda.classifiers.EDABinaryClassifierFromWeka; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleProperNounCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleVerbCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleWordCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ClassifierException; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.EDAClassifierAbstraction; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.FeatureValue; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ParameterValue; +import eu.excitementproject.eop.common.EDAException; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException; +import eu.excitementproject.eop.common.component.alignment.PairAnnotatorComponentException; +import eu.excitementproject.eop.common.component.scoring.ScoringComponent; +import eu.excitementproject.eop.common.component.scoring.ScoringComponentException; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.VerbOceanENLinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.WordNetENLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.IdenticalLemmaPhraseLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerDE; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerEN; + +/** + * + * + * + * (On this simple coverage setup, best was with all four aligners, with three features (without verb coverage ratio) + * on RTE3: 67.0) + * + * @author Tae-Gil Noh + */ +@SuppressWarnings("unused") +public class SimpleWordCoverageEN extends P1EDATemplate { + + public SimpleWordCoverageEN() throws 
EDAException + { + try { + aligner1 = new IdenticalLemmaPhraseLinker(); + aligner2 = new MeteorPhraseLinkerEN(); + aligner3 = new WordNetENLinker(null); + aligner4 = new VerbOceanENLinker(); + } + catch (AlignmentComponentException ae) + { + throw new EDAException("Initializing Alignment components failed: " + ae.getMessage(), ae); + } + + wordCoverageScorer = new SimpleWordCoverageCounter(null); + nerCoverageScorer = new SimpleProperNounCoverageCounter(); + verbCoverageScorer = new SimpleVerbCoverageCounter(); + } + + @Override + public void addAlignments(JCas input) throws EDAException { + + // Here, just one aligner... (same lemma linker) + try { + aligner1.annotate(input); + aligner2.annotate(input); + aligner3.annotate(input); // WordNet. Really slow in its current form. (several hours) + aligner4.annotate(input); + + } + catch (PairAnnotatorComponentException pe) + { + throw new EDAException("Underlying aligner raised an exception", pe); + } + + } + + @Override + public Vector evaluateAlignments(JCas aJCas, Vector param) throws EDAException { + + // The simplest possible method... that works well with simple alignment added + // on addAlignments step. + // count the "covered" ratio (== H term linked) of words in H. + // Note that this instance does not utilize param at all. + + // the feature vector that will be filled in + Vector fv = new Vector(); + try { + Vector score1 = wordCoverageScorer.calculateScores(aJCas); + // we know word Coverage scorer returns 4 numbers. + // ( count covered tokens , count all tokens, count covered content-tokens, count all content-tokens) + // Make two "coverage" ratio now. 
+ + double ratio1 = score1.get(0) / score1.get(1); + double ratio2 = score1.get(2) / score1.get(3); + + logger.debug("Adding feature as: " + score1.get(0) + "/" + score1.get(1)); + logger.debug("Adding feature as: " + score1.get(2) + "/" + score1.get(3)); + fv.add(new FeatureValue(ratio1)); + fv.add(new FeatureValue(ratio2)); + + Vector score2 = nerCoverageScorer.calculateScores(aJCas); + // we know NER Coverage scorer returns 2 numbers. + // (number of ner words covered in H, number of all NER words in H) + // let's make one coverage ratio. + + // ratio of Proper noun coverage ... + double ratio_ner = 0; + // special case first ... + if (score2.get(1) == 0) + ratio_ner = 1.0; + else + { + ratio_ner = score2.get(0) / score2.get(1); + } + fv.add(new FeatureValue(ratio_ner)); + + + Vector score3 = verbCoverageScorer.calculateScores(aJCas); + // we know Verb Coverage counter returns 2 numbers. + // (number of covered Vs in H, number of all Vs in H) + double ratio_V = 0; + // special case first... (hmm would be rare but) + if(score3.get(1) ==0) + ratio_V = 1.0; + else + { + ratio_V = score3.get(0) / score3.get(1); + } + // For English, verb coverage feature doesn't seem to work well. + //fv.add(new FeatureValue(ratio_V)); + + } + catch (ScoringComponentException se) + { + throw new EDAException("Scoring component raised an exception", se); + } + catch (ArrayIndexOutOfBoundsException obe) + { + throw new EDAException("Integrity failure - this simply shouldn't happen", obe); + } + + // Now return the feature vector. The P1EDA template will use this. + return fv; + } + + @Override + protected EDAClassifierAbstraction prepareClassifier() throws EDAException + { + try { + return new EDABinaryClassifierFromWeka(new Logistic(), null); + // you can use other classifiers from Weka, such as ... 
+ //return new EDABinaryClassifierFromWeka(new NaiveBayes(), null); + //return new EDABinaryClassifierFromWeka(new VotedPerceptron(), null); + //return new EDABinaryClassifierFromWeka(new J48(), null); + //return new EDABinaryClassifierFromWeka(new MultilayerPerceptron(), null); + //return new EDABinaryClassifierFromWeka(new KStar(), null); + //return new EDABinaryClassifierFromWeka(new SimpleLogistic(), null); + //return new EDABinaryClassifierFromWeka(new RandomForest(), null); + + } + catch (ClassifierException ce) + { + throw new EDAException("Preparing an instance of Classifier for EDA failed: underlying Classifier raised an exception: ", ce); + } + } + + + AlignmentComponent aligner1; + AlignmentComponent aligner2; + AlignmentComponent aligner3; + AlignmentComponent aligner4; + + ScoringComponent wordCoverageScorer; + ScoringComponent nerCoverageScorer; + ScoringComponent verbCoverageScorer; + + + +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/instances/SimpleWordCoverageIT.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/instances/SimpleWordCoverageIT.java new file mode 100644 index 00000000..144a72a4 --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/instances/SimpleWordCoverageIT.java @@ -0,0 +1,184 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.instances; + +import java.io.File; +import java.util.Vector; + +import org.apache.uima.jcas.JCas; + +import weka.classifiers.bayes.NaiveBayes; +import weka.classifiers.functions.Logistic; +import weka.classifiers.functions.MultilayerPerceptron; +import weka.classifiers.functions.SimpleLogistic; +import weka.classifiers.functions.VotedPerceptron; +import weka.classifiers.lazy.KStar; +import weka.classifiers.meta.LogitBoost; +import weka.classifiers.trees.J48; +import weka.classifiers.trees.RandomForest; +import eu.excitementproject.eop.alignmentedas.p1eda.P1EDATemplate; +import 
eu.excitementproject.eop.alignmentedas.p1eda.classifiers.EDABinaryClassifierFromWeka; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleProperNounCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleVerbCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleWordCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ClassifierException; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.EDAClassifierAbstraction; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.FeatureValue; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ParameterValue; +import eu.excitementproject.eop.common.EDAException; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException; +import eu.excitementproject.eop.common.component.alignment.PairAnnotatorComponentException; +import eu.excitementproject.eop.common.component.scoring.ScoringComponent; +import eu.excitementproject.eop.common.component.scoring.ScoringComponentException; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.VerbOceanENLinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.WordNetENLinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.WordNetITLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.IdenticalLemmaPhraseLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerDE; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerEN; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerIT; + +/** + * + * + * (Best configuration on this simple coverage Italian was: 65.125 on RTE3.) 
+ * + * @author Tae-Gil Noh + */ +@SuppressWarnings("unused") +public class SimpleWordCoverageIT extends P1EDATemplate { + + public SimpleWordCoverageIT() throws EDAException + { + try { + identicalLemmaLinker = new IdenticalLemmaPhraseLinker(); + paraphraseLinker = new MeteorPhraseLinkerIT(); + italianWordNetLinker = new WordNetITLinker(new File("/Users/tailblues/eop-resources-1.1.3/ontologies/ItalianWordNet-dict")); + } + catch (AlignmentComponentException ae) + { + throw new EDAException("Initializing Alignment components failed: " + ae.getMessage(), ae); + } + + wordCoverageScorer = new SimpleWordCoverageCounter(null); + nerCoverageScorer = new SimpleProperNounCoverageCounter(); + verbCoverageScorer = new SimpleVerbCoverageCounter(); + } + + @Override + public void addAlignments(JCas input) throws EDAException { + + try { + identicalLemmaLinker.annotate(input); + // slightly better result without using paraphrase linker, but... + paraphraseLinker.annotate(input); + italianWordNetLinker.annotate(input); + } + catch (PairAnnotatorComponentException pe) + { + throw new EDAException("Underlying aligner raised an exception", pe); + } + + } + + @Override + public Vector evaluateAlignments(JCas aJCas, Vector param) throws EDAException { + + // The simplest possible method... that works well with simple alignment added + // on addAlignments step. + // count the "covered" ratio (== H term linked) of words in H. + // Note that this instance does not utilize param at all. + + // the feature vector that will be filled in + Vector fv = new Vector(); + try { + Vector score1 = wordCoverageScorer.calculateScores(aJCas); + // we know word Coverage scorer returns 4 numbers. + // ( count covered tokens , count all tokens, count covered content-tokens, count all content-tokens) + // Make two "coverage" ratio now. 
+ + double ratio1 = score1.get(0) / score1.get(1); + double ratio2 = score1.get(2) / score1.get(3); + + logger.debug("Adding feature as: " + score1.get(0) + "/" + score1.get(1)); + logger.debug("Adding feature as: " + score1.get(2) + "/" + score1.get(3)); + fv.add(new FeatureValue(ratio1)); + fv.add(new FeatureValue(ratio2)); + + Vector score2 = nerCoverageScorer.calculateScores(aJCas); + // we know NER Coverage scorer returns 2 numbers. + // (number of ner words covered in H, number of all NER words in H) + // let's make one coverage ratio. + + // ratio of Proper noun coverage ... + double ratio_ner = 0; + // special case first ... + if (score2.get(1) == 0) + ratio_ner = 1.0; + else + { + ratio_ner = score2.get(0) / score2.get(1); + } + // For Italian, PN coverage seems to work less well... + //fv.add(new FeatureValue(ratio_ner)); + + + Vector score3 = verbCoverageScorer.calculateScores(aJCas); + // we know Verb Coverage counter returns 2 numbers. + // (number of covered Vs in H, number of all Vs in H) + double ratio_V = 0; + // special case first... (hmm would be rare but) + if(score3.get(1) ==0) + ratio_V = 1.0; + else + { + ratio_V = score3.get(0) / score3.get(1); + } + fv.add(new FeatureValue(ratio_V)); + + } + catch (ScoringComponentException se) + { + throw new EDAException("Scoring component raised an exception", se); + } + catch (ArrayIndexOutOfBoundsException obe) + { + throw new EDAException("Integrity failure - this simply shouldn't happen", obe); + } + + return fv; + } + + @Override + protected EDAClassifierAbstraction prepareClassifier() throws EDAException + { + try { + return new EDABinaryClassifierFromWeka(new Logistic(), null); + // you can use other classifiers from Weka, such as ... 
+ //return new EDABinaryClassifierFromWeka(new NaiveBayes(), null); + //return new EDABinaryClassifierFromWeka(new VotedPerceptron(), null); + //return new EDABinaryClassifierFromWeka(new J48(), null); + //return new EDABinaryClassifierFromWeka(new MultilayerPerceptron(), null); + //return new EDABinaryClassifierFromWeka(new KStar(), null); + //return new EDABinaryClassifierFromWeka(new SimpleLogistic(), null); + //return new EDABinaryClassifierFromWeka(new RandomForest(), null); + + } + catch (ClassifierException ce) + { + throw new EDAException("Preparing an instance of Classifier for EDA failed: underlying Classifier raised an exception: ", ce); + } + } + + + AlignmentComponent identicalLemmaLinker; + AlignmentComponent paraphraseLinker; + AlignmentComponent italianWordNetLinker; + AlignmentComponent italianWikiLinker; + + ScoringComponent wordCoverageScorer; + ScoringComponent nerCoverageScorer; + ScoringComponent verbCoverageScorer; + + + +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/instances/package-info.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/instances/package-info.java new file mode 100644 index 00000000..1247c89d --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/instances/package-info.java @@ -0,0 +1,13 @@ +/** + * + * This package holds "configured" instances of P1EDAs. + * + * Note that, by configure, it means that the "P1EDA template" has been extended, + * and filled in with "minimally required" two methods. + * + * See ... for example; SimpleWordCoverageEN class, where it implements two abstract methods + * addAlignments() and evaluateAlignments(). 
+ * + * @author Tae-Gil Noh + */ +package eu.excitementproject.eop.alignmentedas.p1eda.instances; \ No newline at end of file diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/package-info.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/package-info.java new file mode 100644 index 00000000..d01613af --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/package-info.java @@ -0,0 +1,16 @@ + +/** + * + * This package holds various template classes for Phase-1 Alignment-based EDA, which is a + * prototype (or a proof-of-concept) EDA which tries to build EDA as an "open" box (vs. black box). + * + * How to read / access the classes in this package? + * + * (TODO fill in overview here once finished) + * + * - check p1eda.instances for running examples, such as English, German and Italian P1EDA instances. + * + * @author Tae-Gil Noh + * @since July 2014 + */ +package eu.excitementproject.eop.alignmentedas.p1eda; \ No newline at end of file diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/FNR_DE.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/FNR_DE.java new file mode 100644 index 00000000..8494f2d9 --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/FNR_DE.java @@ -0,0 +1,186 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.sandbox; + +import java.util.Vector; + +import org.apache.uima.jcas.JCas; + +import weka.classifiers.bayes.NaiveBayes; +import weka.classifiers.functions.Logistic; +import weka.classifiers.functions.MultilayerPerceptron; +import weka.classifiers.functions.SimpleLogistic; +import weka.classifiers.functions.VotedPerceptron; +import weka.classifiers.lazy.KStar; +import weka.classifiers.meta.LogitBoost; +import weka.classifiers.trees.J48; +import weka.classifiers.trees.RandomForest; +import 
eu.excitementproject.eop.alignmentedas.p1eda.P1EDATemplate; +import eu.excitementproject.eop.alignmentedas.p1eda.classifiers.EDABinaryClassifierFromWeka; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleProperNounCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleVerbCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleWordCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ClassifierException; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.EDAClassifierAbstraction; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.FeatureValue; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ParameterValue; +import eu.excitementproject.eop.common.EDAException; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException; +import eu.excitementproject.eop.common.component.alignment.PairAnnotatorComponentException; +import eu.excitementproject.eop.common.component.scoring.ScoringComponent; +import eu.excitementproject.eop.common.component.scoring.ScoringComponentException; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.DerivBaseDELinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.GermaNetDELinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.GermanTransDMDELinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.VerbOceanENLinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.WordNetENLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.IdenticalLemmaPhraseLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerDE; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerEN; + +/** + * Fast aNd 
Reliable, word-coverage based German configuration of P1EDA. + * (This setting will get you around 63-64% accuracy on German RTE3. + * Not the best setting, but more reliable, and fast to be used on "any" text. ) + * + * @author Tae-Gil Noh + * + */ +@SuppressWarnings("unused") +public class FNR_DE extends P1EDATemplate { + + public FNR_DE() throws EDAException + { + // And let's prepare the aligner instances and scoring components... + try { + identicalLemmaLinker = new IdenticalLemmaPhraseLinker(); + meteorParaphraseLinker = new MeteorPhraseLinkerDE(); +// derivBaseLinker = new DerivBaseDELinker(); +// distSimLinker = new GermanTransDMDELinker(); + } + catch (AlignmentComponentException ae) + { + throw new EDAException("Initializing Alignment components failed: " + ae.getMessage(), ae); + } + + wordCoverageScorer = new SimpleWordCoverageCounter(null); + nerCoverageScorer = new SimpleProperNounCoverageCounter(); + verbCoverageScorer = new SimpleVerbCoverageCounter(); + } + + @Override + public void addAlignments(JCas input) throws EDAException { + + try { + identicalLemmaLinker.annotate(input); + meteorParaphraseLinker.annotate(input); +// derivBaseLinker.annotate(input); +// distSimLinker.annotate(input); + } + catch (PairAnnotatorComponentException pe) + { + throw new EDAException("Underlying aligner raised an exception", pe); + } + + } + + @Override + public Vector evaluateAlignments(JCas aJCas, Vector param) throws EDAException { + + // The simplest possible method... that works well with simple alignment added + // on addAlignments step. + // count the "covered" ratio (== H term linked) of words in H. + // Note that this instance does not utilize param at all. + + // the feature vector that will be filled in + Vector fv = new Vector(); + try { + Vector score1 = wordCoverageScorer.calculateScores(aJCas); + // we know word Coverage scorer returns 4 numbers. 
+ // ( count covered tokens , count all tokens, count covered content-tokens, count all content-tokens) + // Make two "coverage" ratio now. + + double ratio1 = score1.get(0) / score1.get(1); + double ratio2 = score1.get(2) / score1.get(3); + + logger.debug("Adding feature as: " + score1.get(0) + "/" + score1.get(1)); + logger.debug("Adding feature as: " + score1.get(2) + "/" + score1.get(3)); + fv.add(new FeatureValue(ratio1)); + fv.add(new FeatureValue(ratio2)); + + Vector score2 = nerCoverageScorer.calculateScores(aJCas); + // we know NER Coverage scorer returns 2 numbers. + // (number of ner words covered in H, number of all NER words in H) + // let's make one coverage ratio. + + // ratio of Proper noun coverage ... + double ratio_ner = 0; + // special case first ... + if (score2.get(1) == 0) + ratio_ner = 1.0; + else + { + ratio_ner = score2.get(0) / score2.get(1); + } + fv.add(new FeatureValue(ratio_ner)); + + Vector score3 = verbCoverageScorer.calculateScores(aJCas); + // we know Verb Coverage counter returns 2 numbers. + // (number of covered Vs in H, number of all Vs in H) + double ratio_V = 0; + // special case first... (hmm would be rare but) + if(score3.get(1) ==0) + ratio_V = 1.0; + else + { + ratio_V = score3.get(0) / score3.get(1); + } + fv.add(new FeatureValue(ratio_V)); + + } + catch (ScoringComponentException se) + { + throw new EDAException("Scoring component raised an exception", se); + } + catch (ArrayIndexOutOfBoundsException obe) + { + throw new EDAException("Integrity failure - this simply shouldn't happen", obe); + } + + return fv; + } + + @Override + protected EDAClassifierAbstraction prepareClassifier() throws EDAException + { + try { + return new EDABinaryClassifierFromWeka(new Logistic(), null); + // you can use other classifiers from Weka, such as ... 
+ //return new EDABinaryClassifierFromWeka(new NaiveBayes(), null); + //return new EDABinaryClassifierFromWeka(new VotedPerceptron(), null); + //return new EDABinaryClassifierFromWeka(new J48(), null); + //return new EDABinaryClassifierFromWeka(new MultilayerPerceptron(), null); + //return new EDABinaryClassifierFromWeka(new KStar(), null); + //return new EDABinaryClassifierFromWeka(new SimpleLogistic(), null); + //return new EDABinaryClassifierFromWeka(new RandomForest(), null); + + } + catch (ClassifierException ce) + { + throw new EDAException("Preparing an instance of Classifier for EDA failed: underlying Classifier raised an exception: ", ce); + } + } + + + AlignmentComponent identicalLemmaLinker; + AlignmentComponent meteorParaphraseLinker; + AlignmentComponent derivBaseLinker; + AlignmentComponent distSimLinker; + AlignmentComponent germaNetLinker; + + ScoringComponent wordCoverageScorer; + ScoringComponent nerCoverageScorer; + ScoringComponent verbCoverageScorer; + + + +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/FNR_DEvar1.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/FNR_DEvar1.java new file mode 100644 index 00000000..33550ea7 --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/FNR_DEvar1.java @@ -0,0 +1,186 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.sandbox; + +import java.util.Vector; + +import org.apache.uima.jcas.JCas; + +import weka.classifiers.bayes.NaiveBayes; +import weka.classifiers.functions.Logistic; +import weka.classifiers.functions.MultilayerPerceptron; +import weka.classifiers.functions.SimpleLogistic; +import weka.classifiers.functions.VotedPerceptron; +import weka.classifiers.lazy.KStar; +import weka.classifiers.meta.LogitBoost; +import weka.classifiers.trees.J48; +import weka.classifiers.trees.RandomForest; +import eu.excitementproject.eop.alignmentedas.p1eda.P1EDATemplate; +import 
eu.excitementproject.eop.alignmentedas.p1eda.classifiers.EDABinaryClassifierFromWeka; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleProperNounCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleVerbCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleWordCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ClassifierException; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.EDAClassifierAbstraction; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.FeatureValue; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ParameterValue; +import eu.excitementproject.eop.common.EDAException; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException; +import eu.excitementproject.eop.common.component.alignment.PairAnnotatorComponentException; +import eu.excitementproject.eop.common.component.scoring.ScoringComponent; +import eu.excitementproject.eop.common.component.scoring.ScoringComponentException; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.DerivBaseDELinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.GermaNetDELinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.GermanTransDMDELinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.VerbOceanENLinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.WordNetENLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.IdenticalLemmaPhraseLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerDE; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerEN; + +/** + * Fast aNd Reliable, word-coverage based German configuration of P1EDA. 
+ * + * var1: is FNR_DE + derivBase links + * + * @author Tae-Gil Noh + * + */ +@SuppressWarnings("unused") +public class FNR_DEvar1 extends P1EDATemplate { + + public FNR_DEvar1() throws EDAException + { + // And let's prepare the aligner instances and scoring components... + try { + identicalLemmaLinker = new IdenticalLemmaPhraseLinker(); + meteorParaphraseLinker = new MeteorPhraseLinkerDE(); + derivBaseLinker = new DerivBaseDELinker(); +// distSimLinker = new GermanTransDMDELinker(); + } + catch (AlignmentComponentException ae) + { + throw new EDAException("Initializing Alignment components failed: " + ae.getMessage(), ae); + } + + wordCoverageScorer = new SimpleWordCoverageCounter(null); + nerCoverageScorer = new SimpleProperNounCoverageCounter(); + verbCoverageScorer = new SimpleVerbCoverageCounter(); + } + + @Override + public void addAlignments(JCas input) throws EDAException { + + try { + identicalLemmaLinker.annotate(input); + meteorParaphraseLinker.annotate(input); + derivBaseLinker.annotate(input); +// distSimLinker.annotate(input); + } + catch (PairAnnotatorComponentException pe) + { + throw new EDAException("Underlying aligner raised an exception", pe); + } + + } + + @Override + public Vector evaluateAlignments(JCas aJCas, Vector param) throws EDAException { + + // The simplest possible method... that works well with simple alignment added + // on addAlignments step. + // count the "covered" ratio (== H term linked) of words in H. + // Note that this instance does not utilize param at all. + + // the feature vector that will be filled in + Vector fv = new Vector(); + try { + Vector score1 = wordCoverageScorer.calculateScores(aJCas); + // we know word Coverage scorer returns 4 numbers. + // ( count covered tokens , count all tokens, count covered content-tokens, count all content-tokens) + // Make two "coverage" ratio now. 
+ + double ratio1 = score1.get(0) / score1.get(1); + double ratio2 = score1.get(2) / score1.get(3); + + logger.debug("Adding feature as: " + score1.get(0) + "/" + score1.get(1)); + logger.debug("Adding feature as: " + score1.get(2) + "/" + score1.get(3)); + fv.add(new FeatureValue(ratio1)); + fv.add(new FeatureValue(ratio2)); + + Vector score2 = nerCoverageScorer.calculateScores(aJCas); + // we know NER Coverage scorer returns 2 numbers. + // (number of ner words covered in H, number of all NER words in H) + // let's make one coverage ratio. + + // ratio of Proper noun coverage ... + double ratio_ner = 0; + // special case first ... + if (score2.get(1) == 0) + ratio_ner = 1.0; + else + { + ratio_ner = score2.get(0) / score2.get(1); + } + fv.add(new FeatureValue(ratio_ner)); + + Vector score3 = verbCoverageScorer.calculateScores(aJCas); + // we know Verb Coverage counter returns 2 numbers. + // (number of covered Vs in H, number of all Vs in H) + double ratio_V = 0; + // special case first... (hmm would be rare but) + if(score3.get(1) ==0) + ratio_V = 1.0; + else + { + ratio_V = score3.get(0) / score3.get(1); + } + fv.add(new FeatureValue(ratio_V)); + + } + catch (ScoringComponentException se) + { + throw new EDAException("Scoring component raised an exception", se); + } + catch (ArrayIndexOutOfBoundsException obe) + { + throw new EDAException("Integrity failure - this simply shouldn't happen", obe); + } + + return fv; + } + + @Override + protected EDAClassifierAbstraction prepareClassifier() throws EDAException + { + try { + return new EDABinaryClassifierFromWeka(new Logistic(), null); + // you can use other classifiers from Weka, such as ... 
+ //return new EDABinaryClassifierFromWeka(new NaiveBayes(), null); + //return new EDABinaryClassifierFromWeka(new VotedPerceptron(), null); + //return new EDABinaryClassifierFromWeka(new J48(), null); + //return new EDABinaryClassifierFromWeka(new MultilayerPerceptron(), null); + //return new EDABinaryClassifierFromWeka(new KStar(), null); + //return new EDABinaryClassifierFromWeka(new SimpleLogistic(), null); + //return new EDABinaryClassifierFromWeka(new RandomForest(), null); + + } + catch (ClassifierException ce) + { + throw new EDAException("Preparing an instance of Classifier for EDA failed: underlying Classifier raised an exception: ", ce); + } + } + + + AlignmentComponent identicalLemmaLinker; + AlignmentComponent meteorParaphraseLinker; + AlignmentComponent derivBaseLinker; + AlignmentComponent distSimLinker; + AlignmentComponent germaNetLinker; + + ScoringComponent wordCoverageScorer; + ScoringComponent nerCoverageScorer; + ScoringComponent verbCoverageScorer; + + + +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/FNR_DEvar2.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/FNR_DEvar2.java new file mode 100644 index 00000000..415fcee7 --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/FNR_DEvar2.java @@ -0,0 +1,186 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.sandbox; + +import java.util.Vector; + +import org.apache.uima.jcas.JCas; + +import weka.classifiers.bayes.NaiveBayes; +import weka.classifiers.functions.Logistic; +import weka.classifiers.functions.MultilayerPerceptron; +import weka.classifiers.functions.SimpleLogistic; +import weka.classifiers.functions.VotedPerceptron; +import weka.classifiers.lazy.KStar; +import weka.classifiers.meta.LogitBoost; +import weka.classifiers.trees.J48; +import weka.classifiers.trees.RandomForest; +import eu.excitementproject.eop.alignmentedas.p1eda.P1EDATemplate; +import 
eu.excitementproject.eop.alignmentedas.p1eda.classifiers.EDABinaryClassifierFromWeka; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleProperNounCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleVerbCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleWordCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ClassifierException; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.EDAClassifierAbstraction; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.FeatureValue; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ParameterValue; +import eu.excitementproject.eop.common.EDAException; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException; +import eu.excitementproject.eop.common.component.alignment.PairAnnotatorComponentException; +import eu.excitementproject.eop.common.component.scoring.ScoringComponent; +import eu.excitementproject.eop.common.component.scoring.ScoringComponentException; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.DerivBaseDELinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.GermaNetDELinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.GermanTransDMDELinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.VerbOceanENLinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.WordNetENLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.IdenticalLemmaPhraseLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerDE; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerEN; + +/** + * Fast aNd Reliable, word-coverage based German configuration of P1EDA. 
+ * + * var2: FNR + German TransDM links + * + * @author Tae-Gil Noh + * + */ +@SuppressWarnings("unused") +public class FNR_DEvar2 extends P1EDATemplate { + + public FNR_DEvar2() throws EDAException + { + // And let's prepare the aligner instances and scoring components... + try { + identicalLemmaLinker = new IdenticalLemmaPhraseLinker(); + meteorParaphraseLinker = new MeteorPhraseLinkerDE(); +// derivBaseLinker = new DerivBaseDELinker(); + distSimLinker = new GermanTransDMDELinker(); + } + catch (AlignmentComponentException ae) + { + throw new EDAException("Initializing Alignment components failed: " + ae.getMessage(), ae); + } + + wordCoverageScorer = new SimpleWordCoverageCounter(null); + nerCoverageScorer = new SimpleProperNounCoverageCounter(); + verbCoverageScorer = new SimpleVerbCoverageCounter(); + } + + @Override + public void addAlignments(JCas input) throws EDAException { + + try { + identicalLemmaLinker.annotate(input); + meteorParaphraseLinker.annotate(input); +// derivBaseLinker.annotate(input); + distSimLinker.annotate(input); + } + catch (PairAnnotatorComponentException pe) + { + throw new EDAException("Underlying aligner raised an exception", pe); + } + + } + + @Override + public Vector evaluateAlignments(JCas aJCas, Vector param) throws EDAException { + + // The simplest possible method... that works well with simple alignment added + // on addAlignments step. + // count the "covered" ratio (== H term linked) of words in H. + // Note that this instance does not utilize param at all. + + // the feature vector that will be filled in + Vector fv = new Vector(); + try { + Vector score1 = wordCoverageScorer.calculateScores(aJCas); + // we know word Coverage scorer returns 4 numbers. + // ( count covered tokens , count all tokens, count covered content-tokens, count all content-tokens) + // Make two "coverage" ratio now. 
+ + double ratio1 = score1.get(0) / score1.get(1); + double ratio2 = score1.get(2) / score1.get(3); + + logger.debug("Adding feature as: " + score1.get(0) + "/" + score1.get(1)); + logger.debug("Adding feature as: " + score1.get(2) + "/" + score1.get(3)); + fv.add(new FeatureValue(ratio1)); + fv.add(new FeatureValue(ratio2)); + + Vector score2 = nerCoverageScorer.calculateScores(aJCas); + // we know NER Coverage scorer returns 2 numbers. + // (number of ner words covered in H, number of all NER words in H) + // let's make one coverage ratio. + + // ratio of Proper noun coverage ... + double ratio_ner = 0; + // special case first ... + if (score2.get(1) == 0) + ratio_ner = 1.0; + else + { + ratio_ner = score2.get(0) / score2.get(1); + } + fv.add(new FeatureValue(ratio_ner)); + + Vector score3 = verbCoverageScorer.calculateScores(aJCas); + // we know Verb Coverage counter returns 2 numbers. + // (number of covered Vs in H, number of all Vs in H) + double ratio_V = 0; + // special case first... (hmm would be rare but) + if(score3.get(1) ==0) + ratio_V = 1.0; + else + { + ratio_V = score3.get(0) / score3.get(1); + } + fv.add(new FeatureValue(ratio_V)); + + } + catch (ScoringComponentException se) + { + throw new EDAException("Scoring component raised an exception", se); + } + catch (ArrayIndexOutOfBoundsException obe) + { + throw new EDAException("Integrity failure - this simply shouldn't happen", obe); + } + + return fv; + } + + @Override + protected EDAClassifierAbstraction prepareClassifier() throws EDAException + { + try { + return new EDABinaryClassifierFromWeka(new Logistic(), null); + // you can use other classifiers from Weka, such as ... 
+ //return new EDABinaryClassifierFromWeka(new NaiveBayes(), null); + //return new EDABinaryClassifierFromWeka(new VotedPerceptron(), null); + //return new EDABinaryClassifierFromWeka(new J48(), null); + //return new EDABinaryClassifierFromWeka(new MultilayerPerceptron(), null); + //return new EDABinaryClassifierFromWeka(new KStar(), null); + //return new EDABinaryClassifierFromWeka(new SimpleLogistic(), null); + //return new EDABinaryClassifierFromWeka(new RandomForest(), null); + + } + catch (ClassifierException ce) + { + throw new EDAException("Preparing an instance of Classifier for EDA failed: underlying Classifier raised an exception: ", ce); + } + } + + + AlignmentComponent identicalLemmaLinker; + AlignmentComponent meteorParaphraseLinker; + AlignmentComponent derivBaseLinker; + AlignmentComponent distSimLinker; + AlignmentComponent germaNetLinker; + + ScoringComponent wordCoverageScorer; + ScoringComponent nerCoverageScorer; + ScoringComponent verbCoverageScorer; + + + +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/FNR_DEvar3.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/FNR_DEvar3.java new file mode 100644 index 00000000..d97fb8a1 --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/FNR_DEvar3.java @@ -0,0 +1,186 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.sandbox; + +import java.util.Vector; + +import org.apache.uima.jcas.JCas; + +import weka.classifiers.bayes.NaiveBayes; +import weka.classifiers.functions.Logistic; +import weka.classifiers.functions.MultilayerPerceptron; +import weka.classifiers.functions.SimpleLogistic; +import weka.classifiers.functions.VotedPerceptron; +import weka.classifiers.lazy.KStar; +import weka.classifiers.meta.LogitBoost; +import weka.classifiers.trees.J48; +import weka.classifiers.trees.RandomForest; +import eu.excitementproject.eop.alignmentedas.p1eda.P1EDATemplate; +import 
eu.excitementproject.eop.alignmentedas.p1eda.classifiers.EDABinaryClassifierFromWeka; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleProperNounCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleVerbCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleWordCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ClassifierException; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.EDAClassifierAbstraction; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.FeatureValue; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ParameterValue; +import eu.excitementproject.eop.common.EDAException; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException; +import eu.excitementproject.eop.common.component.alignment.PairAnnotatorComponentException; +import eu.excitementproject.eop.common.component.scoring.ScoringComponent; +import eu.excitementproject.eop.common.component.scoring.ScoringComponentException; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.DerivBaseDELinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.GermaNetDELinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.GermanTransDMDELinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.VerbOceanENLinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.WordNetENLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.IdenticalLemmaPhraseLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerDE; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerEN; + +/** + * Fast aNd Reliable, word-coverage based German configuration of P1EDA. 
+ * + * var3: FNR_DE + derivBase and TransDM links + * + * @author Tae-Gil Noh + * + */ +@SuppressWarnings("unused") +public class FNR_DEvar3 extends P1EDATemplate { + + public FNR_DEvar3() throws EDAException + { + // And let's prepare the aligner instances and scoring components... + try { + identicalLemmaLinker = new IdenticalLemmaPhraseLinker(); + meteorParaphraseLinker = new MeteorPhraseLinkerDE(); + derivBaseLinker = new DerivBaseDELinker(); + distSimLinker = new GermanTransDMDELinker(); + } + catch (AlignmentComponentException ae) + { + throw new EDAException("Initializing Alignment components failed: " + ae.getMessage(), ae); + } + + wordCoverageScorer = new SimpleWordCoverageCounter(null); + nerCoverageScorer = new SimpleProperNounCoverageCounter(); + verbCoverageScorer = new SimpleVerbCoverageCounter(); + } + + @Override + public void addAlignments(JCas input) throws EDAException { + + try { + identicalLemmaLinker.annotate(input); + meteorParaphraseLinker.annotate(input); + derivBaseLinker.annotate(input); + distSimLinker.annotate(input); + } + catch (PairAnnotatorComponentException pe) + { + throw new EDAException("Underlying aligner raised an exception", pe); + } + + } + + @Override + public Vector evaluateAlignments(JCas aJCas, Vector param) throws EDAException { + + // The simplest possible method... that works well with simple alignment added + // on addAlignments step. + // count the "covered" ratio (== H term linked) of words in H. + // Note that this instance does not utilize param at all. + + // the feature vector that will be filled in + Vector fv = new Vector(); + try { + Vector score1 = wordCoverageScorer.calculateScores(aJCas); + // we know word Coverage scorer returns 4 numbers. + // ( count covered tokens , count all tokens, count covered content-tokens, count all content-tokens) + // Make two "coverage" ratio now. 
+ + double ratio1 = score1.get(0) / score1.get(1); + double ratio2 = score1.get(2) / score1.get(3); + + logger.debug("Adding feature as: " + score1.get(0) + "/" + score1.get(1)); + logger.debug("Adding feature as: " + score1.get(2) + "/" + score1.get(3)); + fv.add(new FeatureValue(ratio1)); + fv.add(new FeatureValue(ratio2)); + + Vector score2 = nerCoverageScorer.calculateScores(aJCas); + // we know NER Coverage scorer returns 2 numbers. + // (number of ner words covered in H, number of all NER words in H) + // let's make one coverage ratio. + + // ratio of Proper noun coverage ... + double ratio_ner = 0; + // special case first ... + if (score2.get(1) == 0) + ratio_ner = 1.0; + else + { + ratio_ner = score2.get(0) / score2.get(1); + } + fv.add(new FeatureValue(ratio_ner)); + + Vector score3 = verbCoverageScorer.calculateScores(aJCas); + // we know Verb Coverage counter returns 2 numbers. + // (number of covered Vs in H, number of all Vs in H) + double ratio_V = 0; + // special case first... (hmm would be rare but) + if(score3.get(1) ==0) + ratio_V = 1.0; + else + { + ratio_V = score3.get(0) / score3.get(1); + } + fv.add(new FeatureValue(ratio_V)); + + } + catch (ScoringComponentException se) + { + throw new EDAException("Scoring component raised an exception", se); + } + catch (ArrayIndexOutOfBoundsException obe) + { + throw new EDAException("Integrity failure - this simply shouldn't happen", obe); + } + + return fv; + } + + @Override + protected EDAClassifierAbstraction prepareClassifier() throws EDAException + { + try { + return new EDABinaryClassifierFromWeka(new Logistic(), null); + // you can use other classifiers from Weka, such as ... 
+ //return new EDABinaryClassifierFromWeka(new NaiveBayes(), null); + //return new EDABinaryClassifierFromWeka(new VotedPerceptron(), null); + //return new EDABinaryClassifierFromWeka(new J48(), null); + //return new EDABinaryClassifierFromWeka(new MultilayerPerceptron(), null); + //return new EDABinaryClassifierFromWeka(new KStar(), null); + //return new EDABinaryClassifierFromWeka(new SimpleLogistic(), null); + //return new EDABinaryClassifierFromWeka(new RandomForest(), null); + + } + catch (ClassifierException ce) + { + throw new EDAException("Preparing an instance of Classifier for EDA failed: underlying Classifier raised an exception: ", ce); + } + } + + + AlignmentComponent identicalLemmaLinker; + AlignmentComponent meteorParaphraseLinker; + AlignmentComponent derivBaseLinker; + AlignmentComponent distSimLinker; + AlignmentComponent germaNetLinker; + + ScoringComponent wordCoverageScorer; + ScoringComponent nerCoverageScorer; + ScoringComponent verbCoverageScorer; + + + +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/FNR_DEvar4.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/FNR_DEvar4.java new file mode 100644 index 00000000..f28c9b80 --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/FNR_DEvar4.java @@ -0,0 +1,181 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.sandbox; + +import java.util.Vector; + +import org.apache.uima.jcas.JCas; + +import weka.classifiers.bayes.NaiveBayes; +import weka.classifiers.functions.Logistic; +import weka.classifiers.functions.MultilayerPerceptron; +import weka.classifiers.functions.SimpleLogistic; +import weka.classifiers.functions.VotedPerceptron; +import weka.classifiers.lazy.KStar; +import weka.classifiers.meta.LogitBoost; +import weka.classifiers.trees.J48; +import weka.classifiers.trees.RandomForest; +import eu.excitementproject.eop.alignmentedas.p1eda.P1EDATemplate; +import 
eu.excitementproject.eop.alignmentedas.p1eda.classifiers.EDABinaryClassifierFromWeka; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleProperNounCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleVerbCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleWordCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ClassifierException; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.EDAClassifierAbstraction; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.FeatureValue; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ParameterValue; +import eu.excitementproject.eop.common.EDAException; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException; +import eu.excitementproject.eop.common.component.alignment.PairAnnotatorComponentException; +import eu.excitementproject.eop.common.component.scoring.ScoringComponent; +import eu.excitementproject.eop.common.component.scoring.ScoringComponentException; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.DerivBaseDELinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.GermaNetDELinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.GermanTransDMDELinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.VerbOceanENLinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.WordNetENLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.IdenticalLemmaPhraseLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerDE; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerEN; + +/** + * Fast aNd Reliable, word-coverage based German configuration of P1EDA. 
+ * + * var4: FNR_DE *minus* German para-phrase links. This means, only identical lemma links. + * the bare minimum. + * + * @author Tae-Gil Noh + * + */ +@SuppressWarnings("unused") +public class FNR_DEvar4 extends P1EDATemplate { + + public FNR_DEvar4() throws EDAException + { + // And let's prepare the aligner instances and scoring components... + try { + identicalLemmaLinker = new IdenticalLemmaPhraseLinker(); + } + catch (AlignmentComponentException ae) + { + throw new EDAException("Initializing Alignment components failed: " + ae.getMessage(), ae); + } + + wordCoverageScorer = new SimpleWordCoverageCounter(null); + nerCoverageScorer = new SimpleProperNounCoverageCounter(); + verbCoverageScorer = new SimpleVerbCoverageCounter(); + } + + @Override + public void addAlignments(JCas input) throws EDAException { + + try { + identicalLemmaLinker.annotate(input); + } + catch (PairAnnotatorComponentException pe) + { + throw new EDAException("Underlying aligner raised an exception", pe); + } + + } + + @Override + public Vector evaluateAlignments(JCas aJCas, Vector param) throws EDAException { + + // The simplest possible method... that works well with simple alignment added + // on addAlignments step. + // count the "covered" ratio (== H term linked) of words in H. + // Note that this instance does not utilize param at all. + + // the feature vector that will be filled in + Vector fv = new Vector(); + try { + Vector score1 = wordCoverageScorer.calculateScores(aJCas); + // we know word Coverage scorer returns 4 numbers. + // ( count covered tokens , count all tokens, count covered content-tokens, count all content-tokens) + // Make two "coverage" ratio now. 
+ + double ratio1 = score1.get(0) / score1.get(1); + double ratio2 = score1.get(2) / score1.get(3); + + logger.debug("Adding feature as: " + score1.get(0) + "/" + score1.get(1)); + logger.debug("Adding feature as: " + score1.get(2) + "/" + score1.get(3)); + fv.add(new FeatureValue(ratio1)); + fv.add(new FeatureValue(ratio2)); + + Vector score2 = nerCoverageScorer.calculateScores(aJCas); + // we know NER Coverage scorer returns 2 numbers. + // (number of ner words covered in H, number of all NER words in H) + // let's make one coverage ratio. + + // ratio of Proper noun coverage ... + double ratio_ner = 0; + // special case first ... + if (score2.get(1) == 0) + ratio_ner = 1.0; + else + { + ratio_ner = score2.get(0) / score2.get(1); + } + fv.add(new FeatureValue(ratio_ner)); + + Vector score3 = verbCoverageScorer.calculateScores(aJCas); + // we know Verb Coverage counter returns 2 numbers. + // (number of covered Vs in H, number of all Vs in H) + double ratio_V = 0; + // special case first... (hmm would be rare but) + if(score3.get(1) ==0) + ratio_V = 1.0; + else + { + ratio_V = score3.get(0) / score3.get(1); + } + fv.add(new FeatureValue(ratio_V)); + + } + catch (ScoringComponentException se) + { + throw new EDAException("Scoring component raised an exception", se); + } + catch (ArrayIndexOutOfBoundsException obe) + { + throw new EDAException("Integrity failure - this simply shouldn't happen", obe); + } + + return fv; + } + + @Override + protected EDAClassifierAbstraction prepareClassifier() throws EDAException + { + try { + return new EDABinaryClassifierFromWeka(new Logistic(), null); + // you can use other classifiers from Weka, such as ... 
+ //return new EDABinaryClassifierFromWeka(new NaiveBayes(), null); + //return new EDABinaryClassifierFromWeka(new VotedPerceptron(), null); + //return new EDABinaryClassifierFromWeka(new J48(), null); + //return new EDABinaryClassifierFromWeka(new MultilayerPerceptron(), null); + //return new EDABinaryClassifierFromWeka(new KStar(), null); + //return new EDABinaryClassifierFromWeka(new SimpleLogistic(), null); + //return new EDABinaryClassifierFromWeka(new RandomForest(), null); + + } + catch (ClassifierException ce) + { + throw new EDAException("Preparing an instance of Classifier for EDA failed: underlying Classifier raised an exception: ", ce); + } + } + + + AlignmentComponent identicalLemmaLinker; + AlignmentComponent meteorParaphraseLinker; + AlignmentComponent derivBaseLinker; + AlignmentComponent distSimLinker; + AlignmentComponent germaNetLinker; + + ScoringComponent wordCoverageScorer; + ScoringComponent nerCoverageScorer; + ScoringComponent verbCoverageScorer; + + + +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/FNR_EN.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/FNR_EN.java new file mode 100644 index 00000000..344c679f --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/FNR_EN.java @@ -0,0 +1,177 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.sandbox; + +import java.util.Vector; + +import org.apache.uima.jcas.JCas; + +import weka.classifiers.bayes.NaiveBayes; +import weka.classifiers.functions.Logistic; +import weka.classifiers.functions.MultilayerPerceptron; +import weka.classifiers.functions.SimpleLogistic; +import weka.classifiers.functions.VotedPerceptron; +import weka.classifiers.lazy.KStar; +import weka.classifiers.meta.LogitBoost; +import weka.classifiers.trees.J48; +import weka.classifiers.trees.RandomForest; +import eu.excitementproject.eop.alignmentedas.p1eda.P1EDATemplate; +import 
eu.excitementproject.eop.alignmentedas.p1eda.classifiers.EDABinaryClassifierFromWeka; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleProperNounCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleVerbCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleWordCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ClassifierException; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.EDAClassifierAbstraction; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.FeatureValue; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ParameterValue; +import eu.excitementproject.eop.common.EDAException; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException; +import eu.excitementproject.eop.common.component.alignment.PairAnnotatorComponentException; +import eu.excitementproject.eop.common.component.scoring.ScoringComponent; +import eu.excitementproject.eop.common.component.scoring.ScoringComponentException; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.VerbOceanENLinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.WordNetENLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.IdenticalLemmaPhraseLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerDE; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerEN; + +/** + * + * Fast aNd Reliable, word-coverage based English configuration of P1EDA. + * (This setting will get you around 65-66% accuracy on RTE3. Not the best setting, + * but more reliable, and fast to be used on "any" text. ) + * + * Mainly to be used for WP6 experiments. 
+ * + * @author Tae-Gil Noh + */ +@SuppressWarnings("unused") +public class FNR_EN extends P1EDATemplate { + + public FNR_EN() throws EDAException + { + try { + aligner1 = new IdenticalLemmaPhraseLinker(); + aligner2 = new MeteorPhraseLinkerEN(); +// aligner3 = new WordNetENLinker(null); // due to its slowness. +// aligner4 = new VerbOceanENLinker(); // due to its usage of fixed-path. + } + catch (AlignmentComponentException ae) + { + throw new EDAException("Initializing Alignment components failed: " + ae.getMessage(), ae); + } + + wordCoverageScorer = new SimpleWordCoverageCounter(null); + nerCoverageScorer = new SimpleProperNounCoverageCounter(); + verbCoverageScorer = new SimpleVerbCoverageCounter(); + } + + @Override + public void addAlignments(JCas input) throws EDAException { + + // Here, just one aligner... (same lemma linker) + try { + aligner1.annotate(input); + aligner2.annotate(input); +// aligner3.annotate(input); // WordNet. Really slow in its current form. (several hours) +// aligner4.annotate(input); // not to be used by TL. (due to need of external path) + + } + catch (PairAnnotatorComponentException pe) + { + throw new EDAException("Underlying aligner raised an exception", pe); + } + + } + + @Override + public Vector evaluateAlignments(JCas aJCas, Vector param) throws EDAException { + + // The simplest possible method... that works well with simple alignment added + // on addAlignments step. + // count the "covered" ratio (== H term linked) of words in H. + // Note that this instance does not utilize param at all. + + // the feature vector that will be filled in + Vector fv = new Vector(); + try { + Vector score1 = wordCoverageScorer.calculateScores(aJCas); + // we know word Coverage scorer returns 4 numbers. + // ( count covered tokens , count all tokens, count covered content-tokens, count all content-tokens) + // Make two "coverage" ratio now. 
+ + double ratio1 = score1.get(0) / score1.get(1); + double ratio2 = score1.get(2) / score1.get(3); + + logger.debug("Adding feature as: " + score1.get(0) + "/" + score1.get(1)); + logger.debug("Adding feature as: " + score1.get(2) + "/" + score1.get(3)); + fv.add(new FeatureValue(ratio1)); + fv.add(new FeatureValue(ratio2)); + + Vector score2 = nerCoverageScorer.calculateScores(aJCas); + // we know NER Coverage scorer returns 2 numbers. + // (number of ner words covered in H, number of all NER words in H) + // let's make one coverage ratio. + + // ratio of Proper noun coverage ... + double ratio_ner = 0; + // special case first ... + if (score2.get(1) == 0) + ratio_ner = 1.0; + else + { + ratio_ner = score2.get(0) / score2.get(1); + } + fv.add(new FeatureValue(ratio_ner)); + + + Vector score3 = verbCoverageScorer.calculateScores(aJCas); + // we know Verb Coverage counter returns 2 numbers. + // (number of covered Vs in H, number of all Vs in H) + double ratio_V = 0; + // special case first... (hmm would be rare but) + if(score3.get(1) ==0) + ratio_V = 1.0; + else + { + ratio_V = score3.get(0) / score3.get(1); + } + fv.add(new FeatureValue(ratio_V)); + + } + catch (ScoringComponentException se) + { + throw new EDAException("Scoring component raised an exception", se); + } + catch (ArrayIndexOutOfBoundsException obe) + { + throw new EDAException("Integrity failure - this simply shouldn't happen", obe); + } + + // Now return the feature vector. The P1EDA template will use this. 
+ return fv; + } + + @Override + protected EDAClassifierAbstraction prepareClassifier() throws EDAException + { + try { + return new EDABinaryClassifierFromWeka(new Logistic(), null); + } + catch (ClassifierException ce) + { + throw new EDAException("Preparing an instance of Classifier for EDA failed: underlying Classifier raised an exception: ", ce); + } + } + + AlignmentComponent aligner1; + AlignmentComponent aligner2; +// AlignmentComponent aligner3; +// AlignmentComponent aligner4; + + ScoringComponent wordCoverageScorer; + ScoringComponent nerCoverageScorer; + ScoringComponent verbCoverageScorer; + + + +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/FNR_IT.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/FNR_IT.java new file mode 100644 index 00000000..d8a01ab7 --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/FNR_IT.java @@ -0,0 +1,183 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.sandbox; + +import java.io.File; +import java.util.Vector; + +import org.apache.uima.jcas.JCas; + +import weka.classifiers.bayes.NaiveBayes; +import weka.classifiers.functions.Logistic; +import weka.classifiers.functions.MultilayerPerceptron; +import weka.classifiers.functions.SimpleLogistic; +import weka.classifiers.functions.VotedPerceptron; +import weka.classifiers.lazy.KStar; +import weka.classifiers.meta.LogitBoost; +import weka.classifiers.trees.J48; +import weka.classifiers.trees.RandomForest; +import eu.excitementproject.eop.alignmentedas.p1eda.P1EDATemplate; +import eu.excitementproject.eop.alignmentedas.p1eda.classifiers.EDABinaryClassifierFromWeka; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleProperNounCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleVerbCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleWordCoverageCounter; +import 
eu.excitementproject.eop.alignmentedas.p1eda.subs.ClassifierException; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.EDAClassifierAbstraction; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.FeatureValue; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ParameterValue; +import eu.excitementproject.eop.common.EDAException; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException; +import eu.excitementproject.eop.common.component.alignment.PairAnnotatorComponentException; +import eu.excitementproject.eop.common.component.scoring.ScoringComponent; +import eu.excitementproject.eop.common.component.scoring.ScoringComponentException; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.VerbOceanENLinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.WordNetENLinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.WordNetITLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.IdenticalLemmaPhraseLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerDE; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerEN; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerIT; + +/** + * + * Fast aNd Reliable, word-coverage based Italian configuration of P1EDA. + * (This setting will get you accuracy 64.875 on RTE3-IT. Not the best + * setting, but reliable, and fast to be used on "any" text. ) + * + * Mainly to be used for WP6 experiments. 
+ * + * @author Tae-Gil Noh + */ +@SuppressWarnings("unused") +public class FNR_IT extends P1EDATemplate { + + public FNR_IT() throws EDAException + { + try { + identicalLemmaLinker = new IdenticalLemmaPhraseLinker(); + paraphraseLinker = new MeteorPhraseLinkerIT(); + } + catch (AlignmentComponentException ae) + { + throw new EDAException("Initializing Alignment components failed: " + ae.getMessage(), ae); + } + + wordCoverageScorer = new SimpleWordCoverageCounter(null); + nerCoverageScorer = new SimpleProperNounCoverageCounter(); + verbCoverageScorer = new SimpleVerbCoverageCounter(); + } + + @Override + public void addAlignments(JCas input) throws EDAException { + + try { + identicalLemmaLinker.annotate(input); + paraphraseLinker.annotate(input); + } + catch (PairAnnotatorComponentException pe) + { + throw new EDAException("Underlying aligner raised an exception", pe); + } + + } + + @Override + public Vector evaluateAlignments(JCas aJCas, Vector param) throws EDAException { + + // The simplest possible method... that works well with simple alignment added + // on addAlignments step. + // count the "covered" ratio (== H term linked) of words in H. + // Note that this instance does not utilize param at all. + + // the feature vector that will be filled in + Vector fv = new Vector(); + try { + Vector score1 = wordCoverageScorer.calculateScores(aJCas); + // we know word Coverage scorer returns 4 numbers. + // ( count covered tokens , count all tokens, count covered content-tokens, count all content-tokens) + // Make two "coverage" ratio now. 
+ + double ratio1 = score1.get(0) / score1.get(1); + double ratio2 = score1.get(2) / score1.get(3); + + logger.debug("Adding feature as: " + score1.get(0) + "/" + score1.get(1)); + logger.debug("Adding feature as: " + score1.get(2) + "/" + score1.get(3)); + fv.add(new FeatureValue(ratio1)); + fv.add(new FeatureValue(ratio2)); + + Vector score2 = nerCoverageScorer.calculateScores(aJCas); + // we know NER Coverage scorer returns 2 numbers. + // (number of ner words covered in H, number of all NER words in H) + // let's make one coverage ratio. + + // ratio of Proper noun coverage ... + double ratio_ner = 0; + // special case first ... + if (score2.get(1) == 0) + ratio_ner = 1.0; + else + { + ratio_ner = score2.get(0) / score2.get(1); + } + fv.add(new FeatureValue(ratio_ner)); + + + Vector score3 = verbCoverageScorer.calculateScores(aJCas); + // we know Verb Coverage counter returns 2 numbers. + // (number of covered Vs in H, number of all Vs in H) + double ratio_V = 0; + // special case first... (hmm would be rare but) + if(score3.get(1) ==0) + ratio_V = 1.0; + else + { + ratio_V = score3.get(0) / score3.get(1); + } + fv.add(new FeatureValue(ratio_V)); + + } + catch (ScoringComponentException se) + { + throw new EDAException("Scoring component raised an exception", se); + } + catch (ArrayIndexOutOfBoundsException obe) + { + throw new EDAException("Integrity failure - this simply shouldn't happen", obe); + } + + return fv; + } + + @Override + protected EDAClassifierAbstraction prepareClassifier() throws EDAException + { + try { + return new EDABinaryClassifierFromWeka(new Logistic(), null); + // you can use other classifiers from Weka, such as ... 
+ //return new EDABinaryClassifierFromWeka(new NaiveBayes(), null); + //return new EDABinaryClassifierFromWeka(new VotedPerceptron(), null); + //return new EDABinaryClassifierFromWeka(new J48(), null); + //return new EDABinaryClassifierFromWeka(new MultilayerPerceptron(), null); + //return new EDABinaryClassifierFromWeka(new KStar(), null); + //return new EDABinaryClassifierFromWeka(new SimpleLogistic(), null); + //return new EDABinaryClassifierFromWeka(new RandomForest(), null); + + } + catch (ClassifierException ce) + { + throw new EDAException("Preparing an instance of Classifier for EDA failed: underlying Classifier raised an exception: ", ce); + } + } + + + AlignmentComponent identicalLemmaLinker; + AlignmentComponent paraphraseLinker; + AlignmentComponent italianWordNetLinker; + AlignmentComponent italianWikiLinker; + + ScoringComponent wordCoverageScorer; + ScoringComponent nerCoverageScorer; + ScoringComponent verbCoverageScorer; + + + +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/WNVOMT.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/WNVOMT.java new file mode 100644 index 00000000..852821a5 --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/WNVOMT.java @@ -0,0 +1,178 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.sandbox; + +import java.util.Vector; + +import org.apache.uima.jcas.JCas; + +import weka.classifiers.bayes.NaiveBayes; +import weka.classifiers.functions.Logistic; +import weka.classifiers.functions.MultilayerPerceptron; +import weka.classifiers.functions.SimpleLogistic; +import weka.classifiers.functions.VotedPerceptron; +import weka.classifiers.lazy.KStar; +import weka.classifiers.meta.LogitBoost; +import weka.classifiers.trees.J48; +import weka.classifiers.trees.RandomForest; +import eu.excitementproject.eop.alignmentedas.p1eda.P1EDATemplate; +import 
eu.excitementproject.eop.alignmentedas.p1eda.classifiers.EDABinaryClassifierFromWeka; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleProperNounCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleVerbCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleWordCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ClassifierException; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.EDAClassifierAbstraction; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.FeatureValue; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ParameterValue; +import eu.excitementproject.eop.common.EDAException; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException; +import eu.excitementproject.eop.common.component.alignment.PairAnnotatorComponentException; +import eu.excitementproject.eop.common.component.scoring.ScoringComponent; +import eu.excitementproject.eop.common.component.scoring.ScoringComponentException; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.VerbOceanENLinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.WordNetENLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.IdenticalLemmaPhraseLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerDE; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerEN; + +@SuppressWarnings("unused") +public class WNVOMT extends P1EDATemplate { + + public WNVOMT() throws EDAException + { + // And let's keep the alinger instance and scoring component... + // This configuration keeps just one for each. 
(as-is counter) + try { + aligner1 = new IdenticalLemmaPhraseLinker(); + aligner2 = new MeteorPhraseLinkerEN(); + aligner3 = new WordNetENLinker(null); + aligner4 = new VerbOceanENLinker(); + } + catch (AlignmentComponentException ae) + { + throw new EDAException("Initializing Alignment components failed: " + ae.getMessage(), ae); + } + + wordCoverageScorer = new SimpleWordCoverageCounter(null); + nerCoverageScorer = new SimpleProperNounCoverageCounter(); + verbCoverageScorer = new SimpleVerbCoverageCounter(); + } + + @Override + public void addAlignments(JCas input) throws EDAException { + + // Here, just one aligner... (same lemma linker) + try { + aligner1.annotate(input); + aligner2.annotate(input); + aligner3.annotate(input); // WordNet. Really slow in its current form. (several hours) + aligner4.annotate(input); + + } + catch (PairAnnotatorComponentException pe) + { + throw new EDAException("Underlying aligner raised an exception", pe); + } + + } + + @Override + public Vector evaluateAlignments(JCas aJCas, Vector param) throws EDAException { + + // The simplest possible method... that works well with simple alignment added + // on addAlignments step. + // count the "covered" ratio (== H term linked) of words in H. + // Note that this instance does not utilize param at all. + + // the feature vector that will be filled in + Vector fv = new Vector(); + try { + Vector score1 = wordCoverageScorer.calculateScores(aJCas); + // we know word Coverage scorer returns 4 numbers. + // ( count covered tokens , count all tokens, count covered content-tokens, count all content-tokens) + // Make two "coverage" ratio now. 
+ + double ratio1 = score1.get(0) / score1.get(1); + double ratio2 = score1.get(2) / score1.get(3); + + logger.debug("Adding feature as: " + score1.get(0) + "/" + score1.get(1)); + logger.debug("Adding feature as: " + score1.get(2) + "/" + score1.get(3)); + fv.add(new FeatureValue(ratio1)); + fv.add(new FeatureValue(ratio2)); + + Vector score2 = nerCoverageScorer.calculateScores(aJCas); + // we know NER Coverage scorer returns 2 numbers. + // (number of ner words covered in H, number of all NER words in H) + // let's make one coverage ratio. + + // ratio of Proper noun coverage ... + double ratio_ner = 0; + // special case first ... + if (score2.get(1) == 0) + ratio_ner = 1.0; + else + { + ratio_ner = score2.get(0) / score2.get(1); + } + fv.add(new FeatureValue(ratio_ner)); + + + Vector score3 = verbCoverageScorer.calculateScores(aJCas); + // we know Verb Coverage counter returns 2 numbers. + // (number of covered Vs in H, number of all Vs in H) + double ratio_V = 0; + // special case first... (hmm would be rare but) + if(score3.get(1) ==0) + ratio_V = 1.0; + else + { + ratio_V = score3.get(0) / score3.get(1); + } + fv.add(new FeatureValue(ratio_V)); + + } + catch (ScoringComponentException se) + { + throw new EDAException("Scoring component raised an exception", se); + } + catch (ArrayIndexOutOfBoundsException obe) + { + throw new EDAException("Integrity failure - this simply shouldn't happen", obe); + } + + return fv; + } + + @Override + protected EDAClassifierAbstraction prepareClassifier() throws EDAException + { + try { + return new EDABinaryClassifierFromWeka(new Logistic(), null); + // you can use other classifiers from Weka, such as ... 
+ //return new EDABinaryClassifierFromWeka(new NaiveBayes(), null); + //return new EDABinaryClassifierFromWeka(new VotedPerceptron(), null); + //return new EDABinaryClassifierFromWeka(new J48(), null); + //return new EDABinaryClassifierFromWeka(new MultilayerPerceptron(), null); + //return new EDABinaryClassifierFromWeka(new KStar(), null); + //return new EDABinaryClassifierFromWeka(new SimpleLogistic(), null); + //return new EDABinaryClassifierFromWeka(new RandomForest(), null); + + } + catch (ClassifierException ce) + { + throw new EDAException("Preparing an instance of Classifier for EDA failed: underlying Classifier raised an exception: ", ce); + } + } + + + AlignmentComponent aligner1; + AlignmentComponent aligner2; + AlignmentComponent aligner3; + AlignmentComponent aligner4; + + ScoringComponent wordCoverageScorer; + ScoringComponent nerCoverageScorer; + ScoringComponent verbCoverageScorer; + + + +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/WithVO.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/WithVO.java new file mode 100644 index 00000000..c2fab9de --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/WithVO.java @@ -0,0 +1,178 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.sandbox; + +import java.util.Vector; + +import org.apache.uima.jcas.JCas; + +import weka.classifiers.bayes.NaiveBayes; +import weka.classifiers.functions.Logistic; +import weka.classifiers.functions.MultilayerPerceptron; +import weka.classifiers.functions.SimpleLogistic; +import weka.classifiers.functions.VotedPerceptron; +import weka.classifiers.lazy.KStar; +import weka.classifiers.meta.LogitBoost; +import weka.classifiers.trees.J48; +import weka.classifiers.trees.RandomForest; +import eu.excitementproject.eop.alignmentedas.p1eda.P1EDATemplate; +import eu.excitementproject.eop.alignmentedas.p1eda.classifiers.EDABinaryClassifierFromWeka; 
+import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleProperNounCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleVerbCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleWordCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ClassifierException; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.EDAClassifierAbstraction; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.FeatureValue; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ParameterValue; +import eu.excitementproject.eop.common.EDAException; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException; +import eu.excitementproject.eop.common.component.alignment.PairAnnotatorComponentException; +import eu.excitementproject.eop.common.component.scoring.ScoringComponent; +import eu.excitementproject.eop.common.component.scoring.ScoringComponentException; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.VerbOceanENLinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.WordNetENLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.IdenticalLemmaPhraseLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerDE; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerEN; + +@SuppressWarnings("unused") +public class WithVO extends P1EDATemplate { + + public WithVO() throws EDAException + { + // And let's keep the alinger instance and scoring component... + // This configuration keeps just one for each. 
(as-is counter) + try { + aligner1 = new IdenticalLemmaPhraseLinker(); + aligner2 = new MeteorPhraseLinkerEN(); +// aligner3 = new WordNetENLinker(null); + aligner4 = new VerbOceanENLinker(); + } + catch (AlignmentComponentException ae) + { + throw new EDAException("Initializing Alignment components failed: " + ae.getMessage(), ae); + } + + wordCoverageScorer = new SimpleWordCoverageCounter(null); + nerCoverageScorer = new SimpleProperNounCoverageCounter(); + verbCoverageScorer = new SimpleVerbCoverageCounter(); + } + + @Override + public void addAlignments(JCas input) throws EDAException { + + // Here, just one aligner... (same lemma linker) + try { + aligner1.annotate(input); + aligner2.annotate(input); +// aligner3.annotate(input); // WordNet. Really slow in its current form. (several hours) + aligner4.annotate(input); + + } + catch (PairAnnotatorComponentException pe) + { + throw new EDAException("Underlying aligner raised an exception", pe); + } + + } + + @Override + public Vector evaluateAlignments(JCas aJCas, Vector param) throws EDAException { + + // The simplest possible method... that works well with simple alignment added + // on addAlignments step. + // count the "covered" ratio (== H term linked) of words in H. + // Note that this instance does not utilize param at all. + + // the feature vector that will be filled in + Vector fv = new Vector(); + try { + Vector score1 = wordCoverageScorer.calculateScores(aJCas); + // we know word Coverage scorer returns 4 numbers. + // ( count covered tokens , count all tokens, count covered content-tokens, count all content-tokens) + // Make two "coverage" ratio now. 
+ + double ratio1 = score1.get(0) / score1.get(1); + double ratio2 = score1.get(2) / score1.get(3); + + logger.debug("Adding feature as: " + score1.get(0) + "/" + score1.get(1)); + logger.debug("Adding feature as: " + score1.get(2) + "/" + score1.get(3)); + fv.add(new FeatureValue(ratio1)); + fv.add(new FeatureValue(ratio2)); + + Vector score2 = nerCoverageScorer.calculateScores(aJCas); + // we know NER Coverage scorer returns 2 numbers. + // (number of ner words covered in H, number of all NER words in H) + // let's make one coverage ratio. + + // ratio of Proper noun coverage ... + double ratio_ner = 0; + // special case first ... + if (score2.get(1) == 0) + ratio_ner = 1.0; + else + { + ratio_ner = score2.get(0) / score2.get(1); + } + fv.add(new FeatureValue(ratio_ner)); + + + Vector score3 = verbCoverageScorer.calculateScores(aJCas); + // we know Verb Coverage counter returns 2 numbers. + // (number of covered Vs in H, number of all Vs in H) + double ratio_V = 0; + // special case first... (hmm would be rare but) + if(score3.get(1) ==0) + ratio_V = 1.0; + else + { + ratio_V = score3.get(0) / score3.get(1); + } + fv.add(new FeatureValue(ratio_V)); + + } + catch (ScoringComponentException se) + { + throw new EDAException("Scoring component raised an exception", se); + } + catch (ArrayIndexOutOfBoundsException obe) + { + throw new EDAException("Integrity failure - this simply shouldn't happen", obe); + } + + return fv; + } + + @Override + protected EDAClassifierAbstraction prepareClassifier() throws EDAException + { + try { + return new EDABinaryClassifierFromWeka(new Logistic(), null); + // you can use other classifiers from Weka, such as ... 
+ //return new EDABinaryClassifierFromWeka(new NaiveBayes(), null); + //return new EDABinaryClassifierFromWeka(new VotedPerceptron(), null); + //return new EDABinaryClassifierFromWeka(new J48(), null); + //return new EDABinaryClassifierFromWeka(new MultilayerPerceptron(), null); + //return new EDABinaryClassifierFromWeka(new KStar(), null); + //return new EDABinaryClassifierFromWeka(new SimpleLogistic(), null); + //return new EDABinaryClassifierFromWeka(new RandomForest(), null); + + } + catch (ClassifierException ce) + { + throw new EDAException("Preparing an instance of Classifier for EDA failed: underlying Classifier raised an exception: ", ce); + } + } + + + AlignmentComponent aligner1; + AlignmentComponent aligner2; + AlignmentComponent aligner3; + AlignmentComponent aligner4; + + ScoringComponent wordCoverageScorer; + ScoringComponent nerCoverageScorer; + ScoringComponent verbCoverageScorer; + + + +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/WithoutVO.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/WithoutVO.java new file mode 100644 index 00000000..00e5ecc0 --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/WithoutVO.java @@ -0,0 +1,178 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.sandbox; + +import java.util.Vector; + +import org.apache.uima.jcas.JCas; + +import weka.classifiers.bayes.NaiveBayes; +import weka.classifiers.functions.Logistic; +import weka.classifiers.functions.MultilayerPerceptron; +import weka.classifiers.functions.SimpleLogistic; +import weka.classifiers.functions.VotedPerceptron; +import weka.classifiers.lazy.KStar; +import weka.classifiers.meta.LogitBoost; +import weka.classifiers.trees.J48; +import weka.classifiers.trees.RandomForest; +import eu.excitementproject.eop.alignmentedas.p1eda.P1EDATemplate; +import 
eu.excitementproject.eop.alignmentedas.p1eda.classifiers.EDABinaryClassifierFromWeka; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleProperNounCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleVerbCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleWordCoverageCounter; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ClassifierException; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.EDAClassifierAbstraction; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.FeatureValue; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ParameterValue; +import eu.excitementproject.eop.common.EDAException; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException; +import eu.excitementproject.eop.common.component.alignment.PairAnnotatorComponentException; +import eu.excitementproject.eop.common.component.scoring.ScoringComponent; +import eu.excitementproject.eop.common.component.scoring.ScoringComponentException; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.VerbOceanENLinker; +import eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped.WordNetENLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.IdenticalLemmaPhraseLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerDE; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerEN; + +@SuppressWarnings("unused") +public class WithoutVO extends P1EDATemplate { + + public WithoutVO() throws EDAException + { + // And let's keep the alinger instance and scoring component... + // This configuration keeps just one for each. 
(as-is counter) + try { + aligner1 = new IdenticalLemmaPhraseLinker(); + aligner2 = new MeteorPhraseLinkerEN(); +// aligner3 = new WordNetENLinker(null); +// aligner4 = new VerbOceanENLinker(null); + } + catch (AlignmentComponentException ae) + { + throw new EDAException("Initializing Alignment components failed: " + ae.getMessage(), ae); + } + + wordCoverageScorer = new SimpleWordCoverageCounter(null); + nerCoverageScorer = new SimpleProperNounCoverageCounter(); + verbCoverageScorer = new SimpleVerbCoverageCounter(); + } + + @Override + public void addAlignments(JCas input) throws EDAException { + + // Here, just one aligner... (same lemma linker) + try { + aligner1.annotate(input); + aligner2.annotate(input); +// aligner3.annotate(input); // WordNet. Really slow in its current form. (several hours) +// aligner4.annotate(input); + + } + catch (PairAnnotatorComponentException pe) + { + throw new EDAException("Underlying aligner raised an exception", pe); + } + + } + + @Override + public Vector evaluateAlignments(JCas aJCas, Vector param) throws EDAException { + + // The simplest possible method... that works well with simple alignment added + // on addAlignments step. + // count the "covered" ratio (== H term linked) of words in H. + // Note that this instance does not utilize param at all. + + // the feature vector that will be filled in + Vector fv = new Vector(); + try { + Vector score1 = wordCoverageScorer.calculateScores(aJCas); + // we know word Coverage scorer returns 4 numbers. + // ( count covered tokens , count all tokens, count covered content-tokens, count all content-tokens) + // Make two "coverage" ratio now. 
+ + double ratio1 = score1.get(0) / score1.get(1); + double ratio2 = score1.get(2) / score1.get(3); + + logger.debug("Adding feature as: " + score1.get(0) + "/" + score1.get(1)); + logger.debug("Adding feature as: " + score1.get(2) + "/" + score1.get(3)); + fv.add(new FeatureValue(ratio1)); + fv.add(new FeatureValue(ratio2)); + + Vector score2 = nerCoverageScorer.calculateScores(aJCas); + // we know NER Coverage scorer returns 2 numbers. + // (number of ner words covered in H, number of all NER words in H) + // let's make one coverage ratio. + + // ratio of Proper noun coverage ... + double ratio_ner = 0; + // special case first ... + if (score2.get(1) == 0) + ratio_ner = 1.0; + else + { + ratio_ner = score2.get(0) / score2.get(1); + } + fv.add(new FeatureValue(ratio_ner)); + + + Vector score3 = verbCoverageScorer.calculateScores(aJCas); + // we know Verb Coverage counter returns 2 numbers. + // (number of covered Vs in H, number of all Vs in H) + double ratio_V = 0; + // special case first... (hmm would be rare but) + if(score3.get(1) ==0) + ratio_V = 1.0; + else + { + ratio_V = score3.get(0) / score3.get(1); + } + fv.add(new FeatureValue(ratio_V)); + + } + catch (ScoringComponentException se) + { + throw new EDAException("Scoring component raised an exception", se); + } + catch (ArrayIndexOutOfBoundsException obe) + { + throw new EDAException("Integrity failure - this simply shouldn't happen", obe); + } + + return fv; + } + + @Override + protected EDAClassifierAbstraction prepareClassifier() throws EDAException + { + try { + return new EDABinaryClassifierFromWeka(new Logistic(), null); + // you can use other classifiers from Weka, such as ... 
+ //return new EDABinaryClassifierFromWeka(new NaiveBayes(), null); + //return new EDABinaryClassifierFromWeka(new VotedPerceptron(), null); + //return new EDABinaryClassifierFromWeka(new J48(), null); + //return new EDABinaryClassifierFromWeka(new MultilayerPerceptron(), null); + //return new EDABinaryClassifierFromWeka(new KStar(), null); + //return new EDABinaryClassifierFromWeka(new SimpleLogistic(), null); + //return new EDABinaryClassifierFromWeka(new RandomForest(), null); + + } + catch (ClassifierException ce) + { + throw new EDAException("Preparing an instance of Classifier for EDA failed: underlying Classifier raised an exception: ", ce); + } + } + + + AlignmentComponent aligner1; + AlignmentComponent aligner2; + AlignmentComponent aligner3; + AlignmentComponent aligner4; + + ScoringComponent wordCoverageScorer; + ScoringComponent nerCoverageScorer; + ScoringComponent verbCoverageScorer; + + + +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/package-info.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/package-info.java new file mode 100644 index 00000000..174d7b3b --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/sandbox/package-info.java @@ -0,0 +1,8 @@ + +/** + * A sandbox where you can add various "setups", that configures (extends) P1EDA templates. 
+ * + * @author Tae-Gil Noh + * + */ +package eu.excitementproject.eop.alignmentedas.p1eda.sandbox; \ No newline at end of file diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/scorers/SimpleProperNounCoverageCounter.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/scorers/SimpleProperNounCoverageCounter.java new file mode 100644 index 00000000..9e59b4fa --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/scorers/SimpleProperNounCoverageCounter.java @@ -0,0 +1,137 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.scorers; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Vector; + +import org.apache.log4j.Logger; +import org.apache.uima.cas.CASException; +import org.apache.uima.jcas.JCas; +import org.uimafit.util.JCasUtil; + +import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.NP; +import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS; +import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; +import eu.excitement.type.alignment.Link; +import eu.excitement.type.alignment.LinkUtils; +import eu.excitementproject.eop.common.component.scoring.ScoringComponent; +import eu.excitementproject.eop.common.component.scoring.ScoringComponentException; +import eu.excitementproject.eop.lap.implbase.LAP_ImplBase; + +/** + * (A Language Independent scorer) + * + * A simple, POS-based coverage feature extractor that extracts + * how much of Hypothesis named entities are covered. The decision + * is done purely based on canonical POS type NP (proper noun), and + * not based on proper NER. The good thing about this is that this + * would work for any language that properly supports canonical-POS. + * + * So use this as generic, simple approximation feature. + * + * The scorer always returns two numbers. 
They are; + * ( number of Proper Nouns covered in H side, number of proper nouns in H side ) + * + * @author Tae-Gil Noh + * + */ +public class SimpleProperNounCoverageCounter implements ScoringComponent { + + public SimpleProperNounCoverageCounter() { + } + + @Override + public Vector calculateScores(JCas aJCas) + throws ScoringComponentException { + + Vector result = new Vector(); + JCas hView = null; + + // get all NP Tokens. + Collection propNounTokens = new ArrayList(); + Collection allTokens = null; + try { + hView = aJCas.getView(LAP_ImplBase.HYPOTHESISVIEW); + allTokens = JCasUtil.select(hView, Token.class); + } + catch (CASException ce) + { + throw new ScoringComponentException("Accessing CAS HView failed!", ce); + } + + for (Token t : allTokens) + { + POS p = t.getPos(); + if (p == null) + { + throw new ScoringComponentException("Cannot proceed, this scoring component requires POS annotated"); + } + + if (p.getTypeIndexID() == NP.typeIndexID) + { + propNounTokens.add(t); + } + } + + + int countPNTokens = propNounTokens.size(); + int countCoveredPNTokens = 0; + + logger.debug("calculateScore: count propNoun tokens, HView: " + countPNTokens); + + List links = null; + try + { + links = LinkUtils.selectLinksWith(aJCas, (String) null); + } + catch(CASException ce) + { + throw new ScoringComponentException("Accessing CAS failed somehow!", ce); + } + + logger.debug("calculateScore: total " + links.size() + " links fetched"); + + // for each Token, check if this token is covered. + if (links.size() == 0) + { + // no need to count + countCoveredPNTokens = 0; + } + else + { + for(Token tok : propNounTokens) + { + logger.debug("Checking Token " + tok.getCoveredText()); + + List linksHoldingThisToken = SimpleWordCoverageCounter.filterLinksWithTargetsIncluding(links, tok); + if (linksHoldingThisToken.size() != 0) + { + countCoveredPNTokens ++; + logger.debug("The token is covered by " + linksHoldingThisToken.size() + " link(s)."); + } + } + } + + // Okay. 
Now we have the two numbers. Return them as is. + result.add((double) countCoveredPNTokens); + result.add((double) countPNTokens); + + return result; + } + + + @Override + public String getComponentName() { + return getClass().getName(); + } + + @Override + public String getInstanceName() { + return null; + } + + // privates + private final static Logger logger = Logger.getLogger(SimpleProperNounCoverageCounter.class); + +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/scorers/SimpleVerbCoverageCounter.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/scorers/SimpleVerbCoverageCounter.java new file mode 100644 index 00000000..a26dc7b2 --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/scorers/SimpleVerbCoverageCounter.java @@ -0,0 +1,143 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.scorers; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Vector; + +import org.apache.log4j.Logger; +import org.apache.uima.cas.CASException; +import org.apache.uima.jcas.JCas; +import org.uimafit.util.JCasUtil; + +//import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.NP; +import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS; +import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.V; +import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; +import eu.excitement.type.alignment.Link; +import eu.excitement.type.alignment.LinkUtils; +import eu.excitementproject.eop.common.component.scoring.ScoringComponent; +import eu.excitementproject.eop.common.component.scoring.ScoringComponentException; +import eu.excitementproject.eop.lap.implbase.LAP_ImplBase; + +/** + * (A Language Independent scorer) + * It would be ideal, if we can count coverage of "main verb", or "predicates" + * + * Since that is not possible with simple "lexical" level alignments, here we + * try to check "verbs". 
We try to exclude all auxiliary verbs .. but that isn't easily + * possible in canonical POSes. + * + * So in this simple, language independent module, we only try "verb coverage". + * + * The scorer always returns two numbers. They are; + * ( number of Verbs covered in H side, number of verbs in H side ) + * + * @author Tae-Gil Noh + * + */ +public class SimpleVerbCoverageCounter implements ScoringComponent { + + public SimpleVerbCoverageCounter() { + } + + @Override + public Vector calculateScores(JCas aJCas) + throws ScoringComponentException { + Vector result = new Vector(); + JCas hView = null; + + // get all Verb Tokens. + Collection verbTokens = new ArrayList(); + Collection allTokens = null; + try { + hView = aJCas.getView(LAP_ImplBase.HYPOTHESISVIEW); + allTokens = JCasUtil.select(hView, Token.class); + } + catch (CASException ce) + { + throw new ScoringComponentException("Accessing CAS HView failed!", ce); + } + + for (Token t : allTokens) + { + POS p = t.getPos(); + if (p == null) + { + throw new ScoringComponentException("Cannot proceed, this scoring component requires POS annotated"); + } + + if (p.getTypeIndexID() == V.typeIndexID) + { + verbTokens.add(t); + } + } + + + int countVTokens = verbTokens.size(); + int countCoveredVTokens = 0; + + logger.debug("calculateScore: count verb tokens, HView: " + countVTokens); + + List links = null; + try + { + links = LinkUtils.selectLinksWith(aJCas, (String) null); + } + catch(CASException ce) + { + throw new ScoringComponentException("Accessing CAS failed somehow!", ce); + } + + logger.debug("calculateScore: total " + links.size() + " links fetched"); + + // for each Token, check if this token is covered. 
+ if (links.size() == 0) + { + // no need to count + countCoveredVTokens = 0; + } + else + { + for(Token tok : verbTokens) + { + logger.debug("Checking Token " + tok.getCoveredText()); + + List linksHoldingThisToken = SimpleWordCoverageCounter.filterLinksWithTargetsIncluding(links, tok); + if (linksHoldingThisToken.size() != 0) + { + countCoveredVTokens ++; + logger.debug("The token is covered by " + linksHoldingThisToken.size() + " link(s)."); + } + } + } + + // Okay. Now we have the two numbers. Return them as is. + result.add((double) countCoveredVTokens); + result.add((double) countVTokens); + + return result; + } + + + + @Override + public String getComponentName() { + return getClass().getName(); + } + + @Override + public String getInstanceName() { + return null; + } + + // privates + private final static Logger logger = Logger.getLogger(SimpleProperNounCoverageCounter.class); + + + // TODO: + // Keep list of common auxiliary verbs (let's say, stop verbs), and ignore them in the counting. + // And this list, is overriden by extension of this base case; thus language-specific versions can + // be built from this class as super... 
+ +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/scorers/SimpleWordCoverageCounter.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/scorers/SimpleWordCoverageCounter.java new file mode 100644 index 00000000..d1e51bfe --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/scorers/SimpleWordCoverageCounter.java @@ -0,0 +1,230 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.scorers; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Vector; + +import org.apache.log4j.Logger; +import org.apache.uima.cas.CASException; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.cas.FSArray; +import org.apache.uima.jcas.tcas.Annotation; +import org.uimafit.util.JCasUtil; + +import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS; +import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; +import eu.excitement.type.alignment.Link; +import eu.excitement.type.alignment.LinkUtils; +import eu.excitement.type.alignment.Target; +//import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException; +import eu.excitementproject.eop.common.component.scoring.ScoringComponent; +import eu.excitementproject.eop.common.component.scoring.ScoringComponentException; +import eu.excitementproject.eop.core.component.alignment.phraselink.IdenticalLemmaPhraseLinker; +import eu.excitementproject.eop.lap.implbase.LAP_ImplBase; + +/** + * (A Language Independent scorer) + * + * This is a very simple "alignment evaluator" which reports the following numbers; + * "number of covered word in H", "number of words in H". + * + * calculateScores() method will return 4 numbers + * ( count covered tokens , count all tokens, count covered content-tokens, count all content-tokens) + * + * CAS needs POS annotations and Token annotations, minimally. 
+ * + * @author Tae-Gil Noh + * + */ +public class SimpleWordCoverageCounter implements ScoringComponent { + + /** + * Argument version: gets one linkID of alignment.Link. Then the module uses alignment.Link instances + * with that ID, to calculate "coverage". + * If null given, the module uses, *all* link instances to calculate coverage. + * + * calculateScores will return 4 numbers + * ( count covered tokens , count all tokens, count covered content-tokens, count all content-tokens) + * + * Content token means, tokens with POS other than "PUNC", "PP", "O", "CONJ", "ART" + */ + public SimpleWordCoverageCounter(String alignerID) { + this.alignerIdToMatch = alignerID; + } + + + @Override + public Vector calculateScores(JCas aJCas) + throws ScoringComponentException { + + Vector result = new Vector(4); // this module always returns four numbers + JCas hView = null; + + // get the list of tokens + Collection allTokens = null; + try { + hView = aJCas.getView(LAP_ImplBase.HYPOTHESISVIEW); + allTokens = JCasUtil.select(hView, Token.class); + } + catch (CASException ce) + { + throw new ScoringComponentException("Accessing CAS HView failed!", ce); + } + int countTokens = allTokens.size(); + int countContentTokens = allTokens.size(); // will be reduced within the loops, to reflect only content words. 
+ int countCoveredTokens = 0; + int countCoveredContentTokens = 0; + + logger.debug("calculateScore: count all tokens, HView: " + countTokens); + + // get all Links with the ID + if (alignerIdToMatch == null) + { + logger.debug("calculateScore: all links are fetched"); + } + else + { + logger.debug("calculateScore: links with ID \"" + this.alignerIdToMatch + "\" are fetched"); + } + + List linksWithTheID = null; + try + { + linksWithTheID = LinkUtils.selectLinksWith(aJCas, this.alignerIdToMatch); + } + catch(CASException ce) + { + throw new ScoringComponentException("Accessing CAS failed somehow!", ce); + } + + logger.debug("calculateScore: total " + linksWithTheID.size() + " links fetched"); + + // for each Token, check if this token is covered. + if (linksWithTheID.size() == 0) + { + // no need to count + countCoveredTokens = 0; + } + else + { + for(Token tok : allTokens) + { + logger.debug("Checking Token " + tok.getCoveredText()); + + Boolean nonContentToken = isNonContentToken(tok); + if (nonContentToken) + { + countContentTokens --; + } + + List linksHoldingThisToken = filterLinksWithTargetsIncluding(linksWithTheID, tok); + if (linksHoldingThisToken.size() != 0) + { + countCoveredTokens ++; + logger.debug("The token is covered by " + linksHoldingThisToken.size() + " link(s)."); + if (!nonContentToken) + { + countCoveredContentTokens++; + } + } + } + } + + // now the two numbers are ready. + result.add((double) countCoveredTokens); + result.add((double) countTokens); + result.add((double) countCoveredContentTokens); + result.add((double) countContentTokens); + + return result; + } + + + /** Maybe this need to go to LinkUtils + * TODO: export this method with "direction selection" option to LinkUtils + * + * @param fullList The full list of Links + * @param annot The annotation that is being considered. 
+ * @return + */ + public static List filterLinksWithTargetsIncluding(List fullList, T annot) + { + List filteredList = new ArrayList(); + + for (Link l : fullList) + { + Target tSideTarget = l.getTSideTarget(); + Target hSideTarget = l.getHSideTarget(); + + FSArray arr = null; + arr = tSideTarget.getTargetAnnotations(); + for (Annotation a : JCasUtil.select(arr, Annotation.class)) + { + if (a == annot) + { + filteredList.add(l); + break; + } + } + + arr = hSideTarget.getTargetAnnotations(); + for (Annotation a : JCasUtil.select(arr, Annotation.class)) + { + if (a == annot) + { + // In this score component, we ignore HtoT case. (only TtoH and bidirection) + // Hmm. possible better coding for this? + if (l.getDirection() == Link.Direction.HtoT) + break; + filteredList.add(l); + break; + } + } + } + + return filteredList; + } + + + @Override + public String getComponentName() { + return getClass().getName(); + } + + @Override + public String getInstanceName() { + return null; + } + + /** + * This utility checks if the token is one of non-content token type. + * (e.g. "PUNC", "PP", "O", "CONJ", "ART"). Actual definition of non content POSes + * are borrowed from a static definition set in IdenticalLemmaPhraseLinker. + * + * @param t The token to be checked. 
+ * @return + */ + private boolean isNonContentToken(Token t) throws ScoringComponentException + { + + POS p = t.getPos(); + if (p == null) + { + throw new ScoringComponentException("The module requires POS annotated for the Tokens, to check non-content words"); + } + String s = p.getType().toString(); + String typeString = s.substring(s.lastIndexOf(".") + 1); + //String logline = t.getCoveredText() + "/" + typeString + ", "; + Boolean result = IdenticalLemmaPhraseLinker.isNonContentPos.containsKey(typeString); + logger.debug(t.getCoveredText() + "/" + typeString + ": isNonContentToken: " + result); + + return result; + } + + private final String alignerIdToMatch; + private final static Logger logger = Logger.getLogger(SimpleWordCoverageCounter.class); + + + +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/scorers/package-info.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/scorers/package-info.java new file mode 100644 index 00000000..3b23f91f --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/scorers/package-info.java @@ -0,0 +1,8 @@ +/** + * This package holds some example Scorer ("Feature extractors", or "alignment evaluators", or JCas with alignment + * in and some features out) modules for P1EDA. 
+ * + * + * @author Tae-Gil Noh + */ +package eu.excitementproject.eop.alignmentedas.p1eda.scorers; \ No newline at end of file diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/subs/ClassifierException.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/subs/ClassifierException.java new file mode 100644 index 00000000..2782f255 --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/subs/ClassifierException.java @@ -0,0 +1,23 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.subs; + +public class ClassifierException extends Exception { + + /** + * An exception designed to be thrown from EDAClassifierAbstraction implementations + */ + private static final long serialVersionUID = 1408025289983238534L; + + public ClassifierException(String message) { + super(message); + } + + public ClassifierException(Throwable cause) { + super(cause); + } + + public ClassifierException(String message, Throwable cause) { + super(message, cause); + } + + +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/subs/DecisionLabelWithConfidence.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/subs/DecisionLabelWithConfidence.java new file mode 100644 index 00000000..f29ecc0e --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/subs/DecisionLabelWithConfidence.java @@ -0,0 +1,29 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.subs; + +import eu.excitementproject.eop.common.DecisionLabel; + +/** + * Classification result for single TE instance, on EDAClassifierAbstraction + * + * @author Tae-Gil Noh + */ +public class DecisionLabelWithConfidence { + + public DecisionLabelWithConfidence(DecisionLabel label, double confidence) { + this.label = label; + this.confidence = confidence; + } + + public DecisionLabel getLabel() + { + return label; + } + + public double 
getConfidence() + { + return confidence; + } + + private final DecisionLabel label; + private final double confidence; +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/subs/EDAClassifierAbstraction.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/subs/EDAClassifierAbstraction.java new file mode 100644 index 00000000..ab49b7fd --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/subs/EDAClassifierAbstraction.java @@ -0,0 +1,50 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.subs; + +import java.io.File; +import java.util.List; +import java.util.Vector; + + +/** + * + * An abstract for classifier(s) that support TE decisions (EDAs). + * + * See "EDABinaryClassifierFromWeka" for an implementation example. + * + * @author Tae-Gil Noh + * + */ +public interface EDAClassifierAbstraction { + + /** + * @param featureVector + * @return DecisionLabelWithConfidence + */ + public DecisionLabelWithConfidence classifyInstance(Vector featureVector) throws ClassifierException; + + /** + * @param goldData + */ + public void createClassifierModel(List goldData) throws ClassifierException; + + /** + * @param path + * @throws ClassifierException + */ + public void storeClassifierModel(File path) throws ClassifierException; + + /** + * @param path + * @throws ClassifierException + */ + public void loadClassifierModel(File path) throws ClassifierException; + + /** + * Evaluate currently trained classifier and returns a list of double values where it holds; + * (accuracy, f1, prec, recall, true positive ratio, true negative ratio) + * + * @param goldData + * @throws ClassifierException + */ + public List evaluateClassifier(List goldData) throws ClassifierException; +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/subs/FeatureValue.java 
b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/subs/FeatureValue.java new file mode 100644 index 00000000..e843b111 --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/subs/FeatureValue.java @@ -0,0 +1,30 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.subs; + + + +/** + * A class that represents one Feature value, for classifier abstraction + * Relies on Value class. + * + * @author Tae-Gil Noh + */ +public class FeatureValue extends Value { + + /** + * + */ + private static final long serialVersionUID = -1413295719580599625L; + + public FeatureValue(double d) { + super(d); + } + + public FeatureValue(Enum e) { + super(e); + } + + public FeatureValue(Boolean b) { + super(b); + } + +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/subs/LabeledInstance.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/subs/LabeledInstance.java new file mode 100644 index 00000000..7575f14c --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/subs/LabeledInstance.java @@ -0,0 +1,27 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.subs; + +import java.util.Vector; + +import eu.excitementproject.eop.common.DecisionLabel; + +public class LabeledInstance { + + public LabeledInstance(DecisionLabel goldLabel, Vector featureVector) { + + this.label = goldLabel; + this.featureVector = featureVector; + } + + public DecisionLabel getLabel() + { + return this.label; + } + + public Vector getFeatureVector() + { + return this.featureVector; + } + + private final DecisionLabel label; + private final Vector featureVector; +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/subs/ParameterValue.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/subs/ParameterValue.java new file mode 100644 index 00000000..2ed975da --- /dev/null +++ 
b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/subs/ParameterValue.java @@ -0,0 +1,76 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.subs; + +/** + * A class that represents one Parameter value + * The type is used in P1EDA as a representation of a parameter value. + * + * @author Tae-Gil Noh + */ + +public class ParameterValue extends Value { + + /** + * + */ + private static final long serialVersionUID = 3934660978437533820L; + /** + * Constructor for initializing a ParameterValue + * with double. Range of the parameter will be given + * from default. (0 ~ 1) + * + * @param d + */ + public ParameterValue(double val) { + super(val); + this.rangeMin = DEFAULT_MIN; + this.rangeMax = DEFAULT_MAX; + } + + /** + * Constructor for initializing a ParameterValue + * with double. This constructor enables you to set + * min/max range of the double value of this parameter. + * + * @param val + * @param rangeMin + * @param rangeMax + */ + public ParameterValue(double val, double rangeMin, double rangeMax) + { + super(val); + this.rangeMin = rangeMin; + this.rangeMax = rangeMax; + } + + public ParameterValue(Enum e) { + super(e); + rangeMin = null; + rangeMax = null; + } + + public ParameterValue(Boolean b) { + super(b); + rangeMin = null; + rangeMax = null; + } + + public double getRangeMin() + { + return rangeMin; + } + + public double getRangeMax() + { + return rangeMax; + } + + // Range (min and max values) of the parameter value + // the values are only meaningful when getValueType() == ValueType.DOUBLE + private final Double rangeMin; + private final Double rangeMax; + + // Default values + private final double DEFAULT_MIN = 0.0; + private final double DEFAULT_MAX = 1.0; + +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/subs/Value.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/subs/Value.java new file mode 100644 index 00000000..0e041ce2 --- /dev/null +++ 
b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/subs/Value.java @@ -0,0 +1,109 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.subs; + +import java.io.Serializable; + +/** + * This class expresses a "value" that can be either a Boolean, a double number, or a nominal value. + * + * The main usage of this class is to be represented as one "feature value", or one "parameter value". + * + * + * @author Tae-Gil Noh + * + */ +public class Value implements Serializable { + + /** + * + */ + private static final long serialVersionUID = -6413857523514908870L; + + public Value(double d) + { + dValue = d; + bValue = null; + nValue = null; + type = ValueType.DOUBLE; + } + + public Value(Enum e) + { + dValue = null; + bValue = null; + nValue = e; + type = ValueType.NOMINAL; + } + + public Value(Boolean b) + { + dValue = null; + bValue = b; + nValue = null; + type = ValueType.BOOLEAN; + } + + // getters + + public Boolean getBooleanValue() throws ValueException + { + if (type != ValueType.BOOLEAN) + { + throw new ValueException("Boolean value requested, while the value is not a boolean"); + } + return bValue; + } + + public Double getDoubleValue() throws ValueException + { + if (type != ValueType.DOUBLE) + { + throw new ValueException("Double value requested, while the value is not a double"); + } + return dValue; + } + + public Enum getNominalValue() throws ValueException + { + if (type != ValueType.NOMINAL) + { + throw new ValueException("Nominal value requested, while the value is not a Nominal"); + } + return nValue; + } + + public ValueType getValueType() + { + return this.type; + } + + // overriding toString + @Override + public String toString() + { + if (type == ValueType.BOOLEAN) + { + return bValue.toString(); + } + else if (type == ValueType.DOUBLE) + { + return dValue.toString(); + } + else // (type == ValueType.NOMINAL) + { + return nValue.toString(); + } + } + + // private data + + private final ValueType type; + private final 
Double dValue; + private final Boolean bValue; + private final Enum nValue; + + public enum ValueType + { + DOUBLE, BOOLEAN, NOMINAL + } + +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/subs/ValueException.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/subs/ValueException.java new file mode 100644 index 00000000..b6f86192 --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/subs/ValueException.java @@ -0,0 +1,19 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.subs; + +public class ValueException extends Exception { + + private static final long serialVersionUID = -3913760857316479052L; + + public ValueException(String message) { + super(message); + } + + public ValueException(Throwable cause) { + super(cause); + } + + public ValueException(String message, Throwable cause) { + super(message, cause); + } + +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/subs/package-info.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/subs/package-info.java new file mode 100644 index 00000000..cc3bb195 --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/subs/package-info.java @@ -0,0 +1,7 @@ +/** + * This package holds several data structures and support classes for P1EDA. 
+ * + * @author Tae-Gil Noh + * + */ +package eu.excitementproject.eop.alignmentedas.p1eda.subs; \ No newline at end of file diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/templates/package-info.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/templates/package-info.java new file mode 100644 index 00000000..f29ebf83 --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/templates/package-info.java @@ -0,0 +1,8 @@ +/** + * + */ +/** + * @author tailblues + * + */ +package eu.excitementproject.eop.alignmentedas.p1eda.templates; \ No newline at end of file diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/tools/CompareTwoEDAs.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/tools/CompareTwoEDAs.java new file mode 100644 index 00000000..dfdbb366 --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/tools/CompareTwoEDAs.java @@ -0,0 +1,207 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.tools; + +import java.io.File; +import java.util.HashMap; + +import org.apache.log4j.Logger; +import org.apache.uima.cas.FSIterator; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.cas.TOP; + +import eu.excitement.type.entailment.Pair; +import eu.excitementproject.eop.alignmentedas.p1eda.P1EDATemplate; +import eu.excitementproject.eop.alignmentedas.p1eda.sandbox.WithVO; +import eu.excitementproject.eop.alignmentedas.p1eda.sandbox.WithoutVO; +import eu.excitementproject.eop.common.DecisionLabel; +import eu.excitementproject.eop.common.EDABasic; +import eu.excitementproject.eop.common.EDAException; +import eu.excitementproject.eop.common.TEDecision; +import eu.excitementproject.eop.lap.LAPException; +import eu.excitementproject.eop.lap.PlatformCASProber; + +/** + * This class holds some static methods that are useful, or needed to + * compare and 
visualize differences between two different (P1) EDAs + * + * @author Tae-Gil Noh + * + */ +public class CompareTwoEDAs { + + // TODO: Two lists output? instead of one? a.) improved on EDA2. b.) degraded on EDA2. + // TODO: startTraining(XML file?, modelToStore)? (on template side) + // TODO: include training... + + + public static void main(String[] args) + { + try { + P1EDATemplate withVO = new WithVO(); + P1EDATemplate withoutVO = new WithoutVO(); + + withVO.initialize(new File("target/withVO.cmodel")); + withoutVO.initialize(new File("target/withoutVO.cmodel")); + + logDiffPairs(withVO, withoutVO, new File("target/testingXmis")); + } + catch (Exception e) + { + System.err.println("Run stopped with Exception: " + e.getMessage()); + } + } + + + + + /** + * Pass two initialized (ready to be call process()) EDAs, + * and one dir Path to XMI files. + * + * The utility will print out to Logger; all pair IDs that the two given + * EDAs did *not* agree. + * + * @param eda1 + * @param eda2 + * @param dirXMITestSet + */ + static public void logDiffPairs(EDABasic eda1, EDABasic eda2, File dirXMITestSet) throws Exception + { + // well. Run each of the XMI pairs on two EDAs, and keep the + // pairID, and results. + // oh, by the way, keep general accuracy too. ... + + HashMap diffPairs = new HashMap(); // diffPairs.get("id") = "eda 1 result (confidence), eda2 result(confidence), gold result" + int countEda1Correct = 0; + int countEda2Correct = 0; + int countTotalPair = 0; + + File[] files = dirXMITestSet.listFiles(); + + // for each XMI ... + for(File f : files) + { + // sanity check first + logger.info("Working with file " + f.getName()); + if(!f.isFile()) + { // no ... + logger.warn(f.toString() + " is not a file... ignore this"); + continue; + } + if(!f.getName().toLowerCase().endsWith("xmi")) // let's trust name, if it does not end with XMI, pass + { + logger.warn(f.toString() + " is not a XMI file... 
ignoring this"); + continue; + } + + // load XMI to two CASes + // (Note that we can't share CASes between the two EDAs. Alignments are being added, + // and a CAs is updated with run of alignment based EDA process() call. + + JCas pairForEDA1 = null; + JCas pairForEDA2 = null; + try { + pairForEDA1 = PlatformCASProber.probeXmi(f, null); + pairForEDA2 = PlatformCASProber.probeXmi(f, null); + } + catch (LAPException le) + { + logger.warn("File " + f.toString() + " looks like XMI file, but its contents are *not* proper EOP EDA JCas"); + throw new EDAException("failed to read XMI file into a JCas", le); + } + + // get pair ID and gold annotation + String pairId = getTEPairID(pairForEDA1); + logger.info("comparing two edas on pair " + pairId); + DecisionLabel gold = getGoldLabel(pairForEDA1); + + // get the result from the two edas + TEDecision eda1s = eda1.process(pairForEDA1); + TEDecision eda2s = eda2.process(pairForEDA2); + + // update counters + countTotalPair ++; + if (eda1s.getDecision() == gold) + { + countEda1Correct++; + } + + if (eda2s.getDecision() == gold) + { + countEda2Correct++; + } + + // update diff list + if (! 
(eda1s.getDecision() == eda2s.getDecision())) + { + logger.debug("different results on pair " + pairId + ": " + eda1s.getDecision().toString() + ", " + eda2s.getDecision().toString()); + diffPairs.put(pairId, eda1s.getDecision().toString() + ", " + eda2s.getDecision().toString() + " (gold: " + gold.toString() + ")"); + } + + } + + logger.info("In total, " + countTotalPair + " pairs tested"); + logger.info("eda1: " + countEda1Correct + " / " + countTotalPair); + logger.info("eda2: " + countEda2Correct + " / " + countTotalPair); + logger.info("Diff list is:"); + + for (String s : diffPairs.keySet()) + { + logger.info(s + ": " + diffPairs.get(s)); + } + + } + + private static Logger logger = Logger.getLogger(CompareTwoEDAs.class); + + public static DecisionLabel getGoldLabel(JCas aJCas) throws EDAException + { + String labelString; + DecisionLabel labelEnum; + + FSIterator iter = aJCas.getJFSIndexRepository().getAllIndexedFS(Pair.type); + if (iter.hasNext()) + { + Pair p = (Pair) iter.next(); + labelString = p.getGoldAnswer(); + + if (labelString == null) // there is no gold answer annotated in this Pair + return null; + + labelEnum = DecisionLabel.getLabelFor(labelString); + + if (iter.hasNext()) + { + logger.warn("This JCas has more than one TE Pairs: This P1EDA template only processes single-pair inputs. Any additional pairs are being ignored, and only the first Pair will be processed."); + } + return labelEnum; + } + else + { + throw new EDAException("Input CAS is not well-formed CAS as EOP EDA input: missing TE pair"); + } + } + + public static String getTEPairID(JCas aJCas) throws EDAException + { + String id = null; + + // check entailment pair, + FSIterator iter = aJCas.getJFSIndexRepository().getAllIndexedFS(Pair.type); + if (iter.hasNext()) + { + Pair p = (Pair) iter.next(); + id = p.getPairID(); + + if (iter.hasNext()) + { + logger.warn("This JCas has more than one TE Pairs: This P1EDA template only processes single-pair inputs. 
Any additional pairs are being ignored, and only the first Pair will be processed."); + } + return id; + } + else + { + throw new EDAException("Input CAS is not well-formed CAS as EOP EDA input: missing TE pair"); + } + } + +} diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/tools/package-info.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/tools/package-info.java new file mode 100644 index 00000000..9455b460 --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/tools/package-info.java @@ -0,0 +1,9 @@ + +/** + * Some tools classes that are used to test, evaluate, or experiment with + * P1 EDAs + * + * @author Tae-Gil Noh + * + */ +package eu.excitementproject.eop.alignmentedas.p1eda.tools; \ No newline at end of file diff --git a/alignmentedas/src/test/java/eu/excitement/type/alignment/InMemoryCasTest.java b/alignmentedas/src/test/java/eu/excitement/type/alignment/InMemoryCasTest.java new file mode 100644 index 00000000..546e6993 --- /dev/null +++ b/alignmentedas/src/test/java/eu/excitement/type/alignment/InMemoryCasTest.java @@ -0,0 +1,120 @@ +package eu.excitement.type.alignment; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; + +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.uima.jcas.JCas; + +import eu.excitementproject.eop.alignmentedas.P1EdaRTERunner; +import eu.excitementproject.eop.lap.LAPException; +import eu.excitementproject.eop.lap.PlatformCASProber; +import eu.excitementproject.eop.lap.dkpro.MaltParserEN; +import eu.excitementproject.eop.lap.dkpro.TreeTaggerEN; +import eu.excitementproject.eop.lap.implbase.LAP_ImplBase; + +/** + * Test code for, how much memory does it require to load 800 CASes in memory, + * for parameter optimization... + * + * Okay. 800 CASes can be handled in 8G memory system... + * (with just LAP annotations... 
) + * + * For now, we have no fast "binary" serialization methods available (e.g. requires later DKPro) + * So, let's proceed with this memory version for "parameter (of evaluateAlignment / Feature extractor)" + * optimizers. + * + * @author Tae-Gil Noh + * + */ +@SuppressWarnings("unused") +public class InMemoryCasTest { + +// public InMemoryCasTest() { +// } + + + public static void main(String[] args) { + + // logger + BasicConfigurator.configure(); + Logger.getRootLogger().setLevel(Level.INFO); + + try { + + // generate Xmis + //LAP_ImplBase lap = new TreeTaggerEN(); + LAP_ImplBase lap = new MaltParserEN(); + File rteInputXML = new File("../core/src/main/resources/data-set/English_dev.xml"); + File xmiDir = new File ("target/xmiTest/"); + P1EdaRTERunner.runLAPForXmis(lap, rteInputXML, xmiDir); + + logger.info("***"); logger.info("***"); logger.info("***"); + + // load Xmis + List jCasList = loadXmisAsJCasList(xmiDir); + System.out.println("JCas list loaded : " + jCasList.size() + " instances."); + + } + catch (Exception e) + { + logger.error(e.getMessage()); + System.exit(1); + } + } + + + public static List loadXmisAsJCasList(File xmiDir) + { + List casList = new ArrayList(); + + // walk each XMI files in the Directory ... + File[] files = xmiDir.listFiles(); + if (files == null) + { + logger.warn("Path " + xmiDir.getAbsolutePath() + " does not hold XMI files"); + System.exit(1); + } + + for (File f : files) + { + // is it a XMI file? + // + + logger.info("Working with file " + f.getName()); + if(!f.isFile()) + { // no ... + logger.warn(f.toString() + " is not a file... ignore this"); + continue; + } + if(!f.getName().toLowerCase().endsWith("xmi")) // let's trust name, if it does not end with XMI, pass + { + logger.warn(f.toString() + " is not a XMI file... ignoring this"); + continue; + } + + // So, we have an XMI file. 
Load in to CAS + JCas aTrainingPair = null; + try { + aTrainingPair = PlatformCASProber.probeXmi(f, null); + } + catch (LAPException le) + { + logger.warn("File " + f.toString() + " looks like XMI file, but its contents are *not* proper EOP EDA JCas"); + System.exit(1); + } + casList.add(aTrainingPair); + } + + logger.info("Loaded " + casList.size() + " XMI files as JCas..."); + return casList; + } + + public static Logger logger = Logger.getLogger(InMemoryCasTest.class); + + + +} diff --git a/alignmentedas/src/test/java/eu/excitement/type/alignment/LinkUtilsTest.java b/alignmentedas/src/test/java/eu/excitement/type/alignment/LinkUtilsTest.java new file mode 100644 index 00000000..e3d7959d --- /dev/null +++ b/alignmentedas/src/test/java/eu/excitement/type/alignment/LinkUtilsTest.java @@ -0,0 +1,108 @@ +package eu.excitement.type.alignment; + +import static org.junit.Assert.*; + +import java.util.List; + +import org.apache.commons.lang.exception.ExceptionUtils; +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.uima.cas.CASException; +import org.apache.uima.jcas.JCas; +import org.junit.Assume; +import org.junit.Ignore; +import org.junit.Test; + +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.common.utilities.uima.UimaUtils; +import eu.excitementproject.eop.core.component.alignment.phraselink.IdenticalLemmaPhraseLinker; +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseLinkerEN; +import eu.excitementproject.eop.lap.dkpro.OpenNLPTaggerEN; +import eu.excitementproject.eop.lap.dkpro.TreeTaggerEN; + +@SuppressWarnings("unused") +public class LinkUtilsTest { + + @Ignore + @Test + public void test() { + // Set Log4J for the test + BasicConfigurator.resetConfiguration(); + BasicConfigurator.configure(); + Logger.getRootLogger().setLevel(Level.INFO); // for UIMA (hiding < INFO) + Logger testlogger = 
Logger.getLogger("eu.excitement.type.alignment.LunkUtilsTest"); + + + // prepare a lemmatizer + TreeTaggerEN lemmatizer = null; + try + { + JCas test = UimaUtils.newJcas(); + lemmatizer = new TreeTaggerEN(); + lemmatizer.generateSingleTHPairCAS("this is a test.", "TreeTagger in sight?"); + } + catch (Exception e) + { + // check if this is due to missing TreeTagger binary and model. + // In such a case, we just skip this test. + // (see /lap/src/scripts/treetagger/README.txt to how to install TreeTagger) + if (ExceptionUtils.getRootCause(e) instanceof java.io.IOException) + { + testlogger.info("Skipping the test: TreeTagger binary and/or models missing. \n To run this testcase, TreeTagger installation is needed. (see /lap/src/scripts/treetagger/README.txt)"); + Assume.assumeTrue(false); // we won't test this test case any longer. + } + + fail(e.getMessage()); + } + + // prepare aJCas with two different aligners + AlignmentComponent idtLinker = null; + AlignmentComponent phraseLinker = null; + JCas aJCas = null; + try { + idtLinker = new IdenticalLemmaPhraseLinker(); + phraseLinker = new MeteorPhraseLinkerEN(); + } + catch (Exception e) + { + fail(e.getMessage()); + } + try { + // RTE3 test pair 17 (some links in both Meteor & same lemma linker) + aJCas = lemmatizer.generateSingleTHPairCAS( + "David Golinkin is single-handedly responsible for uncovering and re-publishing dozens of responsa of the Committee on Jewish Law and Standards of the Rabbinical Assembly, making them available to the general public in a three-volume set.", + "David Golinkin is the author of dozen of responsa of the Committee on Jewish Law and Standards of the Rabbinical Assembly."); + phraseLinker.annotate(aJCas); + idtLinker.annotate(aJCas); + //LinkUtils.dumpTokenLevelLinks(aJCas, System.out); + } + catch (Exception e) + { + fail(e.getMessage()); + } + + // test selectLinksWith + try { + selectLinksWithTest(aJCas); + } + catch (Exception e) + { + fail(e.getMessage()); + } + + } + + private 
void selectLinksWithTest(JCas aJCas) throws CASException + { + List linksWithMeteor = LinkUtils.selectLinksWith(aJCas, "MeteorPhraseLink"); + assertEquals(4, linksWithMeteor.size()); + // System.out.println(linksWithMeteor.size()); + List linksWithIdentical = LinkUtils.selectLinksWith(aJCas, "IdenticalLemmas"); + assertEquals(18, linksWithIdentical.size()); + // System.out.println(linksWithIdentical.size()) ; + + } + + +} diff --git a/alignmentedas/src/test/java/eu/excitementproject/eop/alignmentedas/MinimalP1EDATest.java b/alignmentedas/src/test/java/eu/excitementproject/eop/alignmentedas/MinimalP1EDATest.java new file mode 100644 index 00000000..315852b1 --- /dev/null +++ b/alignmentedas/src/test/java/eu/excitementproject/eop/alignmentedas/MinimalP1EDATest.java @@ -0,0 +1,107 @@ +package eu.excitementproject.eop.alignmentedas; + +import static org.junit.Assert.*; + +import java.io.File; + +import org.apache.commons.lang.exception.ExceptionUtils; +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.uima.jcas.JCas; +import org.junit.Assume; +import org.junit.Test; + +import eu.excitementproject.eop.alignmentedas.p1eda.P1EDATemplate; +import eu.excitementproject.eop.alignmentedas.p1eda.TEDecisionWithAlignment; +import eu.excitementproject.eop.alignmentedas.p1eda.instances.MinimalP1EDA; +import eu.excitementproject.eop.common.EDAException; +import eu.excitementproject.eop.lap.LAPException; +import eu.excitementproject.eop.lap.PlatformCASProber; +import eu.excitementproject.eop.lap.dkpro.TreeTaggerEN; + +public class MinimalP1EDATest { + + @Test + public void test() { + + // Set Log4J for the test + BasicConfigurator.resetConfiguration(); + BasicConfigurator.configure(); + Logger.getRootLogger().setLevel(Level.DEBUG); // set INFO to hide Debug + testlogger = Logger.getLogger(getClass().getName()); + + // prepare a lemmatizer + TreeTaggerEN lap = null; + + try + { + lap = new TreeTaggerEN(); + 
lap.generateSingleTHPairCAS("this is a test.", "TreeTagger in sight?"); + } + catch (Exception e) + { + // check if this is due to missing TreeTagger binary and model. + // In such a case, we just skip this test. + // (see /lap/src/scripts/treetagger/README.txt to how to install TreeTagger) + if (ExceptionUtils.getRootCause(e) instanceof java.io.IOException) + { + testlogger.info("Skipping the test: TreeTagger binary and/or models missing. \n To run this testcase, TreeTagger installation is needed. (see /lap/src/scripts/treetagger/README.txt)"); + Assume.assumeTrue(false); // we won't test this test case any longer. + } + } + + try { + doMinimalTest(lap); + } + catch (Exception e) + { + fail(e.getMessage()); + } + + } + + public void doMinimalTest(TreeTaggerEN lap) throws EDAException, LAPException + { + + // get an instance of the EDA + P1EDATemplate eda = new MinimalP1EDA(); + + // Make the "very simple", "minimal" two training data. + JCas cas1 = lap.generateSingleTHPairCAS("The train was uncomfortable", "the train was comfortable", "NONENTAILMENT"); + JCas cas2 = lap.generateSingleTHPairCAS("The person is hired as a postdoc.","The person is hired as a postdoc.", "ENTAILMENT"); + + File xmiDir = new File("target/xmis/"); + if (!xmiDir.exists()) + { + xmiDir.mkdirs(); + } + File modelBaseName = new File("target/simple"); + + PlatformCASProber.storeJCasAsXMI(cas1, new File("target/xmis/train1.xmi")); + PlatformCASProber.storeJCasAsXMI(cas2, new File("target/xmis/train2.xmi")); + + // Okay. Start Training + eda.startTraining(xmiDir, modelBaseName); + + // ask something? 
+ JCas eopJCas = lap.generateSingleTHPairCAS("This was hello world.", "This is hello world."); + eda.process(eopJCas); + + // ask another + eopJCas = lap.generateSingleTHPairCAS("This is a very simple configuration.", "This is in fact a complex configuration."); + TEDecisionWithAlignment d1 = eda.process(eopJCas); + testlogger.info(d1.getDecision() + ", " + d1.getConfidence()); + + // load Model test + MinimalP1EDA eda2 = new MinimalP1EDA(); + eda2.initialize(modelBaseName); + TEDecisionWithAlignment d2 = eda2.process(eopJCas); + assertEquals(d2.getDecision(), d1.getDecision()); + assertEquals(d2.getConfidence(), d1.getConfidence(), 0.01); + + } + + public static Logger testlogger; + +} diff --git a/alignmentedas/src/test/java/eu/excitementproject/eop/alignmentedas/scorers/SimpleNerCoverageCounterTest.java b/alignmentedas/src/test/java/eu/excitementproject/eop/alignmentedas/scorers/SimpleNerCoverageCounterTest.java new file mode 100644 index 00000000..bcf7956d --- /dev/null +++ b/alignmentedas/src/test/java/eu/excitementproject/eop/alignmentedas/scorers/SimpleNerCoverageCounterTest.java @@ -0,0 +1,81 @@ +package eu.excitementproject.eop.alignmentedas.scorers; + +import static org.junit.Assert.*; + +import java.util.Vector; + +import org.apache.commons.lang.exception.ExceptionUtils; +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.uima.jcas.JCas; +import org.junit.Assume; +import org.junit.Test; + +import eu.excitement.type.alignment.LinkUtils; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleProperNounCoverageCounter; +//import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleWordCoverageCounter; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.common.component.scoring.ScoringComponent; +import eu.excitementproject.eop.core.component.alignment.phraselink.IdenticalLemmaPhraseLinker; +import 
eu.excitementproject.eop.lap.dkpro.TreeTaggerEN; + +public class SimpleNerCoverageCounterTest { + + @Test + public void test() { + // Set Log4J for the test + BasicConfigurator.resetConfiguration(); + BasicConfigurator.configure(); + Logger.getRootLogger().setLevel(Level.DEBUG); // for UIMA (hiding < INFO) + Logger testlogger = Logger.getLogger(getClass().getName()); + + // prepare a lemmatizer + TreeTaggerEN lemmatizer = null; + JCas aJCas = null; + try + { + lemmatizer = new TreeTaggerEN(); + aJCas = lemmatizer.generateSingleTHPairCAS("John Fitzgerald Kennedy, the 35th President of the United States, was assassinated at 12:30 p.m. Central Standard Time (18:30 UTC) on Friday, November 22, 1963, in Dealey Plaza, Dallas, Texas.", "John F. Kennedy was assasinated by Lee Harvey Oswald at the state of Dallas."); + } + catch (Exception e) + { + // check if this is due to missing TreeTagger binary and model. + // In such a case, we just skip this test. + // (see /lap/src/scripts/treetagger/README.txt to how to install TreeTagger) + if (ExceptionUtils.getRootCause(e) instanceof java.io.IOException) + { + testlogger.info("Skipping the test: TreeTagger binary and/or models missing. \n To run this testcase, TreeTagger installation is needed. (see /lap/src/scripts/treetagger/README.txt)"); + Assume.assumeTrue(false); // we won't test this test case any longer. + } + + fail(e.getMessage()); + } + + // annotate with identity + try { + AlignmentComponent idtLinker = new IdenticalLemmaPhraseLinker(); + idtLinker.annotate(aJCas); + LinkUtils.dumpTokenLevelLinks(aJCas, System.out); + + } + catch (Exception e) + { + fail(e.getMessage()); + } + + // and simple test. 
+ ScoringComponent count1 = new SimpleProperNounCoverageCounter(); + try { + Vector v = count1.calculateScores(aJCas); + testlogger.info(v.get(0)); + testlogger.info(v.get(1)); + } + catch (Exception e) + { + fail(e.getMessage()); + } + + } + +} diff --git a/alignmentedas/src/test/java/eu/excitementproject/eop/alignmentedas/scorers/SimpleWordCoverageCounterTest.java b/alignmentedas/src/test/java/eu/excitementproject/eop/alignmentedas/scorers/SimpleWordCoverageCounterTest.java new file mode 100644 index 00000000..917ee1a1 --- /dev/null +++ b/alignmentedas/src/test/java/eu/excitementproject/eop/alignmentedas/scorers/SimpleWordCoverageCounterTest.java @@ -0,0 +1,108 @@ +package eu.excitementproject.eop.alignmentedas.scorers; + +import static org.junit.Assert.*; + +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Vector; + +import org.apache.commons.lang.exception.ExceptionUtils; +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.uima.jcas.JCas; +import org.junit.Assume; +import org.junit.Test; +import org.uimafit.util.JCasUtil; + +import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; +import eu.excitement.type.alignment.Link; +import eu.excitement.type.alignment.LinkUtils; +import eu.excitementproject.eop.alignmentedas.p1eda.scorers.SimpleWordCoverageCounter; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.common.component.scoring.ScoringComponent; +import eu.excitementproject.eop.core.component.alignment.phraselink.IdenticalLemmaPhraseLinker; +import eu.excitementproject.eop.lap.dkpro.TreeTaggerEN; + +public class SimpleWordCoverageCounterTest { + + @Test + public void test() { + + // Set Log4J for the test + BasicConfigurator.resetConfiguration(); + BasicConfigurator.configure(); + Logger.getRootLogger().setLevel(Level.DEBUG); // for UIMA (hiding < INFO) + Logger 
testlogger = Logger.getLogger(getClass().getName()); + + // prepare a lemmatizer + TreeTaggerEN lemmatizer = null; + JCas aJCas = null; + try + { + lemmatizer = new TreeTaggerEN(); + aJCas = lemmatizer.generateSingleTHPairCAS("Lovely TreeTagger test is in sight, or lovely goes not?", "Lovely goes a test."); + } + catch (Exception e) + { + // check if this is due to missing TreeTagger binary and model. + // In such a case, we just skip this test. + // (see /lap/src/scripts/treetagger/README.txt to how to install TreeTagger) + if (ExceptionUtils.getRootCause(e) instanceof java.io.IOException) + { + testlogger.info("Skipping the test: TreeTagger binary and/or models missing. \n To run this testcase, TreeTagger installation is needed. (see /lap/src/scripts/treetagger/README.txt)"); + Assume.assumeTrue(false); // we won't test this test case any longer. + } + + fail(e.getMessage()); + } + + // annotate with identity + try { + AlignmentComponent idtLinker = new IdenticalLemmaPhraseLinker(); + idtLinker.annotate(aJCas); + LinkUtils.dumpTokenLevelLinks(aJCas, System.out); + + } + catch (Exception e) + { + fail(e.getMessage()); + } + + // get first token of H, and test the method + // filterLinksWithTargetsIncluding + + try { + JCas hView = aJCas.getView("HypothesisView"); + Collection tokens = JCasUtil.select(hView, Token.class); + List links = LinkUtils.selectLinksWith(aJCas, (String) null); + Iterator ti = tokens.iterator(); + ti.next(); // first token + Token t = ti.next(); // second token + List filteredLinks = SimpleWordCoverageCounter.filterLinksWithTargetsIncluding(links, t); + //System.out.println(filteredLinks.size()); + assertEquals(1, filteredLinks.size()); + } + catch (Exception e) + { + fail(e.getMessage()); + } + + // Okay, Let's do some coverage test. 
+ ScoringComponent count1 = new SimpleWordCoverageCounter(null); // count all + try { + Vector v = count1.calculateScores(aJCas); + testlogger.info(v.get(0)); + testlogger.info(v.get(1)); + testlogger.info(v.get(2)); + testlogger.info(v.get(3)); + } + catch (Exception e) + { + fail(e.getMessage()); + } + + } + +} diff --git a/alignmentedas/src/test/java/eu/excitementproject/eop/alignmentedas/subs/EDABinaryClassifierFromWekaTest.java b/alignmentedas/src/test/java/eu/excitementproject/eop/alignmentedas/subs/EDABinaryClassifierFromWekaTest.java new file mode 100644 index 00000000..1761ba44 --- /dev/null +++ b/alignmentedas/src/test/java/eu/excitementproject/eop/alignmentedas/subs/EDABinaryClassifierFromWekaTest.java @@ -0,0 +1,118 @@ +package eu.excitementproject.eop.alignmentedas.subs; + +import static org.junit.Assert.*; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; +import java.util.Vector; + +import org.junit.Test; + +import eu.excitementproject.eop.alignmentedas.p1eda.classifiers.EDABinaryClassifierFromWeka; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.DecisionLabelWithConfidence; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.EDAClassifierAbstraction; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.FeatureValue; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.LabeledInstance; +import eu.excitementproject.eop.common.DecisionLabel; + +public class EDABinaryClassifierFromWekaTest { + + @Test + public void test() { + // prepare a training set + List trainingData = new ArrayList(); + + Vector fv1 = new Vector(); + fv1.addElement(new FeatureValue(1.0)); + fv1.addElement(new FeatureValue(0.5)); + fv1.addElement(new FeatureValue(MyColor.gray)); + + LabeledInstance ins1 = new LabeledInstance(DecisionLabel.Entailment, fv1); + trainingData.add(ins1); + + + // init one and ask it to train ... 
+ EDAClassifierAbstraction classifier = null; + try { + classifier = new EDABinaryClassifierFromWeka(); + } + catch (Exception e) + { + fail(e.getMessage()); + } + + try { + classifier.createClassifierModel(trainingData); + } + catch (Exception e) + { + fail(e.getMessage()); + } + + // classify an instance ... + + Vector fv2 = new Vector(); + fv2.addElement(new FeatureValue(0.5)); + fv2.addElement(new FeatureValue(0.1)); + fv2.addElement(new FeatureValue(MyColor.blue)); + + try { + DecisionLabelWithConfidence result = classifier.classifyInstance(fv2); + System.out.println(result.getLabel().toString()); + System.out.println(result.getConfidence()); + } + catch (Exception e) + { + fail(e.getMessage()); + } + + // evaluate classifier ... + try { + List eval = classifier.evaluateClassifier(trainingData); + System.out.println("acc: " + eval.get(0)); + System.out.println("f1 :" + eval.get(1)); + } + catch (Exception e) + { + fail(e.getMessage()); + } + + + // store model, + File f = new File("target/default1.model"); + try { + classifier.storeClassifierModel(f); + } + catch (Exception e) + { + fail(e.getMessage()); + } + + // load model on a new instance ... + // and ask again ... 
+ EDAClassifierAbstraction classifier2 = null; + try { + classifier2 = new EDABinaryClassifierFromWeka(); + classifier2.loadClassifierModel(f); + DecisionLabelWithConfidence result = classifier.classifyInstance(fv2); + System.out.println(result.getLabel().toString()); + System.out.println(result.getConfidence()); + } + catch (Exception e) + { + fail(e.getMessage()); + } + + f.delete(); + + } + + public enum MyColor + { + blue, + gray, + black + } + +} diff --git a/alignmentedas/src/test/java/eu/excitementproject/eop/alignmentedas/subs/ValueTest.java b/alignmentedas/src/test/java/eu/excitementproject/eop/alignmentedas/subs/ValueTest.java new file mode 100644 index 00000000..2ac56552 --- /dev/null +++ b/alignmentedas/src/test/java/eu/excitementproject/eop/alignmentedas/subs/ValueTest.java @@ -0,0 +1,44 @@ +package eu.excitementproject.eop.alignmentedas.subs; + +//import static org.junit.Assert.*; + +import org.junit.Assert; +import org.junit.Test; + +import eu.excitementproject.eop.alignmentedas.p1eda.subs.FeatureValue; + +public class ValueTest { + + @Test + public void test() { + + + FeatureValue v1 = new FeatureValue(MyNominalValues.NOMINAL1); + + try + { + Enum e = v1.getNominalValue(); + // we can access the enum value it self.. + System.out.println(e.toString()); + + // and also all the other values permitted in that enum. 
+ // this is essential for training data (of nominal values) + Enum[] elist = e.getClass().getEnumConstants(); + for (Enum x : elist) + { + System.out.println(x); + } + } + catch (Exception e) + { + Assert.fail(e.getMessage()); + } + } + + public enum MyNominalValues { + NOMINAL1, + NOMINAL2, + NOMINAL3 + } + +} diff --git a/alignmentedas/src/test/java/eu/excitementproject/eop/alignmentedas/temptest/package-info.java b/alignmentedas/src/test/java/eu/excitementproject/eop/alignmentedas/temptest/package-info.java new file mode 100644 index 00000000..71ac8339 --- /dev/null +++ b/alignmentedas/src/test/java/eu/excitementproject/eop/alignmentedas/temptest/package-info.java @@ -0,0 +1,5 @@ +/** + * Temporary tests used for building T1EDA ... + * + */ +package eu.excitementproject.eop.alignmentedas.temptest; \ No newline at end of file diff --git a/biutee/pom.xml b/biutee/pom.xml index 5f49ae4b..05cdcbd9 100644 --- a/biutee/pom.xml +++ b/biutee/pom.xml @@ -4,7 +4,7 @@ eu.excitementproject eop - 1.1.3 + 1.1.4 biutee biutee @@ -36,7 +36,7 @@ eu.excitementproject transformations - 1.1.3 + 1.1.4 diff --git a/common/pom.xml b/common/pom.xml index ee591933..715de149 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -3,7 +3,7 @@ eu.excitementproject eop - 1.1.3 + 1.1.4 common common diff --git a/lap/src/main/java/eu/excitementproject/eop/lap/DummyAE.java b/common/src/main/java/eu/excitement/type/DummyAE.java similarity index 94% rename from lap/src/main/java/eu/excitementproject/eop/lap/DummyAE.java rename to common/src/main/java/eu/excitement/type/DummyAE.java index 18ef2c93..f11f8fd3 100644 --- a/lap/src/main/java/eu/excitementproject/eop/lap/DummyAE.java +++ b/common/src/main/java/eu/excitement/type/DummyAE.java @@ -1,4 +1,4 @@ -package eu.excitementproject.eop.lap; +package eu.excitement.type; import org.apache.uima.analysis_component.JCasAnnotator_ImplBase; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; diff --git 
a/common/src/main/java/eu/excitement/type/alignment/GroupLabelDomainLevel.java b/common/src/main/java/eu/excitement/type/alignment/GroupLabelDomainLevel.java new file mode 100644 index 00000000..ef436990 --- /dev/null +++ b/common/src/main/java/eu/excitement/type/alignment/GroupLabelDomainLevel.java @@ -0,0 +1,43 @@ +package eu.excitement.type.alignment; + +/** + * This Enum is defined to show the "group" of a specific alignment.Link instance. + * This file defines "Domain Level" semantic group labels. By domain level, we assume different granularity, + * or different source: such as "lexical level" grouping, or "syntactic level" grouping, or "predicate level" grouping, + * They are different to generic "inference level", and we call such as domain (domain of syntactic, domain of lexical, etc). + * + * The enum values are used in Link class (the class that represents alignment.Link CAS type). + * See addGroupLabel() methods and getGroupLabelDomainLevel() methods in Link class; they are the main users of the + * enum defined here. + * + * NOTE: using semantic group label is optional - but it is recommended that an aligner should add (at least) + * inference-level semantic labels. + * + * NOTE: the enum class can be extended to reflect new domain (e.g. predicate level relations in the future, etc) + * + * @author Tae-Gil Noh + * @since September 2014 + */ + +// Note: the following list has been first defined and proposed by BIU. +// (although they are tuned a bit after some discussion) +// See the following Google Doc to check the rational for the domain level +// definitions, and how they are mapped to actual ontologies such as WordNet +// and VerbOcean. 
+// http://goo.gl/xlUm3h + + +public enum GroupLabelDomainLevel { + SYNONYM, + HYPERNYM, + HYPONYM, + MERONYM, + HOLONYM, + CAUSE, + DERIVATIONALLY_RELATED, + HAPPENES_BEFORE, + ANTONYM, + SAME_PREDICATE_TRUTH, + OPPOSITE_PREDICATE_TRUTH, + SEMANTICALLY_RELATED, +} diff --git a/common/src/main/java/eu/excitement/type/alignment/GroupLabelInferenceLevel.java b/common/src/main/java/eu/excitement/type/alignment/GroupLabelInferenceLevel.java new file mode 100644 index 00000000..af8b4e0d --- /dev/null +++ b/common/src/main/java/eu/excitement/type/alignment/GroupLabelInferenceLevel.java @@ -0,0 +1,29 @@ +package eu.excitement.type.alignment; + +/** + * + * This Enum is defined to show the "group" of a specific alignment.Link instance. + * This file defines "Inference Level" semantic group labels. + * + * For example, is this link means Entailment relation establishing between the items that are linked? + * (such as, synonyms linked by a lexical aligner). + * + * This enum type defines "inference level" (or top-level) semantic group, such as "aligned targets are having relationship + * that is ..." + * + * The enum values are used in Link class (the class that represents alignment.Link CAS type). + * See addGroupLabel() methods and getGroupLabelInferenceLevel() methods in Link class; they are the main users of the + * enums defined here. + * + * NOTE: the enum class might be extended to reflect new top level inference relations for the future + * alignment.Links. But this enum should be kept stable, simple, and common enough; so, the semantic label + * would be actually used (both on annotating side and consumer (EDA) side). 
+ * + * @author Tae-Gil Noh + * @since September 2014 + */ +public enum GroupLabelInferenceLevel { + LOCAL_CONTRADICTION, + LOCAL_ENTAILMENT, + LOCAL_SIMILARITY +} diff --git a/common/src/main/java/eu/excitement/type/alignment/Link.java b/common/src/main/java/eu/excitement/type/alignment/Link.java index 16b36861..a66b652a 100644 --- a/common/src/main/java/eu/excitement/type/alignment/Link.java +++ b/common/src/main/java/eu/excitement/type/alignment/Link.java @@ -1,18 +1,20 @@ package eu.excitement.type.alignment; +import java.util.HashSet; +import java.util.Set; + +import org.apache.uima.cas.CASException; import org.apache.uima.jcas.JCas; import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.jcas.cas.NonEmptyStringList; import org.apache.uima.jcas.cas.TOP_Type; - import org.apache.uima.jcas.cas.StringList; import org.apache.uima.jcas.tcas.Annotation; /** - CAS type that links two Target. - Multi-view type: a Link connects one target in T (TextView), the other target in H (HypothesisView). - The semantic of a "Link" is: The texts (or structures) pointed by "TSideTarget" and "HSideTarget" have a relation of "type", with the direction of "direction", on a strength of "strength". - We make no assumptions regarding what annotations are aligned by Link and Target types. One Target can be linked by an arbitrary number of Link, also a Target can group an arbitrary number of Annotations. Note that uima.tcas.Annotation is the super type of almost all CAS annotation data. Since a Target can group Annotation, it can group any type of annotations in CAS. Some notes on Link type usage. (Indexing and setting begin - end) @@ -216,26 +218,31 @@ public void setLinkInfo(String v) { //*--------------* //* Feature: groupLabel - /** getter for groupLabel - gets TBDTBDTBDTBD - -TO BE DETERMINED. - -We will adopt "common semantic groups", such as "LOCAL-ENTAILMENT" links, or "LOCAL-CONTRADICTION" links, and so on. This field is for those "labels". 
Such labels are provided as "Convenience" tools --- to help the consumer modules of alignment.Link can classify various Links without hard-coding aliner Id or link's getIDs. - -Actual values for the labels will be updated. TBDTBDTBDTBD + /** + * Getter for groupLabel - Please do not use this method directly. + * + * Instead, Please use getGroupLabel methods that returns a Enum Set. + * (getGroupLabelInferenceLevel() and getGroupLabelDomainLevel()). + * + * This method retrieves the underlying StringList object, which is a linkedList. + * The wrapper methods getGroupLabel() for levels are better presented with Enums. + * * @generated */ public StringList getGroupLabel() { if (Link_Type.featOkTst && ((Link_Type)jcasType).casFeat_groupLabel == null) jcasType.jcas.throwFeatMissing("groupLabel", "eu.excitement.type.alignment.Link"); return (StringList)(jcasType.ll_cas.ll_getFSForRef(jcasType.ll_cas.ll_getRefValue(addr, ((Link_Type)jcasType).casFeatCode_groupLabel)));} - /** setter for groupLabel - sets TBDTBDTBDTBD - -TO BE DETERMINED. - -We will adopt "common semantic groups", such as "LOCAL-ENTAILMENT" links, or "LOCAL-CONTRADICTION" links, and so on. This field is for those "labels". Such labels are provided as "Convenience" tools --- to help the consumer modules of alignment.Link can classify various Links without hard-coding aliner Id or link's getIDs. - -Actual values for the labels will be updated. TBDTBDTBDTBD + /** + * setter for groupLabel - Please do not use this method directly. + * + * Instead, please use addGroupLabel method that accepts two Enum types. + * --- domain level group label and inference lavel group label. + * + * This method sets StringList object, which is a linkedList. + * The wrapper methods addGroupLabel() methods are better presented + * with Enums. Use those methods. 
+ * * @generated */ public void setGroupLabel(StringList v) { if (Link_Type.featOkTst && ((Link_Type)jcasType).casFeat_groupLabel == null) @@ -293,7 +300,147 @@ public Direction getDirection() Direction dir = Direction.valueOf(getDirectionString()); return dir; } - - } + + /** + * One of the two getter method for Group Labels. + * + * This method returns the set of "Inference level" group labels that are added for this + * alignment.Link instance. See SemanticLabelInferenceLevel enum class, for checking what + * type of labels are there currently. + * + * @return set of inference level group labels. + */ + public Set getGroupLabelsInferenceLevel() + { + Set result = new HashSet(); + + // iterate each of string, check, and add if it is. + NonEmptyStringList i = (NonEmptyStringList) this.getGroupLabel(); + + while(i != null) + { + String s = i.getHead(); + i = (NonEmptyStringList) i.getTail(); + + GroupLabelInferenceLevel label = null; + try { + label = GroupLabelInferenceLevel.valueOf(s); + } + catch(IllegalArgumentException e) + { + continue; // this string is not one of this enum. pass. + } + result.add(label); + } + return result; + } + + /** + * One of the two getter method for Group Labels. + * + * This method returns the set of "Domain level" group labels that are added for this + * alignment.Link instance. See SemanticLabelDomainLevel enum class, for checking what + * type of labels are there currently. + * + * @return set of domain level group labels. + */ + public Set getGroupLabelsDomainLevel() + { + Set result = new HashSet(); + + // iterate each of string, check, and add if it is. + NonEmptyStringList i = (NonEmptyStringList) this.getGroupLabel(); + + while(i != null) + { + String s = i.getHead(); + i = (NonEmptyStringList) i.getTail(); + + GroupLabelDomainLevel label = null; + try { + label = GroupLabelDomainLevel.valueOf(s); + } + catch(IllegalArgumentException e) + { + continue; // this string is not one of this enum. pass. 
+ } + result.add(label); + } + return result; + } + + /** + * Use this method to add one semantic group label (domain level). + * To add multiple labels, call this method multiple times with different labels. + * + * Adding semantic group label is optional, but can be helpful for the grouping of the + * Links for the consumer of the JCas. Thus, it is highly recommended that an aligner + * should add minimally the inference level group label, if applicable. + * + * This method adds one domain level group label, to this alignment.Link instance. + * See SemanticLabelDomainLevel enum class, for checking what type of labels are there currently. + * + * @param label + */ + public void addGroupLabel(GroupLabelDomainLevel aDomainLabel) throws CASException + { + addOneStringInGroupLabelList(aDomainLabel.toString()); + } + + /** + * Use this method to add one semantic group label (inference level). + * To add multiple labels, call this method multiple times with different labels. + * + * Adding semantic group label is optional, but can be helpful for the grouping of the + * Links for the consumer of the JCas. Thus, it is highly recommended that an aligner + * should add minimally the inference level group label, if applicable. - \ No newline at end of file + * This method adds one inference level group label, to this alignment.Link instance. + * See SemanticLabelInferenceLevel enum class, for checking what type of labels are there currently. + * + * @param label + */ + public void addGroupLabel(GroupLabelInferenceLevel aInferenceLabel) throws CASException + { + addOneStringInGroupLabelList(aInferenceLabel.toString()); + } + + /** + * Worker method for addGroupLabel. + * + * @param stringToAdd + * @throws CASException + */ + private void addOneStringInGroupLabelList(String stringToAdd) throws CASException + { + NonEmptyStringList sList = (NonEmptyStringList) this.getGroupLabel(); + + // if the underlying StringList is null, make a new head. 
(underlying String list is a linked list) + if (sList == null) + { + NonEmptyStringList head = new NonEmptyStringList(this.getCAS().getJCas()); + head.setHead(stringToAdd); + this.setGroupLabel(head); + + } + else + { + // get to the last part, and add label as a string + NonEmptyStringList i = sList; + + // find the last node ... + while(i.getTail() != null) + { + i = (NonEmptyStringList) i.getTail(); + } + + // add new node at the end. + NonEmptyStringList newNode = new NonEmptyStringList(this.getCAS().getJCas()); + newNode.setHead(stringToAdd); + i.setTail(newNode); + } + } + + + + } \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/alignment/LinkUtils.java b/common/src/main/java/eu/excitement/type/alignment/LinkUtils.java new file mode 100644 index 00000000..e7d5b6bb --- /dev/null +++ b/common/src/main/java/eu/excitement/type/alignment/LinkUtils.java @@ -0,0 +1,208 @@ +package eu.excitement.type.alignment; + +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import org.apache.uima.cas.CASException; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.cas.TOP; +import org.uimafit.util.JCasUtil; + +import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; + +/** + * Some utility methods for alignment.Link type and related codes. + * + * ** UNDER CONSTRUCTION **; + * ( feel free to add / request static utility methods that would be useful for alignment.Link instances in CAS) + * + * @author Tae-Gil Noh + * @since June, 2014 + * + */ +public class LinkUtils { + + /** + * This method checks the alignment.Link instances of the given JCas. + * If something is missing / not correct, it will throw an exception. + * Use this method within the development process of an AlignerComponent, + * (to check your aligner output is acceptable) or of an EDA. 
+ * + * (Consider this as a minimal checker that will check the conventions + * on link instances that EOP developers have all agreed upon.) + * + * Note that the prober code will some detail about the content that it checks + * on log4j as DEBUG. + * + * Note that, no link makes no exception. (A possible output from an PairAnnotator). + * It only check the link instances. + * + * @param aJCas JCas with EOP views and Links. + */ + public static void probeLinksInCAS(JCas aJCas) throws CASException + { + // TODO work on this once + + } + + /** + * This utility method fetches alignment.Link instances that links the give + * "Type" of annotations. More specifically, the method returns all link + * instances that connects Targets, which holds the give "type". + * + * For example, a call with type=Token.class will return all Link instances + * where either of its TSideTarget or HSideTarget holds "Token" annotation. + * + * The method will return all link instances, if one of its Targets hold + * the given the type. + * + * @param jCas the JCas with EOP views + * @param type target annotation class. + * @return a List that holds all links that satisfy the condition. If none satisfy the condition, it will return an empty List. + */ + public static List selectLinksWith(JCas aJCas, Class type) throws CASException + { + List resultList = new ArrayList(); + + JCas hypoView = aJCas.getView("HypothesisView"); + // get Links that satisfy the condition by iterating all Links just once. + + for (Link l : JCasUtil.select(hypoView, Link.class)) + { + // is this link holds type object in either of its target? + Target tt = l.getTSideTarget(); + Target ht = l.getHSideTarget(); + + if (JCasUtil.select(tt.getTargetAnnotations(), type).size() > 0) + { + // T side target does hold at least one of type instance. 
+ resultList.add(l); + continue; // no need to check h side target + } + + if (JCasUtil.select(ht.getTargetAnnotations(), type).size() > 0) + { + // H side target does hold at least one of type instance. + resultList.add(l); + } + } + return resultList; + } + + + /** + * Use this utility method to fetch Links of specific aligner, or links of specific relations. + * The method will fetch only those links that satisfy your given condition (such as alignerID, versionID and linkInfo string) + * if you put null, it would ignore that part of the link ID (as don't care). + * + * For example, alignerID = "WordNetAligner", with versionID=null and linkInfo = null, + * will fetch all the links that has been added by "WordNetAligner", regardless of version and linkInfo. + * + * @param aJCas + * @param alignerID + * @param versionID + * @param linkInfo + * @return + * @throws CASException + */ + public static List selectLinksWith(JCas aJCas, String alignerID, String versionID, String linkInfo) throws CASException + { + // get links with those names; + // "null" means "don't care". + List resultList = new ArrayList(); + + JCas hypoView = aJCas.getView("HypothesisView"); + // get Links that satisfy the condition by iterating all Links just once. + + for (Link l : JCasUtil.select(hypoView, Link.class)) + { + // pass if the link does not satisfy the given IDs. + if (alignerID != null && (!l.getAlignerID().equals(alignerID))) + continue; // condition given and no match - skip this one + + if (versionID != null && (!l.getAlignerVersion().equals(versionID))) + continue; // condition given and no match - skip this one. + + if (linkInfo != null && (!l.getLinkInfo().equals(linkInfo)) ) + continue; // condition given and no match - skip. + + // Okay, all given conditions are met. push it. + resultList.add(l); + } + return resultList; + } + + /** + * Use this utility method to fetch Links added by specific aligner. 
+ * The method will fetch only those links that satisfy your given condition (alignerID) + * where the ID would be checked by alignment.Link.getAlignerID() + * + * @param aJCas + * @param alignerID that will be compared to link.getAlignerID() + * @return + * @throws CASException + */ + public static List selectLinksWith(JCas aJCas, String alignerID) throws CASException + { + return selectLinksWith(aJCas, alignerID, null, null); + } + +// public static List selectLinksWith(String fullID) +// { +// // get links where link.getID() == fullID +// // TODO work on this once +// return null; +// } + + /** + * Utility class that is useful to see what surface level (token level) Links are added in the given CAS. + * This method iterates all Links that includes tokens within their targets, and shows them (only tokens! + * does not show other items in Target. ) --- Thus, the method is not generic enough to be used to check + * Link (and targets) that links more than tokens. + * + * @param aJCas + * @param os + */ + public static void dumpTokenLevelLinks(JCas aJCas, PrintStream ps) throws CASException + { + // get all links that connects Tokens... + + List tokenLevelLinks = selectLinksWith(aJCas, Token.class); + + ps.println("The CAS has " + tokenLevelLinks.size() + " Link instances in it."); + + int linkNum = 0; + for(Link l : tokenLevelLinks) + { + // output to the give output stream + ps.print("Link " + linkNum); + // The link information + ps.println(" (" + l.getDirectionString() + ", " + l.getID() + ", " + l.getStrength() + ")"); + linkNum++; + + Target tside = l.getTSideTarget(); + Target hside = l.getHSideTarget(); + + // T side target has n tokens... TEXT(begin,end) TEXT(begin, end) ... 
+ Collection tokens = JCasUtil.select(tside.getTargetAnnotations(), Token.class); + ps.print("\t TSide target has " + tokens.size() + " token(s): "); + for (Token t: tokens) + { + ps.print(t.getCoveredText() + "(" + t.getBegin() + "," + t.getEnd() + ") "); + } + ps.println(""); + + // H side target has m tokens ... + tokens = JCasUtil.select(hside.getTargetAnnotations(), Token.class); + ps.print("\t HSide target has " + tokens.size() + " token(s): "); + for (Token t: tokens) + { + ps.print(t.getCoveredText() + "(" + t.getBegin() + "," + t.getEnd() + ") "); + } + ps.println(""); + + } + } +} diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruth.java b/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruth.java index cf3efd99..655904dc 100644 --- a/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruth.java +++ b/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruth.java @@ -7,12 +7,13 @@ import org.apache.uima.jcas.JCasRegistry; import org.apache.uima.jcas.cas.TOP_Type; +import org.apache.uima.jcas.cas.FSArray; import org.apache.uima.jcas.tcas.Annotation; -/** This type represents a clause truth value annotation. 
- * Updated by JCasGen Fri Oct 05 20:17:26 CEST 2012 - * XML source: /Users/tailblues/progs/github/Excitement-Open-Platform/common/src/main/resources/desc/type/PredicateTruth.xml +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * XML source: C:/Users/user/fromHP/Shared/excitement workspace/eop/common/src/main/resources/desc/type/PredicateTruth.xml * @generated */ public class ClauseTruth extends Annotation { /** @generated @@ -54,30 +55,61 @@ public ClauseTruth(JCas jcas, int begin, int end) { readObject(); } - /** + /** + * * Write your own initialization here * - @generated modifiable */ + * + * @generated modifiable + */ private void readObject() {/*default - does nothing empty block */} //*--------------* - //* Feature: value + //* Feature: clauseTokens - /** getter for value - gets This represents the value of the annotation. - * @generated */ - public String getValue() { - if (ClauseTruth_Type.featOkTst && ((ClauseTruth_Type)jcasType).casFeat_value == null) - jcasType.jcas.throwFeatMissing("value", "eu.excitement.type.predicatetruth.ClauseTruth"); - return jcasType.ll_cas.ll_getStringValue(addr, ((ClauseTruth_Type)jcasType).casFeatCode_value);} + /** getter for clauseTokens - gets + * @generated + * @return value of the feature + */ + public FSArray getClauseTokens() { + if (ClauseTruth_Type.featOkTst && ((ClauseTruth_Type)jcasType).casFeat_clauseTokens == null) + jcasType.jcas.throwFeatMissing("clauseTokens", "eu.excitement.type.predicatetruth.ClauseTruth"); + return (FSArray)(jcasType.ll_cas.ll_getFSForRef(jcasType.ll_cas.ll_getRefValue(addr, ((ClauseTruth_Type)jcasType).casFeatCode_clauseTokens)));} - /** setter for value - sets This represents the value of the annotation. 
- * @generated */ - public void setValue(String v) { - if (ClauseTruth_Type.featOkTst && ((ClauseTruth_Type)jcasType).casFeat_value == null) - jcasType.jcas.throwFeatMissing("value", "eu.excitement.type.predicatetruth.ClauseTruth"); - jcasType.ll_cas.ll_setStringValue(addr, ((ClauseTruth_Type)jcasType).casFeatCode_value, v);} + /** setter for clauseTokens - sets + * @generated + * @param v value to set into the feature + */ + public void setClauseTokens(FSArray v) { + if (ClauseTruth_Type.featOkTst && ((ClauseTruth_Type)jcasType).casFeat_clauseTokens == null) + jcasType.jcas.throwFeatMissing("clauseTokens", "eu.excitement.type.predicatetruth.ClauseTruth"); + jcasType.ll_cas.ll_setRefValue(addr, ((ClauseTruth_Type)jcasType).casFeatCode_clauseTokens, jcasType.ll_cas.ll_getFSRef(v));} + + /** indexed getter for clauseTokens - gets an indexed value - This is an array that can hold one or +more tokens. Representing the tokens which comprise this clause. + * @generated + * @param i index in the array to get + * @return value of the element at index i + */ + public Annotation getClauseTokens(int i) { + if (ClauseTruth_Type.featOkTst && ((ClauseTruth_Type)jcasType).casFeat_clauseTokens == null) + jcasType.jcas.throwFeatMissing("clauseTokens", "eu.excitement.type.predicatetruth.ClauseTruth"); + jcasType.jcas.checkArrayBounds(jcasType.ll_cas.ll_getRefValue(addr, ((ClauseTruth_Type)jcasType).casFeatCode_clauseTokens), i); + return (Annotation)(jcasType.ll_cas.ll_getFSForRef(jcasType.ll_cas.ll_getRefArrayValue(jcasType.ll_cas.ll_getRefValue(addr, ((ClauseTruth_Type)jcasType).casFeatCode_clauseTokens), i)));} + + /** indexed setter for clauseTokens - sets an indexed value - This is an array that can hold one or +more tokens. Representing the tokens which comprise this clause. 
+ * @generated + * @param i index in the array to set + * @param v value to set into the array + */ + public void setClauseTokens(int i, Annotation v) { + if (ClauseTruth_Type.featOkTst && ((ClauseTruth_Type)jcasType).casFeat_clauseTokens == null) + jcasType.jcas.throwFeatMissing("clauseTokens", "eu.excitement.type.predicatetruth.ClauseTruth"); + jcasType.jcas.checkArrayBounds(jcasType.ll_cas.ll_getRefValue(addr, ((ClauseTruth_Type)jcasType).casFeatCode_clauseTokens), i); + jcasType.ll_cas.ll_setRefArrayValue(jcasType.ll_cas.ll_getRefValue(addr, ((ClauseTruth_Type)jcasType).casFeatCode_clauseTokens), i, jcasType.ll_cas.ll_getFSRef(v));} } \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthNegative.java b/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthNegative.java new file mode 100644 index 00000000..edb61bc2 --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthNegative.java @@ -0,0 +1,78 @@ + + +/* First created by JCasGen Mon Jul 14 22:26:14 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.jcas.cas.TOP_Type; + + + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * XML source: C:/Users/user/fromHP/Shared/excitement workspace/eop/common/src/main/resources/desc/type/PredicateTruth.xml + * @generated */ +public class ClauseTruthNegative extends ClauseTruth { + /** @generated + * @ordered + */ + //@SuppressWarnings ("hiding") + public final static int typeIndexID = JCasRegistry.register(ClauseTruthNegative.class); + /** @generated + * @ordered + */ + //@SuppressWarnings ("hiding") + public final static int type = typeIndexID; + /** @generated + * @return index of the type + */ + @Override + public int getTypeIndexID() {return typeIndexID;} + + /** Never called. 
Disable default constructor + * @generated */ + protected ClauseTruthNegative() {/* intentionally empty block */} + + /** Internal - constructor used by generator + * @generated + * @param addr low level Feature Structure reference + * @param type the type of this Feature Structure + */ + public ClauseTruthNegative(int addr, TOP_Type type) { + super(addr, type); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + */ + public ClauseTruthNegative(JCas jcas) { + super(jcas); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + * @param begin offset to the begin spot in the SofA + * @param end offset to the end spot in the SofA + */ + public ClauseTruthNegative(JCas jcas, int begin, int end) { + super(jcas); + setBegin(begin); + setEnd(end); + readObject(); + } + + /** + * + * Write your own initialization here + * + * + * @generated modifiable + */ + private void readObject() {/*default - does nothing empty block */} + +} + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthNegative_Type.java b/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthNegative_Type.java new file mode 100644 index 00000000..c42a799c --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthNegative_Type.java @@ -0,0 +1,62 @@ + +/* First created by JCasGen Mon Jul 14 22:26:14 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.cas.impl.CASImpl; +import org.apache.uima.cas.impl.FSGenerator; +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.cas.impl.TypeImpl; +import org.apache.uima.cas.Type; + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * @generated */ +public class ClauseTruthNegative_Type extends ClauseTruth_Type { + /** @generated + * @return the 
generator for this type + */ + @Override + protected FSGenerator getFSGenerator() {return fsGenerator;} + /** @generated */ + private final FSGenerator fsGenerator = + new FSGenerator() { + public FeatureStructure createFS(int addr, CASImpl cas) { + if (ClauseTruthNegative_Type.this.useExistingInstance) { + // Return eq fs instance if already created + FeatureStructure fs = ClauseTruthNegative_Type.this.jcas.getJfsFromCaddr(addr); + if (null == fs) { + fs = new ClauseTruthNegative(addr, ClauseTruthNegative_Type.this); + ClauseTruthNegative_Type.this.jcas.putJfsFromCaddr(addr, fs); + return fs; + } + return fs; + } else return new ClauseTruthNegative(addr, ClauseTruthNegative_Type.this); + } + }; + /** @generated */ + // @SuppressWarnings ("hiding") + public final static int typeIndexID = ClauseTruthNegative.typeIndexID; + /** @generated + @modifiable */ + // @SuppressWarnings ("hiding") + public final static boolean featOkTst = JCasRegistry.getFeatOkTst("eu.excitement.type.predicatetruth.ClauseTruthNegative"); + + + + /** initialize variables to correspond with Cas Type and Features + * @generated + * @param jcas JCas + * @param casType Type + */ + public ClauseTruthNegative_Type(JCas jcas, Type casType) { + super(jcas, casType); + casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator()); + + } +} + + + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthNotIdentified.java b/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthNotIdentified.java new file mode 100644 index 00000000..40728a0b --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthNotIdentified.java @@ -0,0 +1,77 @@ + + +/* First created by JCasGen Mon Jul 14 22:29:07 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.jcas.cas.TOP_Type; + + +/** + * Updated by 
JCasGen Tue Jul 15 10:01:13 IDT 2014 + * XML source: C:/Users/user/fromHP/Shared/excitement workspace/eop/common/src/main/resources/desc/type/PredicateTruth.xml + * @generated */ +public class ClauseTruthNotIdentified extends ClauseTruth { + /** @generated + * @ordered + */ + // @SuppressWarnings ("hiding") + public final static int typeIndexID = JCasRegistry.register(ClauseTruthNotIdentified.class); + /** @generated + * @ordered + */ + // @SuppressWarnings ("hiding") + public final static int type = typeIndexID; + /** @generated + * @return index of the type + */ + @Override + public int getTypeIndexID() {return typeIndexID;} + + /** Never called. Disable default constructor + * @generated */ + protected ClauseTruthNotIdentified() {/* intentionally empty block */} + + /** Internal - constructor used by generator + * @generated + * @param addr low level Feature Structure reference + * @param type the type of this Feature Structure + */ + public ClauseTruthNotIdentified(int addr, TOP_Type type) { + super(addr, type); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + */ + public ClauseTruthNotIdentified(JCas jcas) { + super(jcas); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + * @param begin offset to the begin spot in the SofA + * @param end offset to the end spot in the SofA + */ + public ClauseTruthNotIdentified(JCas jcas, int begin, int end) { + super(jcas); + setBegin(begin); + setEnd(end); + readObject(); + } + + /** + * + * Write your own initialization here + * + * + * @generated modifiable + */ + private void readObject() {/*default - does nothing empty block */} + +} + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthNotIdentified_Type.java b/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthNotIdentified_Type.java new file mode 100644 index 00000000..a77ca60a --- /dev/null +++ 
b/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthNotIdentified_Type.java @@ -0,0 +1,62 @@ + +/* First created by JCasGen Mon Jul 14 22:29:07 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.cas.Type; +import org.apache.uima.cas.impl.CASImpl; +import org.apache.uima.cas.impl.FSGenerator; +import org.apache.uima.cas.impl.TypeImpl; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * @generated */ +public class ClauseTruthNotIdentified_Type extends ClauseTruth_Type { + /** @generated + * @return the generator for this type + */ + @Override + protected FSGenerator getFSGenerator() {return fsGenerator;} + /** @generated */ + private final FSGenerator fsGenerator = + new FSGenerator() { + public FeatureStructure createFS(int addr, CASImpl cas) { + if (ClauseTruthNotIdentified_Type.this.useExistingInstance) { + // Return eq fs instance if already created + FeatureStructure fs = ClauseTruthNotIdentified_Type.this.jcas.getJfsFromCaddr(addr); + if (null == fs) { + fs = new ClauseTruthNotIdentified(addr, ClauseTruthNotIdentified_Type.this); + ClauseTruthNotIdentified_Type.this.jcas.putJfsFromCaddr(addr, fs); + return fs; + } + return fs; + } else return new ClauseTruthNotIdentified(addr, ClauseTruthNotIdentified_Type.this); + } + }; + /** @generated */ + // @SuppressWarnings ("hiding") + public final static int typeIndexID = ClauseTruthNotIdentified.typeIndexID; + /** @generated + @modifiable */ + // @SuppressWarnings ("hiding") + public final static boolean featOkTst = JCasRegistry.getFeatOkTst("eu.excitement.type.predicatetruth.ClauseTruthNotIdentified"); + + + + /** initialize variables to correspond with Cas Type and Features + * @generated + * @param jcas JCas + * @param casType Type + */ + public ClauseTruthNotIdentified_Type(JCas jcas, Type casType) { + super(jcas, casType); + 
casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator()); + + } +} + + + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthPositive.java b/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthPositive.java new file mode 100644 index 00000000..6117eb03 --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthPositive.java @@ -0,0 +1,78 @@ + + +/* First created by JCasGen Mon Jul 14 20:04:02 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.jcas.cas.TOP_Type; + + + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * XML source: C:/Users/user/fromHP/Shared/excitement workspace/eop/common/src/main/resources/desc/type/PredicateTruth.xml + * @generated */ +public class ClauseTruthPositive extends ClauseTruth { + /** @generated + * @ordered + */ + // @SuppressWarnings ("hiding") + public final static int typeIndexID = JCasRegistry.register(ClauseTruthPositive.class); + /** @generated + * @ordered + */ + // @SuppressWarnings ("hiding") + public final static int type = typeIndexID; + /** @generated + * @return index of the type + */ + @Override + public int getTypeIndexID() {return typeIndexID;} + + /** Never called. 
Disable default constructor + * @generated */ + protected ClauseTruthPositive() {/* intentionally empty block */} + + /** Internal - constructor used by generator + * @generated + * @param addr low level Feature Structure reference + * @param type the type of this Feature Structure + */ + public ClauseTruthPositive(int addr, TOP_Type type) { + super(addr, type); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + */ + public ClauseTruthPositive(JCas jcas) { + super(jcas); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + * @param begin offset to the begin spot in the SofA + * @param end offset to the end spot in the SofA + */ + public ClauseTruthPositive(JCas jcas, int begin, int end) { + super(jcas); + setBegin(begin); + setEnd(end); + readObject(); + } + + /** + * + * Write your own initialization here + * + * + * @generated modifiable + */ + private void readObject() {/*default - does nothing empty block */} + +} + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthPositive_Type.java b/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthPositive_Type.java new file mode 100644 index 00000000..b01a20fa --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthPositive_Type.java @@ -0,0 +1,62 @@ + +/* First created by JCasGen Mon Jul 14 20:04:02 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.cas.impl.CASImpl; +import org.apache.uima.cas.impl.FSGenerator; +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.cas.impl.TypeImpl; +import org.apache.uima.cas.Type; + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * @generated */ +public class ClauseTruthPositive_Type extends ClauseTruth_Type { + /** @generated + * @return the 
generator for this type + */ + @Override + protected FSGenerator getFSGenerator() {return fsGenerator;} + /** @generated */ + private final FSGenerator fsGenerator = + new FSGenerator() { + public FeatureStructure createFS(int addr, CASImpl cas) { + if (ClauseTruthPositive_Type.this.useExistingInstance) { + // Return eq fs instance if already created + FeatureStructure fs = ClauseTruthPositive_Type.this.jcas.getJfsFromCaddr(addr); + if (null == fs) { + fs = new ClauseTruthPositive(addr, ClauseTruthPositive_Type.this); + ClauseTruthPositive_Type.this.jcas.putJfsFromCaddr(addr, fs); + return fs; + } + return fs; + } else return new ClauseTruthPositive(addr, ClauseTruthPositive_Type.this); + } + }; + /** @generated */ + // @SuppressWarnings ("hiding") + public final static int typeIndexID = ClauseTruthPositive.typeIndexID; + /** @generated + @modifiable */ + // @SuppressWarnings ("hiding") + public final static boolean featOkTst = JCasRegistry.getFeatOkTst("eu.excitement.type.predicatetruth.ClauseTruthPositive"); + + + + /** initialize variables to correspond with Cas Type and Features + * @generated + * @param jcas JCas + * @param casType Type + */ + public ClauseTruthPositive_Type(JCas jcas, Type casType) { + super(jcas, casType); + casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator()); + + } +} + + + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthUncertain.java b/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthUncertain.java new file mode 100644 index 00000000..e018b1d9 --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthUncertain.java @@ -0,0 +1,78 @@ + + +/* First created by JCasGen Mon Jul 14 22:26:14 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.jcas.cas.TOP_Type; + + + +/** + * Updated by JCasGen Tue 
Jul 15 10:01:13 IDT 2014 + * XML source: C:/Users/user/fromHP/Shared/excitement workspace/eop/common/src/main/resources/desc/type/PredicateTruth.xml + * @generated */ +public class ClauseTruthUncertain extends ClauseTruth { + /** @generated + * @ordered + */ + // @SuppressWarnings ("hiding") + public final static int typeIndexID = JCasRegistry.register(ClauseTruthUncertain.class); + /** @generated + * @ordered + */ + // @SuppressWarnings ("hiding") + public final static int type = typeIndexID; + /** @generated + * @return index of the type + */ + @Override + public int getTypeIndexID() {return typeIndexID;} + + /** Never called. Disable default constructor + * @generated */ + protected ClauseTruthUncertain() {/* intentionally empty block */} + + /** Internal - constructor used by generator + * @generated + * @param addr low level Feature Structure reference + * @param type the type of this Feature Structure + */ + public ClauseTruthUncertain(int addr, TOP_Type type) { + super(addr, type); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + */ + public ClauseTruthUncertain(JCas jcas) { + super(jcas); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + * @param begin offset to the begin spot in the SofA + * @param end offset to the end spot in the SofA + */ + public ClauseTruthUncertain(JCas jcas, int begin, int end) { + super(jcas); + setBegin(begin); + setEnd(end); + readObject(); + } + + /** + * + * Write your own initialization here + * + * + * @generated modifiable + */ + private void readObject() {/*default - does nothing empty block */} + +} + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthUncertain_Type.java b/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthUncertain_Type.java new file mode 100644 index 00000000..c9be7bfb --- /dev/null +++ 
b/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthUncertain_Type.java @@ -0,0 +1,62 @@ + +/* First created by JCasGen Mon Jul 14 22:26:14 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.cas.impl.CASImpl; +import org.apache.uima.cas.impl.FSGenerator; +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.cas.impl.TypeImpl; +import org.apache.uima.cas.Type; + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * @generated */ +public class ClauseTruthUncertain_Type extends ClauseTruth_Type { + /** @generated + * @return the generator for this type + */ + @Override + protected FSGenerator getFSGenerator() {return fsGenerator;} + /** @generated */ + private final FSGenerator fsGenerator = + new FSGenerator() { + public FeatureStructure createFS(int addr, CASImpl cas) { + if (ClauseTruthUncertain_Type.this.useExistingInstance) { + // Return eq fs instance if already created + FeatureStructure fs = ClauseTruthUncertain_Type.this.jcas.getJfsFromCaddr(addr); + if (null == fs) { + fs = new ClauseTruthUncertain(addr, ClauseTruthUncertain_Type.this); + ClauseTruthUncertain_Type.this.jcas.putJfsFromCaddr(addr, fs); + return fs; + } + return fs; + } else return new ClauseTruthUncertain(addr, ClauseTruthUncertain_Type.this); + } + }; + /** @generated */ + // @SuppressWarnings ("hiding") + public final static int typeIndexID = ClauseTruthUncertain.typeIndexID; + /** @generated + @modifiable */ + // @SuppressWarnings ("hiding") + public final static boolean featOkTst = JCasRegistry.getFeatOkTst("eu.excitement.type.predicatetruth.ClauseTruthUncertain"); + + + + /** initialize variables to correspond with Cas Type and Features + * @generated + * @param jcas JCas + * @param casType Type + */ + public ClauseTruthUncertain_Type(JCas jcas, Type casType) { + super(jcas, casType); + 
casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator()); + + } +} + + + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthUnknown.java b/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthUnknown.java new file mode 100644 index 00000000..9a56b5b7 --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthUnknown.java @@ -0,0 +1,78 @@ + + +/* First created by JCasGen Mon Jul 14 22:27:25 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.jcas.cas.TOP_Type; + + + +/** + * Updated by JCasGen Mon Jul 14 22:27:25 IDT 2014 + * XML source: C:/Users/user/fromHP/Shared/excitement workspace/eop/common/src/main/resources/desc/type/ClauseTruth.xml + * @generated */ +public class ClauseTruthUnknown extends ClauseTruth { + /** @generated + * @ordered + */ + // @SuppressWarnings ("hiding") + public final static int typeIndexID = JCasRegistry.register(ClauseTruthUnknown.class); + /** @generated + * @ordered + */ + // @SuppressWarnings ("hiding") + public final static int type = typeIndexID; + /** @generated + * @return index of the type + */ + @Override + public int getTypeIndexID() {return typeIndexID;} + + /** Never called. 
Disable default constructor + * @generated */ + protected ClauseTruthUnknown() {/* intentionally empty block */} + + /** Internal - constructor used by generator + * @generated + * @param addr low level Feature Structure reference + * @param type the type of this Feature Structure + */ + public ClauseTruthUnknown(int addr, TOP_Type type) { + super(addr, type); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + */ + public ClauseTruthUnknown(JCas jcas) { + super(jcas); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + * @param begin offset to the begin spot in the SofA + * @param end offset to the end spot in the SofA + */ + public ClauseTruthUnknown(JCas jcas, int begin, int end) { + super(jcas); + setBegin(begin); + setEnd(end); + readObject(); + } + + /** + * + * Write your own initialization here + * + * + * @generated modifiable + */ + private void readObject() {/*default - does nothing empty block */} + +} + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthUnknown_Type.java b/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthUnknown_Type.java new file mode 100644 index 00000000..8e02364a --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruthUnknown_Type.java @@ -0,0 +1,62 @@ + +/* First created by JCasGen Mon Jul 14 22:27:25 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.cas.impl.CASImpl; +import org.apache.uima.cas.impl.FSGenerator; +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.cas.impl.TypeImpl; +import org.apache.uima.cas.Type; + +/** + * Updated by JCasGen Mon Jul 14 22:27:25 IDT 2014 + * @generated */ +public class ClauseTruthUnknown_Type extends ClauseTruth_Type { + /** @generated + * @return the generator for 
this type + */ + @Override + protected FSGenerator getFSGenerator() {return fsGenerator;} + /** @generated */ + private final FSGenerator fsGenerator = + new FSGenerator() { + public FeatureStructure createFS(int addr, CASImpl cas) { + if (ClauseTruthUnknown_Type.this.useExistingInstance) { + // Return eq fs instance if already created + FeatureStructure fs = ClauseTruthUnknown_Type.this.jcas.getJfsFromCaddr(addr); + if (null == fs) { + fs = new ClauseTruthUnknown(addr, ClauseTruthUnknown_Type.this); + ClauseTruthUnknown_Type.this.jcas.putJfsFromCaddr(addr, fs); + return fs; + } + return fs; + } else return new ClauseTruthUnknown(addr, ClauseTruthUnknown_Type.this); + } + }; + /** @generated */ + // @SuppressWarnings ("hiding") + public final static int typeIndexID = ClauseTruthUnknown.typeIndexID; + /** @generated + @modifiable */ + // @SuppressWarnings ("hiding") + public final static boolean featOkTst = JCasRegistry.getFeatOkTst("eu.excitement.type.predicatetruth.ClauseTruthUnknown"); + + + + /** initialize variables to correspond with Cas Type and Features + * @generated + * @param jcas JCas + * @param casType Type + */ + public ClauseTruthUnknown_Type(JCas jcas, Type casType) { + super(jcas, casType); + casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator()); + + } +} + + + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruth_Type.java b/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruth_Type.java index 26c17a5d..8a787a24 100644 --- a/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruth_Type.java +++ b/common/src/main/java/eu/excitement/type/predicatetruth/ClauseTruth_Type.java @@ -13,8 +13,8 @@ import org.apache.uima.cas.Feature; import org.apache.uima.jcas.tcas.Annotation_Type; -/** This type represents a clause truth value annotation. 
- * Updated by JCasGen Fri Oct 05 20:17:26 CEST 2012 +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 * @generated */ public class ClauseTruth_Type extends Annotation_Type { /** @generated */ @@ -45,22 +45,55 @@ public FeatureStructure createFS(int addr, CASImpl cas) { public final static boolean featOkTst = JCasRegistry.getFeatOkTst("eu.excitement.type.predicatetruth.ClauseTruth"); /** @generated */ - final Feature casFeat_value; + final Feature casFeat_clauseTokens; /** @generated */ - final int casFeatCode_value; - /** @generated */ - public String getValue(int addr) { - if (featOkTst && casFeat_value == null) - jcas.throwFeatMissing("value", "eu.excitement.type.predicatetruth.ClauseTruth"); - return ll_cas.ll_getStringValue(addr, casFeatCode_value); + final int casFeatCode_clauseTokens; + /** @generated + * @param addr low level Feature Structure reference + * @return the feature value + */ + public int getClauseTokens(int addr) { + if (featOkTst && casFeat_clauseTokens == null) + jcas.throwFeatMissing("clauseTokens", "eu.excitement.type.predicatetruth.ClauseTruth"); + return ll_cas.ll_getRefValue(addr, casFeatCode_clauseTokens); } - /** @generated */ - public void setValue(int addr, String v) { - if (featOkTst && casFeat_value == null) - jcas.throwFeatMissing("value", "eu.excitement.type.predicatetruth.ClauseTruth"); - ll_cas.ll_setStringValue(addr, casFeatCode_value, v);} + /** @generated + * @param addr low level Feature Structure reference + * @param v value to set + */ + public void setClauseTokens(int addr, int v) { + if (featOkTst && casFeat_clauseTokens == null) + jcas.throwFeatMissing("clauseTokens", "eu.excitement.type.predicatetruth.ClauseTruth"); + ll_cas.ll_setRefValue(addr, casFeatCode_clauseTokens, v);} - + /** @generated + * @param addr low level Feature Structure reference + * @param i index of item in the array + * @return value at index i in the array + */ + public int getClauseTokens(int addr, int i) { + if (featOkTst && 
casFeat_clauseTokens == null) + jcas.throwFeatMissing("clauseTokens", "eu.excitement.type.predicatetruth.ClauseTruth"); + if (lowLevelTypeChecks) + return ll_cas.ll_getRefArrayValue(ll_cas.ll_getRefValue(addr, casFeatCode_clauseTokens), i, true); + jcas.checkArrayBounds(ll_cas.ll_getRefValue(addr, casFeatCode_clauseTokens), i); + return ll_cas.ll_getRefArrayValue(ll_cas.ll_getRefValue(addr, casFeatCode_clauseTokens), i); + } + + /** @generated + * @param addr low level Feature Structure reference + * @param i index of item in the array + * @param v value to set + */ + public void setClauseTokens(int addr, int i, int v) { + if (featOkTst && casFeat_clauseTokens == null) + jcas.throwFeatMissing("clauseTokens", "eu.excitement.type.predicatetruth.ClauseTruth"); + if (lowLevelTypeChecks) + ll_cas.ll_setRefArrayValue(ll_cas.ll_getRefValue(addr, casFeatCode_clauseTokens), i, v, true); + jcas.checkArrayBounds(ll_cas.ll_getRefValue(addr, casFeatCode_clauseTokens), i); + ll_cas.ll_setRefArrayValue(ll_cas.ll_getRefValue(addr, casFeatCode_clauseTokens), i, v); + } + @@ -71,8 +104,8 @@ public ClauseTruth_Type(JCas jcas, Type casType) { casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator()); - casFeat_value = jcas.getRequiredFeatureDE(casType, "value", "eu.excitement.type.predicatetruth.ClauseTruthValue", featOkTst); - casFeatCode_value = (null == casFeat_value) ? JCas.INVALID_FEATURE_CODE : ((FeatureImpl)casFeat_value).getCode(); + casFeat_clauseTokens = jcas.getRequiredFeatureDE(casType, "clauseTokens", "uima.cas.FSArray", featOkTst); + casFeatCode_clauseTokens = (null == casFeat_clauseTokens) ? 
JCas.INVALID_FEATURE_CODE : ((FeatureImpl)casFeat_clauseTokens).getCode(); } } diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/NegationAndUncertainty.java b/common/src/main/java/eu/excitement/type/predicatetruth/NegationAndUncertainty.java index d62e99ea..48a1845e 100644 --- a/common/src/main/java/eu/excitement/type/predicatetruth/NegationAndUncertainty.java +++ b/common/src/main/java/eu/excitement/type/predicatetruth/NegationAndUncertainty.java @@ -10,9 +10,9 @@ import org.apache.uima.jcas.tcas.Annotation; -/** This type represents a negation-and-uncertainty annotation. - * Updated by JCasGen Fri Oct 05 20:17:26 CEST 2012 - * XML source: /Users/tailblues/progs/github/Excitement-Open-Platform/common/src/main/resources/desc/type/PredicateTruth.xml +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * XML source: C:/Users/user/fromHP/Shared/excitement workspace/eop/common/src/main/resources/desc/type/PredicateTruth.xml * @generated */ public class NegationAndUncertainty extends Annotation { /** @generated @@ -54,30 +54,17 @@ public NegationAndUncertainty(JCas jcas, int begin, int end) { readObject(); } - /** + /** + * * Write your own initialization here * - @generated modifiable */ + * + * @generated modifiable + */ private void readObject() {/*default - does nothing empty block */} - //*--------------* - //* Feature: value - - /** getter for value - gets This represents the value of the annotation. - * @generated */ - public String getValue() { - if (NegationAndUncertainty_Type.featOkTst && ((NegationAndUncertainty_Type)jcasType).casFeat_value == null) - jcasType.jcas.throwFeatMissing("value", "eu.excitement.type.predicatetruth.NegationAndUncertainty"); - return jcasType.ll_cas.ll_getStringValue(addr, ((NegationAndUncertainty_Type)jcasType).casFeatCode_value);} - - /** setter for value - sets This represents the value of the annotation. 
- * @generated */ - public void setValue(String v) { - if (NegationAndUncertainty_Type.featOkTst && ((NegationAndUncertainty_Type)jcasType).casFeat_value == null) - jcasType.jcas.throwFeatMissing("value", "eu.excitement.type.predicatetruth.NegationAndUncertainty"); - jcasType.ll_cas.ll_setStringValue(addr, ((NegationAndUncertainty_Type)jcasType).casFeatCode_value, v);} - } +} \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/NegationAndUncertaintyNegative.java b/common/src/main/java/eu/excitement/type/predicatetruth/NegationAndUncertaintyNegative.java new file mode 100644 index 00000000..a72364b1 --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/NegationAndUncertaintyNegative.java @@ -0,0 +1,78 @@ + + +/* First created by JCasGen Tue Jul 15 10:01:13 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.jcas.cas.TOP_Type; + + + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * XML source: C:/Users/user/fromHP/Shared/excitement workspace/eop/common/src/main/resources/desc/type/PredicateTruth.xml + * @generated */ +public class NegationAndUncertaintyNegative extends NegationAndUncertainty { + /** @generated + * @ordered + */ + // @SuppressWarnings ("hiding") + public final static int typeIndexID = JCasRegistry.register(NegationAndUncertaintyNegative.class); + /** @generated + * @ordered + */ + // @SuppressWarnings ("hiding") + public final static int type = typeIndexID; + /** @generated + * @return index of the type + */ + @Override + public int getTypeIndexID() {return typeIndexID;} + + /** Never called. 
Disable default constructor + * @generated */ + protected NegationAndUncertaintyNegative() {/* intentionally empty block */} + + /** Internal - constructor used by generator + * @generated + * @param addr low level Feature Structure reference + * @param type the type of this Feature Structure + */ + public NegationAndUncertaintyNegative(int addr, TOP_Type type) { + super(addr, type); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + */ + public NegationAndUncertaintyNegative(JCas jcas) { + super(jcas); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + * @param begin offset to the begin spot in the SofA + * @param end offset to the end spot in the SofA + */ + public NegationAndUncertaintyNegative(JCas jcas, int begin, int end) { + super(jcas); + setBegin(begin); + setEnd(end); + readObject(); + } + + /** + * + * Write your own initialization here + * + * + * @generated modifiable + */ + private void readObject() {/*default - does nothing empty block */} + +} + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/NegationAndUncertaintyNegative_Type.java b/common/src/main/java/eu/excitement/type/predicatetruth/NegationAndUncertaintyNegative_Type.java new file mode 100644 index 00000000..1c745a34 --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/NegationAndUncertaintyNegative_Type.java @@ -0,0 +1,62 @@ + +/* First created by JCasGen Tue Jul 15 10:01:13 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.cas.impl.CASImpl; +import org.apache.uima.cas.impl.FSGenerator; +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.cas.impl.TypeImpl; +import org.apache.uima.cas.Type; + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * @generated */ +public class 
NegationAndUncertaintyNegative_Type extends NegationAndUncertainty_Type { + /** @generated + * @return the generator for this type + */ + @Override + protected FSGenerator getFSGenerator() {return fsGenerator;} + /** @generated */ + private final FSGenerator fsGenerator = + new FSGenerator() { + public FeatureStructure createFS(int addr, CASImpl cas) { + if (NegationAndUncertaintyNegative_Type.this.useExistingInstance) { + // Return eq fs instance if already created + FeatureStructure fs = NegationAndUncertaintyNegative_Type.this.jcas.getJfsFromCaddr(addr); + if (null == fs) { + fs = new NegationAndUncertaintyNegative(addr, NegationAndUncertaintyNegative_Type.this); + NegationAndUncertaintyNegative_Type.this.jcas.putJfsFromCaddr(addr, fs); + return fs; + } + return fs; + } else return new NegationAndUncertaintyNegative(addr, NegationAndUncertaintyNegative_Type.this); + } + }; + /** @generated */ + // @SuppressWarnings ("hiding") + public final static int typeIndexID = NegationAndUncertaintyNegative.typeIndexID; + /** @generated + @modifiable */ + // @SuppressWarnings ("hiding") + public final static boolean featOkTst = JCasRegistry.getFeatOkTst("eu.excitement.type.predicatetruth.NegationAndUncertaintyNegative"); + + + + /** initialize variables to correspond with Cas Type and Features + * @generated + * @param jcas JCas + * @param casType Type + */ + public NegationAndUncertaintyNegative_Type(JCas jcas, Type casType) { + super(jcas, casType); + casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator()); + + } +} + + + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/NegationAndUncertaintyPositive.java b/common/src/main/java/eu/excitement/type/predicatetruth/NegationAndUncertaintyPositive.java new file mode 100644 index 00000000..4886c09a --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/NegationAndUncertaintyPositive.java @@ -0,0 +1,78 @@ + + +/* First created 
by JCasGen Tue Jul 15 10:01:13 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.jcas.cas.TOP_Type; + + + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * XML source: C:/Users/user/fromHP/Shared/excitement workspace/eop/common/src/main/resources/desc/type/PredicateTruth.xml + * @generated */ +public class NegationAndUncertaintyPositive extends NegationAndUncertainty { + /** @generated + * @ordered + */ + // @SuppressWarnings ("hiding") + public final static int typeIndexID = JCasRegistry.register(NegationAndUncertaintyPositive.class); + /** @generated + * @ordered + */ + // @SuppressWarnings ("hiding") + public final static int type = typeIndexID; + /** @generated + * @return index of the type + */ + @Override + public int getTypeIndexID() {return typeIndexID;} + + /** Never called. Disable default constructor + * @generated */ + protected NegationAndUncertaintyPositive() {/* intentionally empty block */} + + /** Internal - constructor used by generator + * @generated + * @param addr low level Feature Structure reference + * @param type the type of this Feature Structure + */ + public NegationAndUncertaintyPositive(int addr, TOP_Type type) { + super(addr, type); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + */ + public NegationAndUncertaintyPositive(JCas jcas) { + super(jcas); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + * @param begin offset to the begin spot in the SofA + * @param end offset to the end spot in the SofA + */ + public NegationAndUncertaintyPositive(JCas jcas, int begin, int end) { + super(jcas); + setBegin(begin); + setEnd(end); + readObject(); + } + + /** + * + * Write your own initialization here + * + * + * @generated modifiable + */ + private void readObject() {/*default - does nothing empty block */} + +} + + \ 
No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/NegationAndUncertaintyPositive_Type.java b/common/src/main/java/eu/excitement/type/predicatetruth/NegationAndUncertaintyPositive_Type.java new file mode 100644 index 00000000..a82fa667 --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/NegationAndUncertaintyPositive_Type.java @@ -0,0 +1,62 @@ + +/* First created by JCasGen Tue Jul 15 10:01:13 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.cas.impl.CASImpl; +import org.apache.uima.cas.impl.FSGenerator; +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.cas.impl.TypeImpl; +import org.apache.uima.cas.Type; + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * @generated */ +public class NegationAndUncertaintyPositive_Type extends NegationAndUncertainty_Type { + /** @generated + * @return the generator for this type + */ + @Override + protected FSGenerator getFSGenerator() {return fsGenerator;} + /** @generated */ + private final FSGenerator fsGenerator = + new FSGenerator() { + public FeatureStructure createFS(int addr, CASImpl cas) { + if (NegationAndUncertaintyPositive_Type.this.useExistingInstance) { + // Return eq fs instance if already created + FeatureStructure fs = NegationAndUncertaintyPositive_Type.this.jcas.getJfsFromCaddr(addr); + if (null == fs) { + fs = new NegationAndUncertaintyPositive(addr, NegationAndUncertaintyPositive_Type.this); + NegationAndUncertaintyPositive_Type.this.jcas.putJfsFromCaddr(addr, fs); + return fs; + } + return fs; + } else return new NegationAndUncertaintyPositive(addr, NegationAndUncertaintyPositive_Type.this); + } + }; + /** @generated */ + // @SuppressWarnings ("hiding") + public final static int typeIndexID = NegationAndUncertaintyPositive.typeIndexID; + /** @generated + @modifiable */ + // @SuppressWarnings ("hiding") + 
public final static boolean featOkTst = JCasRegistry.getFeatOkTst("eu.excitement.type.predicatetruth.NegationAndUncertaintyPositive"); + + + + /** initialize variables to correspond with Cas Type and Features + * @generated + * @param jcas JCas + * @param casType Type + */ + public NegationAndUncertaintyPositive_Type(JCas jcas, Type casType) { + super(jcas, casType); + casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator()); + + } +} + + + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/NegationAndUncertaintyUncertain.java b/common/src/main/java/eu/excitement/type/predicatetruth/NegationAndUncertaintyUncertain.java new file mode 100644 index 00000000..4d5799d4 --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/NegationAndUncertaintyUncertain.java @@ -0,0 +1,78 @@ + + +/* First created by JCasGen Tue Jul 15 10:01:13 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.jcas.cas.TOP_Type; + + + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * XML source: C:/Users/user/fromHP/Shared/excitement workspace/eop/common/src/main/resources/desc/type/PredicateTruth.xml + * @generated */ +public class NegationAndUncertaintyUncertain extends NegationAndUncertainty { + /** @generated + * @ordered + */ + // @SuppressWarnings ("hiding") + public final static int typeIndexID = JCasRegistry.register(NegationAndUncertaintyUncertain.class); + /** @generated + * @ordered + */ + // @SuppressWarnings ("hiding") + public final static int type = typeIndexID; + /** @generated + * @return index of the type + */ + @Override + public int getTypeIndexID() {return typeIndexID;} + + /** Never called. 
Disable default constructor + * @generated */ + protected NegationAndUncertaintyUncertain() {/* intentionally empty block */} + + /** Internal - constructor used by generator + * @generated + * @param addr low level Feature Structure reference + * @param type the type of this Feature Structure + */ + public NegationAndUncertaintyUncertain(int addr, TOP_Type type) { + super(addr, type); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + */ + public NegationAndUncertaintyUncertain(JCas jcas) { + super(jcas); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + * @param begin offset to the begin spot in the SofA + * @param end offset to the end spot in the SofA + */ + public NegationAndUncertaintyUncertain(JCas jcas, int begin, int end) { + super(jcas); + setBegin(begin); + setEnd(end); + readObject(); + } + + /** + * + * Write your own initialization here + * + * + * @generated modifiable + */ + private void readObject() {/*default - does nothing empty block */} + +} + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/NegationAndUncertaintyUncertain_Type.java b/common/src/main/java/eu/excitement/type/predicatetruth/NegationAndUncertaintyUncertain_Type.java new file mode 100644 index 00000000..f1ed6f5e --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/NegationAndUncertaintyUncertain_Type.java @@ -0,0 +1,62 @@ + +/* First created by JCasGen Tue Jul 15 10:01:13 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.cas.impl.CASImpl; +import org.apache.uima.cas.impl.FSGenerator; +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.cas.impl.TypeImpl; +import org.apache.uima.cas.Type; + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * @generated */ +public class 
NegationAndUncertaintyUncertain_Type extends NegationAndUncertainty_Type { + /** @generated + * @return the generator for this type + */ + @Override + protected FSGenerator getFSGenerator() {return fsGenerator;} + /** @generated */ + private final FSGenerator fsGenerator = + new FSGenerator() { + public FeatureStructure createFS(int addr, CASImpl cas) { + if (NegationAndUncertaintyUncertain_Type.this.useExistingInstance) { + // Return eq fs instance if already created + FeatureStructure fs = NegationAndUncertaintyUncertain_Type.this.jcas.getJfsFromCaddr(addr); + if (null == fs) { + fs = new NegationAndUncertaintyUncertain(addr, NegationAndUncertaintyUncertain_Type.this); + NegationAndUncertaintyUncertain_Type.this.jcas.putJfsFromCaddr(addr, fs); + return fs; + } + return fs; + } else return new NegationAndUncertaintyUncertain(addr, NegationAndUncertaintyUncertain_Type.this); + } + }; + /** @generated */ + // @SuppressWarnings ("hiding") + public final static int typeIndexID = NegationAndUncertaintyUncertain.typeIndexID; + /** @generated + @modifiable */ + // @SuppressWarnings ("hiding") + public final static boolean featOkTst = JCasRegistry.getFeatOkTst("eu.excitement.type.predicatetruth.NegationAndUncertaintyUncertain"); + + + + /** initialize variables to correspond with Cas Type and Features + * @generated + * @param jcas JCas + * @param casType Type + */ + public NegationAndUncertaintyUncertain_Type(JCas jcas, Type casType) { + super(jcas, casType); + casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator()); + + } +} + + + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/NegationAndUncertainty_Type.java b/common/src/main/java/eu/excitement/type/predicatetruth/NegationAndUncertainty_Type.java index 7feda93c..e3486b1b 100644 --- a/common/src/main/java/eu/excitement/type/predicatetruth/NegationAndUncertainty_Type.java +++ 
b/common/src/main/java/eu/excitement/type/predicatetruth/NegationAndUncertainty_Type.java @@ -2,19 +2,17 @@ /* First created by JCasGen Fri Oct 05 20:17:26 CEST 2012 */ package eu.excitement.type.predicatetruth; -import org.apache.uima.jcas.JCas; -import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.cas.Type; import org.apache.uima.cas.impl.CASImpl; import org.apache.uima.cas.impl.FSGenerator; -import org.apache.uima.cas.FeatureStructure; import org.apache.uima.cas.impl.TypeImpl; -import org.apache.uima.cas.Type; -import org.apache.uima.cas.impl.FeatureImpl; -import org.apache.uima.cas.Feature; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; import org.apache.uima.jcas.tcas.Annotation_Type; -/** This type represents a negation-and-uncertainty annotation. - * Updated by JCasGen Fri Oct 05 20:17:26 CEST 2012 +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 * @generated */ public class NegationAndUncertainty_Type extends Annotation_Type { /** @generated */ @@ -44,36 +42,12 @@ public FeatureStructure createFS(int addr, CASImpl cas) { //@SuppressWarnings ("hiding") public final static boolean featOkTst = JCasRegistry.getFeatOkTst("eu.excitement.type.predicatetruth.NegationAndUncertainty"); - /** @generated */ - final Feature casFeat_value; - /** @generated */ - final int casFeatCode_value; - /** @generated */ - public String getValue(int addr) { - if (featOkTst && casFeat_value == null) - jcas.throwFeatMissing("value", "eu.excitement.type.predicatetruth.NegationAndUncertainty"); - return ll_cas.ll_getStringValue(addr, casFeatCode_value); - } - /** @generated */ - public void setValue(int addr, String v) { - if (featOkTst && casFeat_value == null) - jcas.throwFeatMissing("value", "eu.excitement.type.predicatetruth.NegationAndUncertainty"); - ll_cas.ll_setStringValue(addr, casFeatCode_value, v);} - - - - - /** initialize variables to correspond with Cas Type and Features * 
@generated */ public NegationAndUncertainty_Type(JCas jcas, Type casType) { super(jcas, casType); casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator()); - - casFeat_value = jcas.getRequiredFeatureDE(casType, "value", "eu.excitement.type.predicatetruth.NegationAndUncertaintyValue", featOkTst); - casFeatCode_value = (null == casFeat_value) ? JCas.INVALID_FEATURE_CODE : ((FeatureImpl)casFeat_value).getCode(); - } } diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignature.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignature.java index 5f5ca449..7a332e34 100644 --- a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignature.java +++ b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignature.java @@ -10,9 +10,9 @@ import org.apache.uima.jcas.tcas.Annotation; -/** This type represents an implication signature of a predicate. - * Updated by JCasGen Fri Oct 05 20:17:26 CEST 2012 - * XML source: /Users/tailblues/progs/github/Excitement-Open-Platform/common/src/main/resources/desc/type/PredicateTruth.xml +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * XML source: C:/Users/user/fromHP/Shared/excitement workspace/eop/common/src/main/resources/desc/type/PredicateTruth.xml * @generated */ public class PredicateSignature extends Annotation { /** @generated @@ -54,10 +54,13 @@ public PredicateSignature(JCas jcas, int begin, int end) { readObject(); } - /** + /** + * * Write your own initialization here * - @generated modifiable */ + * + * @generated modifiable + */ private void readObject() {/*default - does nothing empty block */} } diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureNegativeNegative.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureNegativeNegative.java new file mode 100644 index 00000000..3cc9d301 --- /dev/null +++ 
b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureNegativeNegative.java @@ -0,0 +1,78 @@ + + +/* First created by JCasGen Tue Jul 15 10:01:13 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.jcas.cas.TOP_Type; + + + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * XML source: C:/Users/user/fromHP/Shared/excitement workspace/eop/common/src/main/resources/desc/type/PredicateTruth.xml + * @generated */ +public class PredicateSignatureNegativeNegative extends PredicateSignature { + /** @generated + * @ordered + */ + // @SuppressWarnings ("hiding") + public final static int typeIndexID = JCasRegistry.register(PredicateSignatureNegativeNegative.class); + /** @generated + * @ordered + */ + // @SuppressWarnings ("hiding") + public final static int type = typeIndexID; + /** @generated + * @return index of the type + */ + @Override + public int getTypeIndexID() {return typeIndexID;} + + /** Never called. 
Disable default constructor + * @generated */ + protected PredicateSignatureNegativeNegative() {/* intentionally empty block */} + + /** Internal - constructor used by generator + * @generated + * @param addr low level Feature Structure reference + * @param type the type of this Feature Structure + */ + public PredicateSignatureNegativeNegative(int addr, TOP_Type type) { + super(addr, type); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + */ + public PredicateSignatureNegativeNegative(JCas jcas) { + super(jcas); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + * @param begin offset to the begin spot in the SofA + * @param end offset to the end spot in the SofA + */ + public PredicateSignatureNegativeNegative(JCas jcas, int begin, int end) { + super(jcas); + setBegin(begin); + setEnd(end); + readObject(); + } + + /** + * + * Write your own initialization here + * + * + * @generated modifiable + */ + private void readObject() {/*default - does nothing empty block */} + +} + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureNegativeNegative_Type.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureNegativeNegative_Type.java new file mode 100644 index 00000000..c665ac7e --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureNegativeNegative_Type.java @@ -0,0 +1,62 @@ + +/* First created by JCasGen Tue Jul 15 10:01:13 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.cas.impl.CASImpl; +import org.apache.uima.cas.impl.FSGenerator; +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.cas.impl.TypeImpl; +import org.apache.uima.cas.Type; + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * @generated */ 
+public class PredicateSignatureNegativeNegative_Type extends PredicateSignature_Type { + /** @generated + * @return the generator for this type + */ + @Override + protected FSGenerator getFSGenerator() {return fsGenerator;} + /** @generated */ + private final FSGenerator fsGenerator = + new FSGenerator() { + public FeatureStructure createFS(int addr, CASImpl cas) { + if (PredicateSignatureNegativeNegative_Type.this.useExistingInstance) { + // Return eq fs instance if already created + FeatureStructure fs = PredicateSignatureNegativeNegative_Type.this.jcas.getJfsFromCaddr(addr); + if (null == fs) { + fs = new PredicateSignatureNegativeNegative(addr, PredicateSignatureNegativeNegative_Type.this); + PredicateSignatureNegativeNegative_Type.this.jcas.putJfsFromCaddr(addr, fs); + return fs; + } + return fs; + } else return new PredicateSignatureNegativeNegative(addr, PredicateSignatureNegativeNegative_Type.this); + } + }; + /** @generated */ + // @SuppressWarnings ("hiding") + public final static int typeIndexID = PredicateSignatureNegativeNegative.typeIndexID; + /** @generated + @modifiable */ + // @SuppressWarnings ("hiding") + public final static boolean featOkTst = JCasRegistry.getFeatOkTst("eu.excitement.type.predicatetruth.PredicateSignatureNegativeNegative"); + + + + /** initialize variables to correspond with Cas Type and Features + * @generated + * @param jcas JCas + * @param casType Type + */ + public PredicateSignatureNegativeNegative_Type(JCas jcas, Type casType) { + super(jcas, casType); + casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator()); + + } +} + + + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureNegativePositive.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureNegativePositive.java new file mode 100644 index 00000000..a926ff54 --- /dev/null +++ 
b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureNegativePositive.java @@ -0,0 +1,78 @@ + + +/* First created by JCasGen Tue Jul 15 10:01:13 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.jcas.cas.TOP_Type; + + + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * XML source: C:/Users/user/fromHP/Shared/excitement workspace/eop/common/src/main/resources/desc/type/PredicateTruth.xml + * @generated */ +public class PredicateSignatureNegativePositive extends PredicateSignature { + /** @generated + * @ordered + */ + // @SuppressWarnings ("hiding") + public final static int typeIndexID = JCasRegistry.register(PredicateSignatureNegativePositive.class); + /** @generated + * @ordered + */ + // @SuppressWarnings ("hiding") + public final static int type = typeIndexID; + /** @generated + * @return index of the type + */ + @Override + public int getTypeIndexID() {return typeIndexID;} + + /** Never called. 
Disable default constructor + * @generated */ + protected PredicateSignatureNegativePositive() {/* intentionally empty block */} + + /** Internal - constructor used by generator + * @generated + * @param addr low level Feature Structure reference + * @param type the type of this Feature Structure + */ + public PredicateSignatureNegativePositive(int addr, TOP_Type type) { + super(addr, type); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + */ + public PredicateSignatureNegativePositive(JCas jcas) { + super(jcas); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + * @param begin offset to the begin spot in the SofA + * @param end offset to the end spot in the SofA + */ + public PredicateSignatureNegativePositive(JCas jcas, int begin, int end) { + super(jcas); + setBegin(begin); + setEnd(end); + readObject(); + } + + /** + * + * Write your own initialization here + * + * + * @generated modifiable + */ + private void readObject() {/*default - does nothing empty block */} + +} + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureNegativePositive_Type.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureNegativePositive_Type.java new file mode 100644 index 00000000..4077c3ec --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureNegativePositive_Type.java @@ -0,0 +1,62 @@ + +/* First created by JCasGen Tue Jul 15 10:01:13 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.cas.impl.CASImpl; +import org.apache.uima.cas.impl.FSGenerator; +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.cas.impl.TypeImpl; +import org.apache.uima.cas.Type; + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * @generated */ 
+public class PredicateSignatureNegativePositive_Type extends PredicateSignature_Type { + /** @generated + * @return the generator for this type + */ + @Override + protected FSGenerator getFSGenerator() {return fsGenerator;} + /** @generated */ + private final FSGenerator fsGenerator = + new FSGenerator() { + public FeatureStructure createFS(int addr, CASImpl cas) { + if (PredicateSignatureNegativePositive_Type.this.useExistingInstance) { + // Return eq fs instance if already created + FeatureStructure fs = PredicateSignatureNegativePositive_Type.this.jcas.getJfsFromCaddr(addr); + if (null == fs) { + fs = new PredicateSignatureNegativePositive(addr, PredicateSignatureNegativePositive_Type.this); + PredicateSignatureNegativePositive_Type.this.jcas.putJfsFromCaddr(addr, fs); + return fs; + } + return fs; + } else return new PredicateSignatureNegativePositive(addr, PredicateSignatureNegativePositive_Type.this); + } + }; + /** @generated */ + // @SuppressWarnings ("hiding") + public final static int typeIndexID = PredicateSignatureNegativePositive.typeIndexID; + /** @generated + @modifiable */ + // @SuppressWarnings ("hiding") + public final static boolean featOkTst = JCasRegistry.getFeatOkTst("eu.excitement.type.predicatetruth.PredicateSignatureNegativePositive"); + + + + /** initialize variables to correspond with Cas Type and Features + * @generated + * @param jcas JCas + * @param casType Type + */ + public PredicateSignatureNegativePositive_Type(JCas jcas, Type casType) { + super(jcas, casType); + casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator()); + + } +} + + + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureNegativeUncertain.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureNegativeUncertain.java new file mode 100644 index 00000000..f1caae9b --- /dev/null +++ 
b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureNegativeUncertain.java @@ -0,0 +1,78 @@ + + +/* First created by JCasGen Tue Jul 15 10:01:13 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.jcas.cas.TOP_Type; + + + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * XML source: C:/Users/user/fromHP/Shared/excitement workspace/eop/common/src/main/resources/desc/type/PredicateTruth.xml + * @generated */ +public class PredicateSignatureNegativeUncertain extends PredicateSignature { + /** @generated + * @ordered + */ + // @SuppressWarnings ("hiding") + public final static int typeIndexID = JCasRegistry.register(PredicateSignatureNegativeUncertain.class); + /** @generated + * @ordered + */ + // @SuppressWarnings ("hiding") + public final static int type = typeIndexID; + /** @generated + * @return index of the type + */ + @Override + public int getTypeIndexID() {return typeIndexID;} + + /** Never called. 
Disable default constructor + * @generated */ + protected PredicateSignatureNegativeUncertain() {/* intentionally empty block */} + + /** Internal - constructor used by generator + * @generated + * @param addr low level Feature Structure reference + * @param type the type of this Feature Structure + */ + public PredicateSignatureNegativeUncertain(int addr, TOP_Type type) { + super(addr, type); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + */ + public PredicateSignatureNegativeUncertain(JCas jcas) { + super(jcas); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + * @param begin offset to the begin spot in the SofA + * @param end offset to the end spot in the SofA + */ + public PredicateSignatureNegativeUncertain(JCas jcas, int begin, int end) { + super(jcas); + setBegin(begin); + setEnd(end); + readObject(); + } + + /** + * + * Write your own initialization here + * + * + * @generated modifiable + */ + private void readObject() {/*default - does nothing empty block */} + +} + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureNegativeUncertain_Type.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureNegativeUncertain_Type.java new file mode 100644 index 00000000..e0697631 --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureNegativeUncertain_Type.java @@ -0,0 +1,62 @@ + +/* First created by JCasGen Tue Jul 15 10:01:13 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.cas.impl.CASImpl; +import org.apache.uima.cas.impl.FSGenerator; +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.cas.impl.TypeImpl; +import org.apache.uima.cas.Type; + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * 
@generated */ +public class PredicateSignatureNegativeUncertain_Type extends PredicateSignature_Type { + /** @generated + * @return the generator for this type + */ + @Override + protected FSGenerator getFSGenerator() {return fsGenerator;} + /** @generated */ + private final FSGenerator fsGenerator = + new FSGenerator() { + public FeatureStructure createFS(int addr, CASImpl cas) { + if (PredicateSignatureNegativeUncertain_Type.this.useExistingInstance) { + // Return eq fs instance if already created + FeatureStructure fs = PredicateSignatureNegativeUncertain_Type.this.jcas.getJfsFromCaddr(addr); + if (null == fs) { + fs = new PredicateSignatureNegativeUncertain(addr, PredicateSignatureNegativeUncertain_Type.this); + PredicateSignatureNegativeUncertain_Type.this.jcas.putJfsFromCaddr(addr, fs); + return fs; + } + return fs; + } else return new PredicateSignatureNegativeUncertain(addr, PredicateSignatureNegativeUncertain_Type.this); + } + }; + /** @generated */ + //@SuppressWarnings ("hiding") + public final static int typeIndexID = PredicateSignatureNegativeUncertain.typeIndexID; + /** @generated + @modifiable */ + //@SuppressWarnings ("hiding") + public final static boolean featOkTst = JCasRegistry.getFeatOkTst("eu.excitement.type.predicatetruth.PredicateSignatureNegativeUncertain"); + + + + /** initialize variables to correspond with Cas Type and Features + * @generated + * @param jcas JCas + * @param casType Type + */ + public PredicateSignatureNegativeUncertain_Type(JCas jcas, Type casType) { + super(jcas, casType); + casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator()); + + } +} + + + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignaturePositiveNegative.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignaturePositiveNegative.java new file mode 100644 index 00000000..a96c2ada --- /dev/null +++ 
b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignaturePositiveNegative.java @@ -0,0 +1,78 @@ + + +/* First created by JCasGen Tue Jul 15 10:01:13 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.jcas.cas.TOP_Type; + + + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * XML source: C:/Users/user/fromHP/Shared/excitement workspace/eop/common/src/main/resources/desc/type/PredicateTruth.xml + * @generated */ +public class PredicateSignaturePositiveNegative extends PredicateSignature { + /** @generated + * @ordered + */ + //@SuppressWarnings ("hiding") + public final static int typeIndexID = JCasRegistry.register(PredicateSignaturePositiveNegative.class); + /** @generated + * @ordered + */ + //@SuppressWarnings ("hiding") + public final static int type = typeIndexID; + /** @generated + * @return index of the type + */ + @Override + public int getTypeIndexID() {return typeIndexID;} + + /** Never called. 
Disable default constructor + * @generated */ + protected PredicateSignaturePositiveNegative() {/* intentionally empty block */} + + /** Internal - constructor used by generator + * @generated + * @param addr low level Feature Structure reference + * @param type the type of this Feature Structure + */ + public PredicateSignaturePositiveNegative(int addr, TOP_Type type) { + super(addr, type); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + */ + public PredicateSignaturePositiveNegative(JCas jcas) { + super(jcas); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + * @param begin offset to the begin spot in the SofA + * @param end offset to the end spot in the SofA + */ + public PredicateSignaturePositiveNegative(JCas jcas, int begin, int end) { + super(jcas); + setBegin(begin); + setEnd(end); + readObject(); + } + + /** + * + * Write your own initialization here + * + * + * @generated modifiable + */ + private void readObject() {/*default - does nothing empty block */} + +} + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignaturePositiveNegative_Type.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignaturePositiveNegative_Type.java new file mode 100644 index 00000000..2e6bb5c3 --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignaturePositiveNegative_Type.java @@ -0,0 +1,62 @@ + +/* First created by JCasGen Tue Jul 15 10:01:13 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.cas.impl.CASImpl; +import org.apache.uima.cas.impl.FSGenerator; +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.cas.impl.TypeImpl; +import org.apache.uima.cas.Type; + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * @generated */ 
+public class PredicateSignaturePositiveNegative_Type extends PredicateSignature_Type { + /** @generated + * @return the generator for this type + */ + @Override + protected FSGenerator getFSGenerator() {return fsGenerator;} + /** @generated */ + private final FSGenerator fsGenerator = + new FSGenerator() { + public FeatureStructure createFS(int addr, CASImpl cas) { + if (PredicateSignaturePositiveNegative_Type.this.useExistingInstance) { + // Return eq fs instance if already created + FeatureStructure fs = PredicateSignaturePositiveNegative_Type.this.jcas.getJfsFromCaddr(addr); + if (null == fs) { + fs = new PredicateSignaturePositiveNegative(addr, PredicateSignaturePositiveNegative_Type.this); + PredicateSignaturePositiveNegative_Type.this.jcas.putJfsFromCaddr(addr, fs); + return fs; + } + return fs; + } else return new PredicateSignaturePositiveNegative(addr, PredicateSignaturePositiveNegative_Type.this); + } + }; + /** @generated */ + //@SuppressWarnings ("hiding") + public final static int typeIndexID = PredicateSignaturePositiveNegative.typeIndexID; + /** @generated + @modifiable */ + //@SuppressWarnings ("hiding") + public final static boolean featOkTst = JCasRegistry.getFeatOkTst("eu.excitement.type.predicatetruth.PredicateSignaturePositiveNegative"); + + + + /** initialize variables to correspond with Cas Type and Features + * @generated + * @param jcas JCas + * @param casType Type + */ + public PredicateSignaturePositiveNegative_Type(JCas jcas, Type casType) { + super(jcas, casType); + casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator()); + + } +} + + + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignaturePositivePositive.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignaturePositivePositive.java new file mode 100644 index 00000000..4295b6f0 --- /dev/null +++ 
b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignaturePositivePositive.java @@ -0,0 +1,78 @@ + + +/* First created by JCasGen Tue Jul 15 10:01:13 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.jcas.cas.TOP_Type; + + + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * XML source: C:/Users/user/fromHP/Shared/excitement workspace/eop/common/src/main/resources/desc/type/PredicateTruth.xml + * @generated */ +public class PredicateSignaturePositivePositive extends PredicateSignature { + /** @generated + * @ordered + */ + //@SuppressWarnings ("hiding") + public final static int typeIndexID = JCasRegistry.register(PredicateSignaturePositivePositive.class); + /** @generated + * @ordered + */ + //@SuppressWarnings ("hiding") + public final static int type = typeIndexID; + /** @generated + * @return index of the type + */ + @Override + public int getTypeIndexID() {return typeIndexID;} + + /** Never called. 
Disable default constructor + * @generated */ + protected PredicateSignaturePositivePositive() {/* intentionally empty block */} + + /** Internal - constructor used by generator + * @generated + * @param addr low level Feature Structure reference + * @param type the type of this Feature Structure + */ + public PredicateSignaturePositivePositive(int addr, TOP_Type type) { + super(addr, type); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + */ + public PredicateSignaturePositivePositive(JCas jcas) { + super(jcas); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + * @param begin offset to the begin spot in the SofA + * @param end offset to the end spot in the SofA + */ + public PredicateSignaturePositivePositive(JCas jcas, int begin, int end) { + super(jcas); + setBegin(begin); + setEnd(end); + readObject(); + } + + /** + * + * Write your own initialization here + * + * + * @generated modifiable + */ + private void readObject() {/*default - does nothing empty block */} + +} + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignaturePositivePositive_Type.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignaturePositivePositive_Type.java new file mode 100644 index 00000000..fef06043 --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignaturePositivePositive_Type.java @@ -0,0 +1,62 @@ + +/* First created by JCasGen Tue Jul 15 10:01:13 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.cas.impl.CASImpl; +import org.apache.uima.cas.impl.FSGenerator; +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.cas.impl.TypeImpl; +import org.apache.uima.cas.Type; + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * @generated */ 
+public class PredicateSignaturePositivePositive_Type extends PredicateSignature_Type { + /** @generated + * @return the generator for this type + */ + @Override + protected FSGenerator getFSGenerator() {return fsGenerator;} + /** @generated */ + private final FSGenerator fsGenerator = + new FSGenerator() { + public FeatureStructure createFS(int addr, CASImpl cas) { + if (PredicateSignaturePositivePositive_Type.this.useExistingInstance) { + // Return eq fs instance if already created + FeatureStructure fs = PredicateSignaturePositivePositive_Type.this.jcas.getJfsFromCaddr(addr); + if (null == fs) { + fs = new PredicateSignaturePositivePositive(addr, PredicateSignaturePositivePositive_Type.this); + PredicateSignaturePositivePositive_Type.this.jcas.putJfsFromCaddr(addr, fs); + return fs; + } + return fs; + } else return new PredicateSignaturePositivePositive(addr, PredicateSignaturePositivePositive_Type.this); + } + }; + /** @generated */ + //@SuppressWarnings ("hiding") + public final static int typeIndexID = PredicateSignaturePositivePositive.typeIndexID; + /** @generated + @modifiable */ + //@SuppressWarnings ("hiding") + public final static boolean featOkTst = JCasRegistry.getFeatOkTst("eu.excitement.type.predicatetruth.PredicateSignaturePositivePositive"); + + + + /** initialize variables to correspond with Cas Type and Features + * @generated + * @param jcas JCas + * @param casType Type + */ + public PredicateSignaturePositivePositive_Type(JCas jcas, Type casType) { + super(jcas, casType); + casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator()); + + } +} + + + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignaturePositiveUncertain.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignaturePositiveUncertain.java new file mode 100644 index 00000000..2b825649 --- /dev/null +++ 
b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignaturePositiveUncertain.java @@ -0,0 +1,78 @@ + + +/* First created by JCasGen Tue Jul 15 10:01:13 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.jcas.cas.TOP_Type; + + + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * XML source: C:/Users/user/fromHP/Shared/excitement workspace/eop/common/src/main/resources/desc/type/PredicateTruth.xml + * @generated */ +public class PredicateSignaturePositiveUncertain extends PredicateSignature { + /** @generated + * @ordered + */ + //@SuppressWarnings ("hiding") + public final static int typeIndexID = JCasRegistry.register(PredicateSignaturePositiveUncertain.class); + /** @generated + * @ordered + */ + //@SuppressWarnings ("hiding") + public final static int type = typeIndexID; + /** @generated + * @return index of the type + */ + @Override + public int getTypeIndexID() {return typeIndexID;} + + /** Never called. 
Disable default constructor + * @generated */ + protected PredicateSignaturePositiveUncertain() {/* intentionally empty block */} + + /** Internal - constructor used by generator + * @generated + * @param addr low level Feature Structure reference + * @param type the type of this Feature Structure + */ + public PredicateSignaturePositiveUncertain(int addr, TOP_Type type) { + super(addr, type); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + */ + public PredicateSignaturePositiveUncertain(JCas jcas) { + super(jcas); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + * @param begin offset to the begin spot in the SofA + * @param end offset to the end spot in the SofA + */ + public PredicateSignaturePositiveUncertain(JCas jcas, int begin, int end) { + super(jcas); + setBegin(begin); + setEnd(end); + readObject(); + } + + /** + * + * Write your own initialization here + * + * + * @generated modifiable + */ + private void readObject() {/*default - does nothing empty block */} + +} + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignaturePositiveUncertain_Type.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignaturePositiveUncertain_Type.java new file mode 100644 index 00000000..be90deb1 --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignaturePositiveUncertain_Type.java @@ -0,0 +1,62 @@ + +/* First created by JCasGen Tue Jul 15 10:01:13 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.cas.impl.CASImpl; +import org.apache.uima.cas.impl.FSGenerator; +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.cas.impl.TypeImpl; +import org.apache.uima.cas.Type; + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * 
@generated */ +public class PredicateSignaturePositiveUncertain_Type extends PredicateSignature_Type { + /** @generated + * @return the generator for this type + */ + @Override + protected FSGenerator getFSGenerator() {return fsGenerator;} + /** @generated */ + private final FSGenerator fsGenerator = + new FSGenerator() { + public FeatureStructure createFS(int addr, CASImpl cas) { + if (PredicateSignaturePositiveUncertain_Type.this.useExistingInstance) { + // Return eq fs instance if already created + FeatureStructure fs = PredicateSignaturePositiveUncertain_Type.this.jcas.getJfsFromCaddr(addr); + if (null == fs) { + fs = new PredicateSignaturePositiveUncertain(addr, PredicateSignaturePositiveUncertain_Type.this); + PredicateSignaturePositiveUncertain_Type.this.jcas.putJfsFromCaddr(addr, fs); + return fs; + } + return fs; + } else return new PredicateSignaturePositiveUncertain(addr, PredicateSignaturePositiveUncertain_Type.this); + } + }; + /** @generated */ + //@SuppressWarnings ("hiding") + public final static int typeIndexID = PredicateSignaturePositiveUncertain.typeIndexID; + /** @generated + @modifiable */ + //@SuppressWarnings ("hiding") + public final static boolean featOkTst = JCasRegistry.getFeatOkTst("eu.excitement.type.predicatetruth.PredicateSignaturePositiveUncertain"); + + + + /** initialize variables to correspond with Cas Type and Features + * @generated + * @param jcas JCas + * @param casType Type + */ + public PredicateSignaturePositiveUncertain_Type(JCas jcas, Type casType) { + super(jcas, casType); + casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator()); + + } +} + + + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureUncertainNegative.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureUncertainNegative.java new file mode 100644 index 00000000..045ce06f --- /dev/null +++ 
b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureUncertainNegative.java @@ -0,0 +1,78 @@ + + +/* First created by JCasGen Tue Jul 15 10:01:13 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.jcas.cas.TOP_Type; + + + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * XML source: C:/Users/user/fromHP/Shared/excitement workspace/eop/common/src/main/resources/desc/type/PredicateTruth.xml + * @generated */ +public class PredicateSignatureUncertainNegative extends PredicateSignature { + /** @generated + * @ordered + */ + //@SuppressWarnings ("hiding") + public final static int typeIndexID = JCasRegistry.register(PredicateSignatureUncertainNegative.class); + /** @generated + * @ordered + */ + //@SuppressWarnings ("hiding") + public final static int type = typeIndexID; + /** @generated + * @return index of the type + */ + @Override + public int getTypeIndexID() {return typeIndexID;} + + /** Never called. 
Disable default constructor + * @generated */ + protected PredicateSignatureUncertainNegative() {/* intentionally empty block */} + + /** Internal - constructor used by generator + * @generated + * @param addr low level Feature Structure reference + * @param type the type of this Feature Structure + */ + public PredicateSignatureUncertainNegative(int addr, TOP_Type type) { + super(addr, type); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + */ + public PredicateSignatureUncertainNegative(JCas jcas) { + super(jcas); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + * @param begin offset to the begin spot in the SofA + * @param end offset to the end spot in the SofA + */ + public PredicateSignatureUncertainNegative(JCas jcas, int begin, int end) { + super(jcas); + setBegin(begin); + setEnd(end); + readObject(); + } + + /** + * + * Write your own initialization here + * + * + * @generated modifiable + */ + private void readObject() {/*default - does nothing empty block */} + +} + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureUncertainNegative_Type.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureUncertainNegative_Type.java new file mode 100644 index 00000000..30cc702c --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureUncertainNegative_Type.java @@ -0,0 +1,62 @@ + +/* First created by JCasGen Tue Jul 15 10:01:13 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.cas.impl.CASImpl; +import org.apache.uima.cas.impl.FSGenerator; +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.cas.impl.TypeImpl; +import org.apache.uima.cas.Type; + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * 
@generated */ +public class PredicateSignatureUncertainNegative_Type extends PredicateSignature_Type { + /** @generated + * @return the generator for this type + */ + @Override + protected FSGenerator getFSGenerator() {return fsGenerator;} + /** @generated */ + private final FSGenerator fsGenerator = + new FSGenerator() { + public FeatureStructure createFS(int addr, CASImpl cas) { + if (PredicateSignatureUncertainNegative_Type.this.useExistingInstance) { + // Return eq fs instance if already created + FeatureStructure fs = PredicateSignatureUncertainNegative_Type.this.jcas.getJfsFromCaddr(addr); + if (null == fs) { + fs = new PredicateSignatureUncertainNegative(addr, PredicateSignatureUncertainNegative_Type.this); + PredicateSignatureUncertainNegative_Type.this.jcas.putJfsFromCaddr(addr, fs); + return fs; + } + return fs; + } else return new PredicateSignatureUncertainNegative(addr, PredicateSignatureUncertainNegative_Type.this); + } + }; + /** @generated */ + //@SuppressWarnings ("hiding") + public final static int typeIndexID = PredicateSignatureUncertainNegative.typeIndexID; + /** @generated + @modifiable */ + //@SuppressWarnings ("hiding") + public final static boolean featOkTst = JCasRegistry.getFeatOkTst("eu.excitement.type.predicatetruth.PredicateSignatureUncertainNegative"); + + + + /** initialize variables to correspond with Cas Type and Features + * @generated + * @param jcas JCas + * @param casType Type + */ + public PredicateSignatureUncertainNegative_Type(JCas jcas, Type casType) { + super(jcas, casType); + casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator()); + + } +} + + + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureUncertainPositive.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureUncertainPositive.java new file mode 100644 index 00000000..8aba604f --- /dev/null +++ 
b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureUncertainPositive.java @@ -0,0 +1,78 @@ + + +/* First created by JCasGen Tue Jul 15 10:01:13 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.jcas.cas.TOP_Type; + + + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * XML source: C:/Users/user/fromHP/Shared/excitement workspace/eop/common/src/main/resources/desc/type/PredicateTruth.xml + * @generated */ +public class PredicateSignatureUncertainPositive extends PredicateSignature { + /** @generated + * @ordered + */ + //@SuppressWarnings ("hiding") + public final static int typeIndexID = JCasRegistry.register(PredicateSignatureUncertainPositive.class); + /** @generated + * @ordered + */ + //@SuppressWarnings ("hiding") + public final static int type = typeIndexID; + /** @generated + * @return index of the type + */ + @Override + public int getTypeIndexID() {return typeIndexID;} + + /** Never called. 
Disable default constructor + * @generated */ + protected PredicateSignatureUncertainPositive() {/* intentionally empty block */} + + /** Internal - constructor used by generator + * @generated + * @param addr low level Feature Structure reference + * @param type the type of this Feature Structure + */ + public PredicateSignatureUncertainPositive(int addr, TOP_Type type) { + super(addr, type); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + */ + public PredicateSignatureUncertainPositive(JCas jcas) { + super(jcas); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + * @param begin offset to the begin spot in the SofA + * @param end offset to the end spot in the SofA + */ + public PredicateSignatureUncertainPositive(JCas jcas, int begin, int end) { + super(jcas); + setBegin(begin); + setEnd(end); + readObject(); + } + + /** + * + * Write your own initialization here + * + * + * @generated modifiable + */ + private void readObject() {/*default - does nothing empty block */} + +} + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureUncertainPositive_Type.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureUncertainPositive_Type.java new file mode 100644 index 00000000..a03c06e1 --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureUncertainPositive_Type.java @@ -0,0 +1,62 @@ + +/* First created by JCasGen Tue Jul 15 10:01:13 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.cas.impl.CASImpl; +import org.apache.uima.cas.impl.FSGenerator; +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.cas.impl.TypeImpl; +import org.apache.uima.cas.Type; + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * 
@generated */ +public class PredicateSignatureUncertainPositive_Type extends PredicateSignature_Type { + /** @generated + * @return the generator for this type + */ + @Override + protected FSGenerator getFSGenerator() {return fsGenerator;} + /** @generated */ + private final FSGenerator fsGenerator = + new FSGenerator() { + public FeatureStructure createFS(int addr, CASImpl cas) { + if (PredicateSignatureUncertainPositive_Type.this.useExistingInstance) { + // Return eq fs instance if already created + FeatureStructure fs = PredicateSignatureUncertainPositive_Type.this.jcas.getJfsFromCaddr(addr); + if (null == fs) { + fs = new PredicateSignatureUncertainPositive(addr, PredicateSignatureUncertainPositive_Type.this); + PredicateSignatureUncertainPositive_Type.this.jcas.putJfsFromCaddr(addr, fs); + return fs; + } + return fs; + } else return new PredicateSignatureUncertainPositive(addr, PredicateSignatureUncertainPositive_Type.this); + } + }; + /** @generated */ + //@SuppressWarnings ("hiding") + public final static int typeIndexID = PredicateSignatureUncertainPositive.typeIndexID; + /** @generated + @modifiable */ + //@SuppressWarnings ("hiding") + public final static boolean featOkTst = JCasRegistry.getFeatOkTst("eu.excitement.type.predicatetruth.PredicateSignatureUncertainPositive"); + + + + /** initialize variables to correspond with Cas Type and Features + * @generated + * @param jcas JCas + * @param casType Type + */ + public PredicateSignatureUncertainPositive_Type(JCas jcas, Type casType) { + super(jcas, casType); + casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator()); + + } +} + + + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureUncertainUncertain.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureUncertainUncertain.java new file mode 100644 index 00000000..8c2b8249 --- /dev/null +++ 
b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureUncertainUncertain.java @@ -0,0 +1,78 @@ + + +/* First created by JCasGen Tue Jul 15 10:01:13 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.jcas.cas.TOP_Type; + + + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * XML source: C:/Users/user/fromHP/Shared/excitement workspace/eop/common/src/main/resources/desc/type/PredicateTruth.xml + * @generated */ +public class PredicateSignatureUncertainUncertain extends PredicateSignature { + /** @generated + * @ordered + */ + //@SuppressWarnings ("hiding") + public final static int typeIndexID = JCasRegistry.register(PredicateSignatureUncertainUncertain.class); + /** @generated + * @ordered + */ + //@SuppressWarnings ("hiding") + public final static int type = typeIndexID; + /** @generated + * @return index of the type + */ + @Override + public int getTypeIndexID() {return typeIndexID;} + + /** Never called. 
Disable default constructor + * @generated */ + protected PredicateSignatureUncertainUncertain() {/* intentionally empty block */} + + /** Internal - constructor used by generator + * @generated + * @param addr low level Feature Structure reference + * @param type the type of this Feature Structure + */ + public PredicateSignatureUncertainUncertain(int addr, TOP_Type type) { + super(addr, type); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + */ + public PredicateSignatureUncertainUncertain(JCas jcas) { + super(jcas); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + * @param begin offset to the begin spot in the SofA + * @param end offset to the end spot in the SofA + */ + public PredicateSignatureUncertainUncertain(JCas jcas, int begin, int end) { + super(jcas); + setBegin(begin); + setEnd(end); + readObject(); + } + + /** + * + * Write your own initialization here + * + * + * @generated modifiable + */ + private void readObject() {/*default - does nothing empty block */} + +} + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureUncertainUncertain_Type.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureUncertainUncertain_Type.java new file mode 100644 index 00000000..efcbadb4 --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignatureUncertainUncertain_Type.java @@ -0,0 +1,62 @@ + +/* First created by JCasGen Tue Jul 15 10:01:13 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.cas.impl.CASImpl; +import org.apache.uima.cas.impl.FSGenerator; +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.cas.impl.TypeImpl; +import org.apache.uima.cas.Type; + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * 
@generated */ +public class PredicateSignatureUncertainUncertain_Type extends PredicateSignature_Type { + /** @generated + * @return the generator for this type + */ + @Override + protected FSGenerator getFSGenerator() {return fsGenerator;} + /** @generated */ + private final FSGenerator fsGenerator = + new FSGenerator() { + public FeatureStructure createFS(int addr, CASImpl cas) { + if (PredicateSignatureUncertainUncertain_Type.this.useExistingInstance) { + // Return eq fs instance if already created + FeatureStructure fs = PredicateSignatureUncertainUncertain_Type.this.jcas.getJfsFromCaddr(addr); + if (null == fs) { + fs = new PredicateSignatureUncertainUncertain(addr, PredicateSignatureUncertainUncertain_Type.this); + PredicateSignatureUncertainUncertain_Type.this.jcas.putJfsFromCaddr(addr, fs); + return fs; + } + return fs; + } else return new PredicateSignatureUncertainUncertain(addr, PredicateSignatureUncertainUncertain_Type.this); + } + }; + /** @generated */ + //@SuppressWarnings ("hiding") + public final static int typeIndexID = PredicateSignatureUncertainUncertain.typeIndexID; + /** @generated + @modifiable */ + //@SuppressWarnings ("hiding") + public final static boolean featOkTst = JCasRegistry.getFeatOkTst("eu.excitement.type.predicatetruth.PredicateSignatureUncertainUncertain"); + + + + /** initialize variables to correspond with Cas Type and Features + * @generated + * @param jcas JCas + * @param casType Type + */ + public PredicateSignatureUncertainUncertain_Type(JCas jcas, Type casType) { + super(jcas, casType); + casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator()); + + } +} + + + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignature_Type.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignature_Type.java index 5e9d43fc..8175a0fd 100644 --- a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignature_Type.java 
+++ b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateSignature_Type.java @@ -11,8 +11,8 @@ import org.apache.uima.cas.Type; import org.apache.uima.jcas.tcas.Annotation_Type; -/** This type represents an implication signature of a predicate. - * Updated by JCasGen Fri Oct 05 20:17:26 CEST 2012 +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 * @generated */ public class PredicateSignature_Type extends Annotation_Type { /** @generated */ diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruth.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruth.java index b75cbb66..d7da4534 100644 --- a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruth.java +++ b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruth.java @@ -10,9 +10,11 @@ import org.apache.uima.jcas.tcas.Annotation; -/** This type represents a predicate truth value annotation. - * Updated by JCasGen Fri Oct 05 20:17:26 CEST 2012 - * XML source: /Users/tailblues/progs/github/Excitement-Open-Platform/common/src/main/resources/desc/type/PredicateTruth.xml +/** This type represents a predicate truth value annotation. +It is an abstract representation from which the different Predicate Truth annotations will inherit (PT+,PT-,PT?). +This annotation covers a single predicate token. 
+ * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * XML source: C:/Users/user/fromHP/Shared/excitement workspace/eop/common/src/main/resources/desc/type/PredicateTruth.xml * @generated */ public class PredicateTruth extends Annotation { /** @generated @@ -54,30 +56,17 @@ public PredicateTruth(JCas jcas, int begin, int end) { readObject(); } - /** + /** + * * Write your own initialization here * - @generated modifiable */ + * + * @generated modifiable + */ private void readObject() {/*default - does nothing empty block */} - //*--------------* - //* Feature: value - - /** getter for value - gets This represents the value of the annotation. - * @generated */ - public String getValue() { - if (PredicateTruth_Type.featOkTst && ((PredicateTruth_Type)jcasType).casFeat_value == null) - jcasType.jcas.throwFeatMissing("value", "eu.excitement.type.predicatetruth.PredicateTruth"); - return jcasType.ll_cas.ll_getStringValue(addr, ((PredicateTruth_Type)jcasType).casFeatCode_value);} - - /** setter for value - sets This represents the value of the annotation. 
- * @generated */ - public void setValue(String v) { - if (PredicateTruth_Type.featOkTst && ((PredicateTruth_Type)jcasType).casFeat_value == null) - jcasType.jcas.throwFeatMissing("value", "eu.excitement.type.predicatetruth.PredicateTruth"); - jcasType.ll_cas.ll_setStringValue(addr, ((PredicateTruth_Type)jcasType).casFeatCode_value, v);} - } +} \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruthNegative.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruthNegative.java new file mode 100644 index 00000000..18105e13 --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruthNegative.java @@ -0,0 +1,77 @@ + + +/* First created by JCasGen Sun Jul 13 18:54:20 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.jcas.cas.TOP_Type; + + +/** This type annotates predicates with PT-. + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * XML source: C:/Users/user/fromHP/Shared/excitement workspace/eop/common/src/main/resources/desc/type/PredicateTruth.xml + * @generated */ +public class PredicateTruthNegative extends PredicateTruth { + /** @generated + * @ordered + */ + //@SuppressWarnings ("hiding") + public final static int typeIndexID = JCasRegistry.register(PredicateTruthNegative.class); + /** @generated + * @ordered + */ + //@SuppressWarnings ("hiding") + public final static int type = typeIndexID; + /** @generated + * @return index of the type + */ + @Override + public int getTypeIndexID() {return typeIndexID;} + + /** Never called. 
Disable default constructor + * @generated */ + protected PredicateTruthNegative() {/* intentionally empty block */} + + /** Internal - constructor used by generator + * @generated + * @param addr low level Feature Structure reference + * @param type the type of this Feature Structure + */ + public PredicateTruthNegative(int addr, TOP_Type type) { + super(addr, type); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + */ + public PredicateTruthNegative(JCas jcas) { + super(jcas); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + * @param begin offset to the begin spot in the SofA + * @param end offset to the end spot in the SofA + */ + public PredicateTruthNegative(JCas jcas, int begin, int end) { + super(jcas); + setBegin(begin); + setEnd(end); + readObject(); + } + + /** + * + * Write your own initialization here + * + * + * @generated modifiable + */ + private void readObject() {/*default - does nothing empty block */} + +} + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruthNegative_Type.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruthNegative_Type.java new file mode 100644 index 00000000..c14b8dd5 --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruthNegative_Type.java @@ -0,0 +1,62 @@ + +/* First created by JCasGen Sun Jul 13 18:54:20 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.cas.Type; +import org.apache.uima.cas.impl.CASImpl; +import org.apache.uima.cas.impl.FSGenerator; +import org.apache.uima.cas.impl.TypeImpl; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; + +/** This type annotates predicates with PT-. 
+ * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * @generated */ +public class PredicateTruthNegative_Type extends PredicateTruth_Type { + /** @generated + * @return the generator for this type + */ + @Override + protected FSGenerator getFSGenerator() {return fsGenerator;} + /** @generated */ + private final FSGenerator fsGenerator = + new FSGenerator() { + public FeatureStructure createFS(int addr, CASImpl cas) { + if (PredicateTruthNegative_Type.this.useExistingInstance) { + // Return eq fs instance if already created + FeatureStructure fs = PredicateTruthNegative_Type.this.jcas.getJfsFromCaddr(addr); + if (null == fs) { + fs = new PredicateTruthNegative(addr, PredicateTruthNegative_Type.this); + PredicateTruthNegative_Type.this.jcas.putJfsFromCaddr(addr, fs); + return fs; + } + return fs; + } else return new PredicateTruthNegative(addr, PredicateTruthNegative_Type.this); + } + }; + /** @generated */ + //@SuppressWarnings ("hiding") + public final static int typeIndexID = PredicateTruthNegative.typeIndexID; + /** @generated + @modifiable */ + //@SuppressWarnings ("hiding") + public final static boolean featOkTst = JCasRegistry.getFeatOkTst("eu.excitement.type.predicatetruth.PredicateTruthNegative"); + + + + /** initialize variables to correspond with Cas Type and Features + * @generated + * @param jcas JCas + * @param casType Type + */ + public PredicateTruthNegative_Type(JCas jcas, Type casType) { + super(jcas, casType); + casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator()); + + } +} + + + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruthNotIdentified.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruthNotIdentified.java new file mode 100644 index 00000000..c24a26a2 --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruthNotIdentified.java @@ -0,0 +1,78 @@ + + +/* First created by JCasGen Mon Jul 14 
14:59:13 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.jcas.cas.TOP_Type; + + + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * XML source: C:/Users/user/fromHP/Shared/excitement workspace/eop/common/src/main/resources/desc/type/PredicateTruth.xml + * @generated */ +public class PredicateTruthNotIdentified extends PredicateTruth { + /** @generated + * @ordered + */ + //@SuppressWarnings ("hiding") + public final static int typeIndexID = JCasRegistry.register(PredicateTruthNotIdentified.class); + /** @generated + * @ordered + */ + //@SuppressWarnings ("hiding") + public final static int type = typeIndexID; + /** @generated + * @return index of the type + */ + @Override + public int getTypeIndexID() {return typeIndexID;} + + /** Never called. Disable default constructor + * @generated */ + protected PredicateTruthNotIdentified() {/* intentionally empty block */} + + /** Internal - constructor used by generator + * @generated + * @param addr low level Feature Structure reference + * @param type the type of this Feature Structure + */ + public PredicateTruthNotIdentified(int addr, TOP_Type type) { + super(addr, type); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + */ + public PredicateTruthNotIdentified(JCas jcas) { + super(jcas); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + * @param begin offset to the begin spot in the SofA + * @param end offset to the end spot in the SofA + */ + public PredicateTruthNotIdentified(JCas jcas, int begin, int end) { + super(jcas); + setBegin(begin); + setEnd(end); + readObject(); + } + + /** + * + * Write your own initialization here + * + * + * @generated modifiable + */ + private void readObject() {/*default - does nothing empty block */} + +} + + \ No newline at end of file diff --git 
a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruthNotIdentified_Type.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruthNotIdentified_Type.java new file mode 100644 index 00000000..b9bb68d5 --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruthNotIdentified_Type.java @@ -0,0 +1,62 @@ + +/* First created by JCasGen Mon Jul 14 14:59:13 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.cas.impl.CASImpl; +import org.apache.uima.cas.impl.FSGenerator; +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.cas.impl.TypeImpl; +import org.apache.uima.cas.Type; + +/** + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * @generated */ +public class PredicateTruthNotIdentified_Type extends PredicateTruth_Type { + /** @generated + * @return the generator for this type + */ + @Override + protected FSGenerator getFSGenerator() {return fsGenerator;} + /** @generated */ + private final FSGenerator fsGenerator = + new FSGenerator() { + public FeatureStructure createFS(int addr, CASImpl cas) { + if (PredicateTruthNotIdentified_Type.this.useExistingInstance) { + // Return eq fs instance if already created + FeatureStructure fs = PredicateTruthNotIdentified_Type.this.jcas.getJfsFromCaddr(addr); + if (null == fs) { + fs = new PredicateTruthNotIdentified(addr, PredicateTruthNotIdentified_Type.this); + PredicateTruthNotIdentified_Type.this.jcas.putJfsFromCaddr(addr, fs); + return fs; + } + return fs; + } else return new PredicateTruthNotIdentified(addr, PredicateTruthNotIdentified_Type.this); + } + }; + /** @generated */ + //@SuppressWarnings ("hiding") + public final static int typeIndexID = PredicateTruthNotIdentified.typeIndexID; + /** @generated + @modifiable */ + //@SuppressWarnings ("hiding") + public final static boolean featOkTst = 
JCasRegistry.getFeatOkTst("eu.excitement.type.predicatetruth.PredicateTruthNotIdentified"); + + + + /** initialize variables to correspond with Cas Type and Features + * @generated + * @param jcas JCas + * @param casType Type + */ + public PredicateTruthNotIdentified_Type(JCas jcas, Type casType) { + super(jcas, casType); + casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator()); + + } +} + + + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruthPositive.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruthPositive.java new file mode 100644 index 00000000..f9eeeba2 --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruthPositive.java @@ -0,0 +1,77 @@ + + +/* First created by JCasGen Sun Jul 13 18:54:20 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.jcas.cas.TOP_Type; + + +/** This type annotates predicates with PT+. + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * XML source: C:/Users/user/fromHP/Shared/excitement workspace/eop/common/src/main/resources/desc/type/PredicateTruth.xml + * @generated */ +public class PredicateTruthPositive extends PredicateTruth { + /** @generated + * @ordered + */ + //@SuppressWarnings ("hiding") + public final static int typeIndexID = JCasRegistry.register(PredicateTruthPositive.class); + /** @generated + * @ordered + */ + //@SuppressWarnings ("hiding") + public final static int type = typeIndexID; + /** @generated + * @return index of the type + */ + @Override + public int getTypeIndexID() {return typeIndexID;} + + /** Never called. 
Disable default constructor + * @generated */ + protected PredicateTruthPositive() {/* intentionally empty block */} + + /** Internal - constructor used by generator + * @generated + * @param addr low level Feature Structure reference + * @param type the type of this Feature Structure + */ + public PredicateTruthPositive(int addr, TOP_Type type) { + super(addr, type); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + */ + public PredicateTruthPositive(JCas jcas) { + super(jcas); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + * @param begin offset to the begin spot in the SofA + * @param end offset to the end spot in the SofA + */ + public PredicateTruthPositive(JCas jcas, int begin, int end) { + super(jcas); + setBegin(begin); + setEnd(end); + readObject(); + } + + /** + * + * Write your own initialization here + * + * + * @generated modifiable + */ + private void readObject() {/*default - does nothing empty block */} + +} + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruthPositive_Type.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruthPositive_Type.java new file mode 100644 index 00000000..fc683228 --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruthPositive_Type.java @@ -0,0 +1,62 @@ + +/* First created by JCasGen Sun Jul 13 18:54:20 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.cas.Type; +import org.apache.uima.cas.impl.CASImpl; +import org.apache.uima.cas.impl.FSGenerator; +import org.apache.uima.cas.impl.TypeImpl; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; + +/** This type annotates predicates with PT+. 
+ * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * @generated */ +public class PredicateTruthPositive_Type extends PredicateTruth_Type { + /** @generated + * @return the generator for this type + */ + @Override + protected FSGenerator getFSGenerator() {return fsGenerator;} + /** @generated */ + private final FSGenerator fsGenerator = + new FSGenerator() { + public FeatureStructure createFS(int addr, CASImpl cas) { + if (PredicateTruthPositive_Type.this.useExistingInstance) { + // Return eq fs instance if already created + FeatureStructure fs = PredicateTruthPositive_Type.this.jcas.getJfsFromCaddr(addr); + if (null == fs) { + fs = new PredicateTruthPositive(addr, PredicateTruthPositive_Type.this); + PredicateTruthPositive_Type.this.jcas.putJfsFromCaddr(addr, fs); + return fs; + } + return fs; + } else return new PredicateTruthPositive(addr, PredicateTruthPositive_Type.this); + } + }; + /** @generated */ + //@SuppressWarnings ("hiding") + public final static int typeIndexID = PredicateTruthPositive.typeIndexID; + /** @generated + @modifiable */ + //@SuppressWarnings ("hiding") + public final static boolean featOkTst = JCasRegistry.getFeatOkTst("eu.excitement.type.predicatetruth.PredicateTruthPositive"); + + + + /** initialize variables to correspond with Cas Type and Features + * @generated + * @param jcas JCas + * @param casType Type + */ + public PredicateTruthPositive_Type(JCas jcas, Type casType) { + super(jcas, casType); + casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator()); + + } +} + + + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruthUncertain.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruthUncertain.java new file mode 100644 index 00000000..a106d0c2 --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruthUncertain.java @@ -0,0 +1,77 @@ + + +/* First created by JCasGen Sun Jul 13 18:54:20 IDT 
2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.jcas.cas.TOP_Type; + + +/** This type annotates predicates with PT?. + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * XML source: C:/Users/user/fromHP/Shared/excitement workspace/eop/common/src/main/resources/desc/type/PredicateTruth.xml + * @generated */ +public class PredicateTruthUncertain extends PredicateTruth { + /** @generated + * @ordered + */ + //@SuppressWarnings ("hiding") + public final static int typeIndexID = JCasRegistry.register(PredicateTruthUncertain.class); + /** @generated + * @ordered + */ + //@SuppressWarnings ("hiding") + public final static int type = typeIndexID; + /** @generated + * @return index of the type + */ + @Override + public int getTypeIndexID() {return typeIndexID;} + + /** Never called. Disable default constructor + * @generated */ + protected PredicateTruthUncertain() {/* intentionally empty block */} + + /** Internal - constructor used by generator + * @generated + * @param addr low level Feature Structure reference + * @param type the type of this Feature Structure + */ + public PredicateTruthUncertain(int addr, TOP_Type type) { + super(addr, type); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + */ + public PredicateTruthUncertain(JCas jcas) { + super(jcas); + readObject(); + } + + /** @generated + * @param jcas JCas to which this Feature Structure belongs + * @param begin offset to the begin spot in the SofA + * @param end offset to the end spot in the SofA + */ + public PredicateTruthUncertain(JCas jcas, int begin, int end) { + super(jcas); + setBegin(begin); + setEnd(end); + readObject(); + } + + /** + * + * Write your own initialization here + * + * + * @generated modifiable + */ + private void readObject() {/*default - does nothing empty block */} + +} + + \ No newline at end of file diff --git 
a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruthUncertain_Type.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruthUncertain_Type.java new file mode 100644 index 00000000..064b2d09 --- /dev/null +++ b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruthUncertain_Type.java @@ -0,0 +1,62 @@ + +/* First created by JCasGen Sun Jul 13 18:54:20 IDT 2014 */ +package eu.excitement.type.predicatetruth; + +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.cas.Type; +import org.apache.uima.cas.impl.CASImpl; +import org.apache.uima.cas.impl.FSGenerator; +import org.apache.uima.cas.impl.TypeImpl; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; + +/** This type annotates predicates with PT?. + * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 + * @generated */ +public class PredicateTruthUncertain_Type extends PredicateTruth_Type { + /** @generated + * @return the generator for this type + */ + @Override + protected FSGenerator getFSGenerator() {return fsGenerator;} + /** @generated */ + private final FSGenerator fsGenerator = + new FSGenerator() { + public FeatureStructure createFS(int addr, CASImpl cas) { + if (PredicateTruthUncertain_Type.this.useExistingInstance) { + // Return eq fs instance if already created + FeatureStructure fs = PredicateTruthUncertain_Type.this.jcas.getJfsFromCaddr(addr); + if (null == fs) { + fs = new PredicateTruthUncertain(addr, PredicateTruthUncertain_Type.this); + PredicateTruthUncertain_Type.this.jcas.putJfsFromCaddr(addr, fs); + return fs; + } + return fs; + } else return new PredicateTruthUncertain(addr, PredicateTruthUncertain_Type.this); + } + }; + /** @generated */ + //@SuppressWarnings ("hiding") + public final static int typeIndexID = PredicateTruthUncertain.typeIndexID; + /** @generated + @modifiable */ + //@SuppressWarnings ("hiding") + public final static boolean featOkTst = 
JCasRegistry.getFeatOkTst("eu.excitement.type.predicatetruth.PredicateTruthUncertain"); + + + + /** initialize variables to correspond with Cas Type and Features + * @generated + * @param jcas JCas + * @param casType Type + */ + public PredicateTruthUncertain_Type(JCas jcas, Type casType) { + super(jcas, casType); + casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator()); + + } +} + + + + \ No newline at end of file diff --git a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruth_Type.java b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruth_Type.java index 39928ead..1908255c 100644 --- a/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruth_Type.java +++ b/common/src/main/java/eu/excitement/type/predicatetruth/PredicateTruth_Type.java @@ -2,19 +2,19 @@ /* First created by JCasGen Fri Oct 05 20:17:26 CEST 2012 */ package eu.excitement.type.predicatetruth; -import org.apache.uima.jcas.JCas; -import org.apache.uima.jcas.JCasRegistry; +import org.apache.uima.cas.FeatureStructure; +import org.apache.uima.cas.Type; import org.apache.uima.cas.impl.CASImpl; import org.apache.uima.cas.impl.FSGenerator; -import org.apache.uima.cas.FeatureStructure; import org.apache.uima.cas.impl.TypeImpl; -import org.apache.uima.cas.Type; -import org.apache.uima.cas.impl.FeatureImpl; -import org.apache.uima.cas.Feature; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.JCasRegistry; import org.apache.uima.jcas.tcas.Annotation_Type; -/** This type represents a predicate truth value annotation. - * Updated by JCasGen Fri Oct 05 20:17:26 CEST 2012 +/** This type represents a predicate truth value annotation. +It is an abstract representation from which the different Predicate Truth annotations will inherit (PT+,PT-,PT?). +This annotation covers a single predicate token. 
+ * Updated by JCasGen Tue Jul 15 10:01:13 IDT 2014 * @generated */ public class PredicateTruth_Type extends Annotation_Type { /** @generated */ @@ -44,36 +44,12 @@ public FeatureStructure createFS(int addr, CASImpl cas) { //@SuppressWarnings ("hiding") public final static boolean featOkTst = JCasRegistry.getFeatOkTst("eu.excitement.type.predicatetruth.PredicateTruth"); - /** @generated */ - final Feature casFeat_value; - /** @generated */ - final int casFeatCode_value; - /** @generated */ - public String getValue(int addr) { - if (featOkTst && casFeat_value == null) - jcas.throwFeatMissing("value", "eu.excitement.type.predicatetruth.PredicateTruth"); - return ll_cas.ll_getStringValue(addr, casFeatCode_value); - } - /** @generated */ - public void setValue(int addr, String v) { - if (featOkTst && casFeat_value == null) - jcas.throwFeatMissing("value", "eu.excitement.type.predicatetruth.PredicateTruth"); - ll_cas.ll_setStringValue(addr, casFeatCode_value, v);} - - - - - /** initialize variables to correspond with Cas Type and Features * @generated */ public PredicateTruth_Type(JCas jcas, Type casType) { super(jcas, casType); casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator()); - - casFeat_value = jcas.getRequiredFeatureDE(casType, "value", "eu.excitement.type.predicatetruth.PredicateTruthValue", featOkTst); - casFeatCode_value = (null == casFeat_value) ? 
JCas.INVALID_FEATURE_CODE : ((FeatureImpl)casFeat_value).getCode(); - } } diff --git a/common/src/main/java/eu/excitementproject/eop/common/component/alignment/AlignmentComponentException.java b/common/src/main/java/eu/excitementproject/eop/common/component/alignment/AlignmentComponentException.java new file mode 100644 index 00000000..414ce458 --- /dev/null +++ b/common/src/main/java/eu/excitementproject/eop/common/component/alignment/AlignmentComponentException.java @@ -0,0 +1,38 @@ +/** + * + */ +package eu.excitementproject.eop.common.component.alignment; + +/** + * The exception type for AlignmentComponents + * + * @author Tae-Gil Noh + * + */ +public class AlignmentComponentException extends PairAnnotatorComponentException { + + private static final long serialVersionUID = 2157830765388558808L; + + /** + * @param message + */ + public AlignmentComponentException(String message) { + super(message); + } + + /** + * @param cause + */ + public AlignmentComponentException(Throwable cause) { + super(cause); + } + + /** + * @param message + * @param cause + */ + public AlignmentComponentException(String message, Throwable cause) { + super(message, cause); + } + +} diff --git a/common/src/main/java/eu/excitementproject/eop/common/component/alignment/PairAnnotatorComponent.java b/common/src/main/java/eu/excitementproject/eop/common/component/alignment/PairAnnotatorComponent.java index 04fdf256..2d17432d 100644 --- a/common/src/main/java/eu/excitementproject/eop/common/component/alignment/PairAnnotatorComponent.java +++ b/common/src/main/java/eu/excitementproject/eop/common/component/alignment/PairAnnotatorComponent.java @@ -52,6 +52,6 @@ public interface PairAnnotatorComponent extends Component { * was successful --- even though there were no added annotations * (e.g. contradiction annotator, but there was no contradiction). 
*/ - public void annotate(JCas aJCas); + public void annotate(JCas aJCas) throws PairAnnotatorComponentException; } diff --git a/common/src/main/java/eu/excitementproject/eop/common/component/alignment/PairAnnotatorComponentException.java b/common/src/main/java/eu/excitementproject/eop/common/component/alignment/PairAnnotatorComponentException.java new file mode 100644 index 00000000..ec221d92 --- /dev/null +++ b/common/src/main/java/eu/excitementproject/eop/common/component/alignment/PairAnnotatorComponentException.java @@ -0,0 +1,39 @@ +package eu.excitementproject.eop.common.component.alignment; + +import eu.excitementproject.eop.common.exception.ComponentException; + + +/** + * This is the exception prepared for PairAnnotatorComponent. + * @author Tae-Gil Noh + */ +public class PairAnnotatorComponentException extends ComponentException { + + /** + * + */ + private static final long serialVersionUID = -1068712599109788009L; + + /** + * @param message + */ + public PairAnnotatorComponentException(String message) { + super(message); + } + + /** + * @param cause + */ + public PairAnnotatorComponentException(Throwable cause) { + super(cause); + } + + /** + * @param message + * @param cause + */ + public PairAnnotatorComponentException(String message, Throwable cause) { + super(message, cause); + } + +} diff --git a/common/src/main/java/eu/excitementproject/eop/common/datastructures/immutable/ImmutableList.java b/common/src/main/java/eu/excitementproject/eop/common/datastructures/immutable/ImmutableList.java index 90ce1fe0..f07c08cf 100644 --- a/common/src/main/java/eu/excitementproject/eop/common/datastructures/immutable/ImmutableList.java +++ b/common/src/main/java/eu/excitementproject/eop/common/datastructures/immutable/ImmutableList.java @@ -30,4 +30,6 @@ public interface ImmutableList extends ImmutableCollection, Serializable public String toString(); + public String mutableListToString(); + } diff --git 
a/common/src/main/java/eu/excitementproject/eop/common/datastructures/immutable/ImmutableListWrapper.java b/common/src/main/java/eu/excitementproject/eop/common/datastructures/immutable/ImmutableListWrapper.java index d282c205..43693621 100644 --- a/common/src/main/java/eu/excitementproject/eop/common/datastructures/immutable/ImmutableListWrapper.java +++ b/common/src/main/java/eu/excitementproject/eop/common/datastructures/immutable/ImmutableListWrapper.java @@ -126,6 +126,9 @@ public String toString() return this.getClass().getSimpleName()+" wrapping "+realList.getClass().getSimpleName()+": "+realList.toString(); } + public String mutableListToString() { + return realList.toString(); + } @Override public int hashCode() diff --git a/common/src/main/java/eu/excitementproject/eop/common/representation/parse/tree/dependency/view/NodeShortString.java b/common/src/main/java/eu/excitementproject/eop/common/representation/parse/tree/dependency/view/NodeShortString.java new file mode 100644 index 00000000..ec628a6e --- /dev/null +++ b/common/src/main/java/eu/excitementproject/eop/common/representation/parse/tree/dependency/view/NodeShortString.java @@ -0,0 +1,86 @@ +package eu.excitementproject.eop.common.representation.parse.tree.dependency.view; + +import eu.excitementproject.eop.common.representation.parse.representation.basic.Info; +import eu.excitementproject.eop.common.representation.parse.representation.basic.InfoGetFields; +import eu.excitementproject.eop.common.representation.parse.tree.AbstractNode; + +public abstract class NodeShortString { + private static final String ROOT_STR = ""; + public abstract String toString(AbstractNode node); + + public static String prepConcrete(AbstractNode node) { + if (node.getInfo().getEdgeInfo().getDependencyRelation() != null && + node.getInfo().getEdgeInfo().getDependencyRelation().getStringRepresentation().equals("prep")) { + return "_" + node.getInfo().getNodeInfo().getWordLemma(); + } + else { + return ""; + } + } + + + 
//// Concrete Classes //////////////////////////////////////// + + public static class Rel extends NodeShortString { + @Override + public String toString(AbstractNode node) { + return InfoGetFields.getRelation(node.getInfo(), ROOT_STR); + } + } + + public static class RelPrep extends NodeShortString { + @Override + public String toString(AbstractNode node) { + return InfoGetFields.getRelation(node.getInfo(), ROOT_STR)+prepConcrete(node); + } + } + + public static class RelPos extends NodeShortString { + @Override + public String toString(AbstractNode node) { + return InfoGetFields.getRelation(node.getInfo(), ROOT_STR)+"->"+InfoGetFields.getPartOfSpeech(node.getInfo()); + } + } + + public static class RelPrepPos extends NodeShortString { + @Override + public String toString(AbstractNode node) { + return InfoGetFields.getRelation(node.getInfo(), ROOT_STR)+prepConcrete(node)+"->"+InfoGetFields.getPartOfSpeech(node.getInfo()); + } + } + + public static class RelCanonicalPos extends NodeShortString { + @Override + public String toString(AbstractNode node) { + return InfoGetFields.getRelation(node.getInfo(), ROOT_STR)+"->"+node.getInfo().getNodeInfo().getSyntacticInfo().getPartOfSpeech().getCanonicalPosTag(); + } + } + + public static class RelPrepCanonicalPos extends NodeShortString { + @Override + public String toString(AbstractNode node) { + return InfoGetFields.getRelation(node.getInfo(), ROOT_STR)+prepConcrete(node)+"->"+node.getInfo().getNodeInfo().getSyntacticInfo().getPartOfSpeech().getCanonicalPosTag(); + } + } + + public static class WordRel extends NodeShortString { + @Override + public String toString(AbstractNode node) { + return InfoGetFields.getRelation(node.getInfo(), ROOT_STR)+"->"+InfoGetFields.getWord(node.getInfo()); + } + } + + public static class WordRelPos extends NodeShortString { + @Override + public String toString(AbstractNode node) { + return InfoGetFields.getRelation(node.getInfo(), 
ROOT_STR)+"->"+InfoGetFields.getWord(node.getInfo())+"/"+InfoGetFields.getPartOfSpeech(node.getInfo()); + } + } + + public static class WordRelCanonicalPos extends NodeShortString { + @Override + public String toString(AbstractNode node) { + return InfoGetFields.getRelation(node.getInfo(), ROOT_STR)+"->"+InfoGetFields.getWord(node.getInfo())+"/"+node.getInfo().getNodeInfo().getSyntacticInfo().getPartOfSpeech().getCanonicalPosTag(); + } + } +} diff --git a/common/src/main/java/eu/excitementproject/eop/common/representation/parse/tree/dependency/view/TreeStringGenerator.java b/common/src/main/java/eu/excitementproject/eop/common/representation/parse/tree/dependency/view/TreeStringGenerator.java index c6af6dbc..333fad90 100755 --- a/common/src/main/java/eu/excitementproject/eop/common/representation/parse/tree/dependency/view/TreeStringGenerator.java +++ b/common/src/main/java/eu/excitementproject/eop/common/representation/parse/tree/dependency/view/TreeStringGenerator.java @@ -16,6 +16,18 @@ * The graphical representation is merely several text lines, with characters * like "|" and "-" to represent edges, and the string produced by * a {@link NodeString} object. + *

+ * Example output for the sentence "De Villepin made no comment on arrival.":

+ *

+ *                                          (3 / made[make] / VBD / (null))                                         
+ *                                                         |                                                        
+ *                    |-------------------------------------|------------------------------------|                  
+ * (2 / Villepin[villepin] / NNP / nsubj)  (5 / comment[comment] / NN / dobj)        (6 / on[on] / IN / prep)       
+ *                    |                                     |                                    |                  
+ *                    |                                     |                                    |                  
+ *         (1 / De[de] / NNP / nn)               (4 / no[no] / DT / det)        (7 / arrival[arrival] / NN / pobj)  
+ * 
+ * * @author Asher Stern * */ @@ -230,10 +242,31 @@ public String generateString() throws TreeStringGeneratorException } } + /** + * Convenience method to print a tree with full node data (Id, word, lemma, POS, rel). + * @param root + * @return + * @throws TreeStringGeneratorException + * @author Ofer Bronstein + * @since August 2014 + */ + public static String treeToStringFull(AbstractNode root) throws TreeStringGeneratorException { + TreeStringGenerator gen = new TreeStringGenerator(new SimpleNodeString(), root); + return gen.generateString(); + } - - - + /** + * Convenience method to print a tree with word and POS in node. + * @param root + * @return + * @throws TreeStringGeneratorException + * @author Ofer Bronstein + * @since August 2014 + */ + public static String treeToStringWordPos(AbstractNode root) throws TreeStringGeneratorException { + TreeStringGenerator gen = new TreeStringGenerator(new WordAndPosNodeString(), root); + return gen.generateString(); + } protected AbstractNode root; diff --git a/common/src/main/java/eu/excitementproject/eop/common/representation/parse/tree/dependency/view/TreeToLineString.java b/common/src/main/java/eu/excitementproject/eop/common/representation/parse/tree/dependency/view/TreeToLineString.java new file mode 100644 index 00000000..fe443de3 --- /dev/null +++ b/common/src/main/java/eu/excitementproject/eop/common/representation/parse/tree/dependency/view/TreeToLineString.java @@ -0,0 +1,166 @@ +package eu.excitementproject.eop.common.representation.parse.tree.dependency.view; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import eu.excitementproject.eop.common.representation.parse.representation.basic.Info; +import eu.excitementproject.eop.common.representation.parse.tree.dependency.basic.BasicNode; +import eu.excitementproject.eop.common.utilities.StringUtil; + +/** + * Convenient static methods for 
printing a tree in one line, using parentheses to determine nesting. + * @author Ofer Bronstein + * @since August 2014 + */ +public class TreeToLineString { + + private TreeToLineString() {} + + + //// Specific Methods /////////////////////////////////////////////////////// + + /////// Single Node + + public static String getStringWordRel(BasicNode tree) { + return getString(tree, new NodeShortString.WordRel()); + } + + public static String getStringWordRelPos(BasicNode tree) { + return getString(tree, new NodeShortString.WordRelPos()); + } + + public static String getStringWordRelCanonicalPos(BasicNode tree) { + return getString(tree, new NodeShortString.WordRelCanonicalPos()); + } + + + /////// Multiple Nodes + + public static String getStringRel(List trees, boolean withContext, boolean withMagicNodes) { + return getString(trees, withContext, withMagicNodes, new NodeShortString.Rel()); + } + + public static String getStringRelPrep(List trees, boolean withContext, boolean withMagicNodes) { + return getString(trees, withContext, withMagicNodes, new NodeShortString.RelPrep()); + } + + public static String getStringRelPos(List trees, boolean withContext, boolean withMagicNodes) { + return getString(trees, withContext, withMagicNodes, new NodeShortString.RelPos()); + } + + public static String getStringRelPrepPos(List trees, boolean withContext, boolean withMagicNodes) { + return getString(trees, withContext, withMagicNodes, new NodeShortString.RelPrepPos()); + } + + public static String getStringRelCanonicalPos(List trees, boolean withContext, boolean withMagicNodes) { + return getString(trees, withContext, withMagicNodes, new NodeShortString.RelCanonicalPos()); + } + + public static String getStringRelPrepCanonicalPos(List trees, boolean withContext, boolean withMagicNodes) { + return getString(trees, withContext, withMagicNodes, new NodeShortString.RelPrepCanonicalPos()); + } + + public static String getStringWordRel(List trees, boolean withContext, boolean 
withMagicNodes) { + return getString(trees, withContext, withMagicNodes, new NodeShortString.WordRel()); + } + + public static String getStringWordRelPos(List trees, boolean withContext, boolean withMagicNodes) { + return getString(trees, withContext, withMagicNodes, new NodeShortString.WordRelPos()); + } + + public static String getStringWordRelCanonicalPos(List trees, boolean withContext, boolean withMagicNodes) { + return getString(trees, withContext, withMagicNodes, new NodeShortString.WordRelCanonicalPos()); + } + + + //// Generic Methods /////////////////////////////////////////////////////// + + public static String getString(BasicNode tree, NodeShortString nodeStr) { + return getString(tree, "(", ")", nodeStr); + } + + public static String getString(List trees, boolean withContext, boolean withMagicNodes, NodeShortString nodeStr) { + if (trees.isEmpty()) { + return "(empty-tree)"; + } + String subrootDep = null; + if (!withContext) { + subrootDep = ""; + } + return getString(trees, "(", ")", "#", subrootDep, withMagicNodes, nodeStr); + } + + public static String getString(BasicNode root, String pre, String post, String dep, boolean withMagicNodes, NodeShortString str) { + return getStringSubtree(root, str, pre, post, dep, withMagicNodes).toString().trim(); + } + + public static String getString(BasicNode root, String pre, String post, NodeShortString str) { + return getStringSubtree(root, str, pre, post, null, true).toString().trim(); + } + + public static String getString(BasicNode root, String pre, String post, boolean withMagicNodes, NodeShortString str) { + return getStringSubtree(root, str, pre, post, null, withMagicNodes).toString().trim(); + } + + public static String getString(Collection trees, String pre, String post, String treeSeparator, String dep, boolean withMagicNodes, NodeShortString str) { + List strings = new ArrayList(trees.size()); + for (BasicNode root : trees) { + strings.add(getString(root, pre, post, dep, withMagicNodes, str)); + } + 
return StringUtil.join(strings, treeSeparator); + } + + public static String getString(Collection trees, String pre, String post, boolean withMagicNodes, NodeShortString str) { + return getString(trees, pre, post, null, "#", withMagicNodes, str); + } + + protected static StringBuffer getStringSubtree(BasicNode subtree, NodeShortString str, String pre, String post, String dep, boolean withMagicNodes) { + final String NULL_TREE_STR = "(null)"; + StringBuffer result = new StringBuffer(); + + if (subtree == null) { + result.append(NULL_TREE_STR); + } + else { + if (subtree.getInfo().getNodeInfo().getWord() != null) { + String nodeDep; + if (dep != null) { + nodeDep = dep; + } + else { + nodeDep = str.toString(subtree); + } + + // "Magic Node" data should just be added to nodeDep + if ( withMagicNodes && + subtree.getInfo().getNodeInfo().getWordLemma()!=null && + MAGIC_NODES.contains(subtree.getInfo().getNodeInfo().getWordLemma())) { + nodeDep += subtree.getInfo().getNodeInfo().getWordLemma(); + } + + result.append(nodeDep); + } + + if (subtree.getChildren() != null) { + for (BasicNode child : subtree.getChildren()) { + result.append(pre); + result.append(getStringSubtree(child, str, pre, post, null, withMagicNodes)); + result.append(post); + } + } + } + + return result; + } + + + // "Magic Nodes" are one with specific importance for a tree/fragment, and should be printed accordingly + public static final String MAGIC_NODE_PREDICATE = "[PRD]"; + public static final String MAGIC_NODE_ARGUMENT = "[ARG]"; + public static final Set MAGIC_NODES = new HashSet(Arrays.asList(new String[] {MAGIC_NODE_PREDICATE, MAGIC_NODE_ARGUMENT})); + +} diff --git a/common/src/main/java/eu/excitementproject/eop/common/representation/partofspeech/GermanPartOfSpeech.java b/common/src/main/java/eu/excitementproject/eop/common/representation/partofspeech/GermanPartOfSpeech.java index 483d6dab..643ab289 100644 --- 
a/common/src/main/java/eu/excitementproject/eop/common/representation/partofspeech/GermanPartOfSpeech.java +++ b/common/src/main/java/eu/excitementproject/eop/common/representation/partofspeech/GermanPartOfSpeech.java @@ -54,7 +54,7 @@ protected void setCanonicalPosTag() { */ // Mapping STTS -> DKPro POS can be found at - // http://code.google.com/p/dkpro-core-asl/source/browse/de.tudarmstadt.ukp.dkpro.core-asl/trunk/de.tudarmstadt.ukp.dkpro.core.api.lexmorph-asl/src/main/resources/de/tudarmstadt/ukp/dkpro/core/api/lexmorph/tagset/de-stts-tagger.map + // http://code.google.com/p/dkpro-core-asl/source/browse/de.tudarmstadt.ukp.dkpro.core-asl/trunk/de.tudarmstadt.ukp.dkpro.core.api.lexmorph-asl/src/main/resources/de/tudarmstadt/ukp/dkpro/core/api/lexmorph/tagset/de-stts-pos.map private String mapOntoStts(String posTagString) { if (posTagString.startsWith("ADJ")) { diff --git a/common/src/main/java/eu/excitementproject/eop/common/utilities/uima/UimaUtils.java b/common/src/main/java/eu/excitementproject/eop/common/utilities/uima/UimaUtils.java index 9e74daf2..14072ff6 100644 --- a/common/src/main/java/eu/excitementproject/eop/common/utilities/uima/UimaUtils.java +++ b/common/src/main/java/eu/excitementproject/eop/common/utilities/uima/UimaUtils.java @@ -8,11 +8,17 @@ import java.io.IOException; import java.io.InputStream; import java.net.URL; +import java.util.ArrayList; +import java.util.Collection; import java.util.List; +import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang.builder.HashCodeBuilder; +import org.apache.commons.lang.builder.EqualsBuilder; import org.apache.uima.UIMAFramework; import org.apache.uima.analysis_engine.AnalysisEngine; import org.apache.uima.cas.CAS; +import org.apache.uima.cas.CASException; import org.apache.uima.cas.Type; import org.apache.uima.cas.impl.Subiterator; import org.apache.uima.cas.impl.XmiCasDeserializer; @@ -31,6 +37,9 @@ import org.uimafit.util.JCasUtil; import org.xml.sax.SAXException; +import 
de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; +import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency; + /** * Generic convenience methods when using UIMA. * @@ -191,14 +200,14 @@ public static TypeSystemDescription loadTypeSystem(String typeSystemDescriptorPa * Does not use subiterators. * * @param - * the JCas type. + * the required annotation type to be retrieved. * @param jCas * a JCas containing the annotation. * @param type - * a UIMA type. + * the required annotation type to be retrieved. * @param coveringAnnotation * the covering annotation. - * @return the single instance of the given type. + * @return the single instance of the required type. * @throws IllegalArgumentException if not exactly one instance if the given type is present * under the covering annotation. * @see Subiterator @@ -207,12 +216,36 @@ public static TypeSystemDescription loadTypeSystem(String typeSystemDescriptorPa * @since April 2014 */ @SuppressWarnings("unchecked") - public static T selectCoveredSingle(JCas jCas, final Class type, - AnnotationFS coveringAnnotation) { + public static T selectCoveredSingle(JCas jCas, final Class type, AnnotationFS coveringAnnotation) { return (T) selectCoveredSingle(jCas.getCas(), JCasUtil.getType(jCas, type), coveringAnnotation); } + /** + * Get the annotation of the given annotation type constrained by a 'covering' annotation. + * Iterates over all annotations of the given type to find the covered annotations. + * Does not use subiterators. + * + * @param + * the required annotation type to be retrieved. + * @param jCas + * a JCas containing the annotation. + * @param type + * the required annotation type to be retrieved. + * @param coveringAnnotation + * the covering annotation. + * @return the single instance of the required type. + * @throws IllegalArgumentException if not exactly one instance if the given type is present + * under the covering annotation. 
+ * @see Subiterator + * + * @author Ofer Bronstein + * @since April 2014 + */ + public static T selectCoveredSingle(final Class type, AnnotationFS coveringAnnotation) throws CASException { + return selectCoveredSingle(coveringAnnotation.getCAS().getJCas(), type, coveringAnnotation); + } + /** * Get the annotation of the given annotation type constrained by a 'covering' annotation. * Iterates over all annotations of the given type to find the covered annotations. @@ -221,10 +254,10 @@ public static T selectCoveredSingle(JCas jCas, final Clas * @param cas * a cas containing the annotation. * @param type - * a UIMA type. + * the required annotation type to be retrieved. * @param coveringAnnotation * the covering annotation. - * @return the single instance of the given type. + * @return the single instance of the required type. * @throws IllegalArgumentException if not exactly one instance if the given type is present * under the covering annotation. * @see Subiterator @@ -232,19 +265,98 @@ public static T selectCoveredSingle(JCas jCas, final Clas * @author Ofer Bronstein * @since April 2014 */ - public static AnnotationFS selectCoveredSingle(CAS cas, Type type, - AnnotationFS coveringAnnotation) { + public static AnnotationFS selectCoveredSingle(CAS cas, Type type, AnnotationFS coveringAnnotation) { List annotations = CasUtil.selectCovered(cas, type, coveringAnnotation); if (annotations.isEmpty()) { - throw new IllegalArgumentException("CAS does not contain any [" + type.getName() + "]"); + throw new IllegalArgumentException("No annotations of type [" + type.getName() + "] in selected range"); } if (annotations.size() > 1) { - throw new IllegalArgumentException("CAS contains more than one [" + type.getName() - + "]"); + throw new IllegalArgumentException("More than one annotation of type [" + type.getName() + + "] in selected range"); } return annotations.get(0); } + public static String annotationToString(A anno) { + return String.format("%s[%s:%s]", 
anno.getCoveredText(), anno.getBegin(), anno.getEnd()); + } + + public static String annotationToString(Token token) { + return annotationToString(token, true, true); + } + + public static String annotationToString(Token token, boolean writeLemma, boolean writePOS) { + String strLemma = ""; + if (writeLemma && (token.getLemma() != null && !token.getLemma().getValue().isEmpty())) { + strLemma = String.format("(%s)", token.getLemma().getValue()); + } + String strPOS = ""; + if (writePOS && (token.getPos() != null && !token.getPos().getPosValue().isEmpty())) { + strPOS = String.format("/%s", token.getPos().getPosValue()); + } + return String.format("%s%s%s[%s:%s]", token.getCoveredText(), strLemma, strPOS, token.getBegin(), token.getEnd()); + } + + public static String annotationToString(Dependency dep) { + Token dependent = dep.getDependent(); + Token governer = dep.getGovernor(); + return String.format("%s(%s->%s)", dep.getDependencyType(), + annotationToString(dependent, false, false), annotationToString(governer, false, false)); + } + + public static String annotationIterableToString(Iterable annos) { + List strs = new ArrayList(); + for (A anno : annos) { + String annoStr; + if (anno instanceof Token) { + annoStr = annotationToString((Token) anno); + } + else if (anno instanceof Dependency) { + annoStr = annotationToString((Dependency) anno); + } + else { + annoStr = annotationToString(anno); + } + strs.add(annoStr); + } + return String.format("[%s]", StringUtils.join(strs, ", ")); + } + + public static String annotationCollectionToString(Collection annos) { + return String.format("%s/%s", annos.size(), annotationIterableToString(annos)); + } + + /** + * Calculates an annotation's hash code only by its type and span (begin..end). 
+ * This is useful for times when you can assume that only one annotation of a certain type + * will be on a specific span (when that's not the case you'll get the same hash code + * for multiple distinct annotations, so don't use this!). + * + * @param anno + * @return + */ + public static int hashCodeAnnotationByTypeAndSpan(Annotation anno) { + return new HashCodeBuilder(131, 79).append(anno.getType()).append(anno.getBegin()).append(anno.getEnd()).toHashCode(); + } + + /** + * Check whether two annotations are equals only by their type and span (begin..end). + * This is useful for times when you can assume that only one annotation of a certain type + * will be on a specific span (when that's not the case you'll get equality + * for multiple distinct annotation, so don't use this!). + * + * @param anno + * @return + */ + public static boolean equalsAnnotationByTypeAndSpan(Object obj1, Object obj2) { + if (obj1 == null && obj2 == null) { return true; } + if (obj1 == null || obj2 == null) { return false; } + if (obj1 == obj2) { return true; } + if (obj1.getClass() != obj2.getClass()) { return false; } + Annotation anno1 = (Annotation) obj1; + Annotation anno2 = (Annotation) obj2; + return new EqualsBuilder().append(anno1.getBegin(), anno2.getBegin()).append(anno1.getEnd(), anno2.getEnd()).isEquals(); + } } diff --git a/lap/src/main/resources/desc/DummyAE.xml b/common/src/main/resources/desc/DummyAE.xml similarity index 92% rename from lap/src/main/resources/desc/DummyAE.xml rename to common/src/main/resources/desc/DummyAE.xml index 3b805feb..4a497d58 100644 --- a/lap/src/main/resources/desc/DummyAE.xml +++ b/common/src/main/resources/desc/DummyAE.xml @@ -2,7 +2,7 @@ org.apache.uima.java true - eu.excitementproject.eop.lap.DummyAE + eu.excitement.type.DummyAE DummyAE This AE does nothing. But it holds all EXCITEMENT typesystem, and (banilla) UIMA needs an AE to generate CAS(and JCas). 
diff --git a/lap/src/main/resources/desc/EOPtypesystem.xml b/common/src/main/resources/desc/EOPtypesystem.xml similarity index 95% rename from lap/src/main/resources/desc/EOPtypesystem.xml rename to common/src/main/resources/desc/EOPtypesystem.xml index 81a2a837..fa2d5caa 100644 --- a/lap/src/main/resources/desc/EOPtypesystem.xml +++ b/common/src/main/resources/desc/EOPtypesystem.xml @@ -17,5 +17,6 @@ + diff --git a/common/src/main/resources/desc/type/PredicateTruth.xml b/common/src/main/resources/desc/type/PredicateTruth.xml index e7591106..d6d97b08 100644 --- a/common/src/main/resources/desc/type/PredicateTruth.xml +++ b/common/src/main/resources/desc/type/PredicateTruth.xml @@ -1,163 +1,139 @@ - - PredicateTruth - - 1.0 - - - - eu.excitement.type.predicatetruth.PredicateTruthValue - This type provides labels for PredicateTruth. This type is a string subtype that only permits "PT+", "PT-", and "PT?". - - uima.cas.String - - - PT+ - - - - PT- - - - - PT? - - - - - - eu.excitement.type.predicatetruth.PredicateTruth - This type represents a predicate truth value annotation. - - uima.tcas.Annotation - - - value - This represents the value of the annotation. - - eu.excitement.type.predicatetruth.PredicateTruthValue - - - - - eu.excitement.type.predicatetruth.ClauseTruthValue - This type provides labels for ClauseTruth. This type is a string subtype that only permits "CT+", "CT-", and "CT?". - - uima.cas.String - - - CT+ - - - - CT- - - - - CT? - - - - - - eu.excitement.type.predicatetruth.ClauseTruth - This type represents a clause truth value annotation. - - uima.tcas.Annotation - - - value - This represents the value of the annotation. - - eu.excitement.type.predicatetruth.ClauseTruthValue - - - - - eu.excitement.type.predicatetruth.NegationAndUncertainty - This type represents a negation-and-uncertainty annotation. - - uima.tcas.Annotation - - - value - This represents the value of the annotation. 
- - eu.excitement.type.predicatetruth.NegationAndUncertaintyValue - - - - - eu.excitement.type.predicatetruth.NegationAndUncertaintyValue - This type provides labels for NegationAndUncerntainty. This type is a string -subtype that only permits "NU+", "NU-", and "NU?". - - uima.cas.String - - - NU+ - - - - NU- - - - - NU? - - - - - - eu.excitement.type.predicatetruth.PredicateSignature - This type represents an implication signature of a predicate. - - uima.tcas.Annotation - - - eu.excitement.type.predicatetruth.PredicateSignatureValue - This type provides labels for PredicateSignature. This type is a string subtype -that only permits one of the following strings: "+ / -", "+ / ?", "? / -", "- \ +", "- / ?", "? / +", "+ / -+", "- / -", "? / ?". - - uima.cas.String - - - +/- - - - - +/? - - - - ?/- - - - - -/+ - - - - -/? - - - - ?/+ - - - - +/+ - - - - -/- - - - - ?/? - - - - - - + + + TruthAnnotations + Represents a truth value. + 1.0 + + + + eu.excitement.type.predicatetruth.PredicateTruth + This type represents a predicate truth value annotation. +It is an abstract representation from which the different Predicate Truth annotations will inherit (PT+,PT-,PT?). +This annotation covers a single predicate token. + uima.tcas.Annotation + + + eu.excitement.type.predicatetruth.PredicateTruthPositive + This type annotates predicates with PT+. + eu.excitement.type.predicatetruth.PredicateTruth + + + eu.excitement.type.predicatetruth.PredicateTruthNegative + This type annotates predicates with PT-. + eu.excitement.type.predicatetruth.PredicateTruth + + + eu.excitement.type.predicatetruth.PredicateTruthUncertain + This type annotates predicates with PT?. 
+ eu.excitement.type.predicatetruth.PredicateTruth + + + eu.excitement.type.predicatetruth.PredicateTruthNotIdentified + + eu.excitement.type.predicatetruth.PredicateTruth + + + eu.excitement.type.predicatetruth.ClauseTruth + + uima.tcas.Annotation + + + clauseTokens + + uima.cas.FSArray + uima.tcas.Annotation + + + + + eu.excitement.type.predicatetruth.ClauseTruthPositive + + eu.excitement.type.predicatetruth.ClauseTruth + + + eu.excitement.type.predicatetruth.ClauseTruthNegative + + eu.excitement.type.predicatetruth.ClauseTruth + + + eu.excitement.type.predicatetruth.ClauseTruthUncertain + + eu.excitement.type.predicatetruth.ClauseTruth + + + eu.excitement.type.predicatetruth.ClauseTruthNotIdentified + + eu.excitement.type.predicatetruth.ClauseTruth + + + eu.excitement.type.predicatetruth.NegationAndUncertainty + + uima.tcas.Annotation + + + eu.excitement.type.predicatetruth.NegationAndUncertaintyPositive + + eu.excitement.type.predicatetruth.NegationAndUncertainty + + + eu.excitement.type.predicatetruth.NegationAndUncertaintyNegative + + eu.excitement.type.predicatetruth.NegationAndUncertainty + + + eu.excitement.type.predicatetruth.NegationAndUncertaintyUncertain + + eu.excitement.type.predicatetruth.NegationAndUncertainty + + + eu.excitement.type.predicatetruth.PredicateSignature + + uima.tcas.Annotation + + + eu.excitement.type.predicatetruth.PredicateSignaturePositivePositive + + eu.excitement.type.predicatetruth.PredicateSignature + + + eu.excitement.type.predicatetruth.PredicateSignaturePositiveNegative + + eu.excitement.type.predicatetruth.PredicateSignature + + + eu.excitement.type.predicatetruth.PredicateSignaturePositiveUncertain + + eu.excitement.type.predicatetruth.PredicateSignature + + + eu.excitement.type.predicatetruth.PredicateSignatureNegativePositive + + eu.excitement.type.predicatetruth.PredicateSignature + + + eu.excitement.type.predicatetruth.PredicateSignatureNegativeNegative + + eu.excitement.type.predicatetruth.PredicateSignature + + + 
eu.excitement.type.predicatetruth.PredicateSignatureNegativeUncertain + + eu.excitement.type.predicatetruth.PredicateSignature + + + eu.excitement.type.predicatetruth.PredicateSignatureUncertainPositive + + eu.excitement.type.predicatetruth.PredicateSignature + + + eu.excitement.type.predicatetruth.PredicateSignatureUncertainNegative + + eu.excitement.type.predicatetruth.PredicateSignature + + + eu.excitement.type.predicatetruth.PredicateSignatureUncertainUncertain + + eu.excitement.type.predicatetruth.PredicateSignature + + + diff --git a/common/src/test/java/eu/excitement/type/alignment/LinkTest.java b/common/src/test/java/eu/excitement/type/alignment/LinkTest.java new file mode 100644 index 00000000..562c8d58 --- /dev/null +++ b/common/src/test/java/eu/excitement/type/alignment/LinkTest.java @@ -0,0 +1,62 @@ +package eu.excitement.type.alignment; + +import static org.junit.Assert.*; + +import java.util.Set; + +import junit.framework.Assert; + +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.uima.jcas.JCas; +import org.junit.Test; + +import eu.excitementproject.eop.common.utilities.uima.UimaUtils; + +public class LinkTest { + + @Test + public void test() { + + // Set Log4J for the test + BasicConfigurator.resetConfiguration(); + BasicConfigurator.configure(); + Logger.getRootLogger().setLevel(Level.INFO); // for UIMA (hiding < INFO) + Logger testlogger = Logger.getLogger(getClass()); + + try { + // a small test for group label setter, getter. + JCas aJCas = UimaUtils.newJcas(); + Link aLink = new Link(aJCas); + + aLink.addGroupLabel(GroupLabelInferenceLevel.LOCAL_ENTAILMENT); + aLink.addGroupLabel(GroupLabelInferenceLevel.LOCAL_SIMILARITY); + aLink.addGroupLabel(GroupLabelDomainLevel.HYPERNYM); + aLink.addGroupLabel(GroupLabelDomainLevel.HYPERNYM); + aLink.addGroupLabel(GroupLabelDomainLevel.HYPERNYM); // additional labels would be ignored when you use "getter" (getter returns a set). 
+ + Set iSet = aLink.getGroupLabelsInferenceLevel(); + Set dSet = aLink.getGroupLabelsDomainLevel(); + + Assert.assertEquals(2, iSet.size()); + testlogger.info(iSet); + Assert.assertEquals(1, dSet.size()); + testlogger.info(dSet); + + // empty set test + aJCas = UimaUtils.newJcas(); + aLink = new Link(aJCas); + + iSet = aLink.getGroupLabelsInferenceLevel(); + dSet = aLink.getGroupLabelsDomainLevel(); + Assert.assertEquals(0, iSet.size()); + Assert.assertEquals(0, dSet.size()); + + } + catch (Exception e) + { + fail(e.getMessage()); + } + } +} diff --git a/core/pom.xml b/core/pom.xml index b0802d0f..fcaf0647 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -3,12 +3,17 @@ eu.excitementproject eop - 1.1.3 + 1.1.4 core core core + + dfki + nemex + 1.0 + junit junit @@ -17,13 +22,13 @@ eu.excitementproject common - 1.1.3 + 1.1.4 eu.excitementproject distsim - 1.1.3 + 1.1.4 @@ -52,7 +57,7 @@ eu.excitementproject lap - 1.1.3 + 1.1.4 unituebingen @@ -110,9 +115,23 @@ nz.ac.waikato.cms.weka weka-stable 3.6.9 - - + + edu.cmu.parex + meteor-paraphrase-en + 1.5 + + + edu.cmu.parex + meteor-paraphrase-de + 1.5 + + + eu.fbk + vivis-paraphrase-it + 1.2 + + @@ -136,7 +155,12 @@ --> - + + + eu.excitementproject + lexicalminer + 1.1.4 + @@ -163,4 +187,4 @@ http://zoidberg.ukp.informatik.tu-darmstadt.de/artifactory/public-ext-releases-local - + \ No newline at end of file diff --git a/core/src/main/java/eu/excitementproject/eop/core/EditDistanceEDA.java b/core/src/main/java/eu/excitementproject/eop/core/EditDistanceEDA.java index 374ae250..46328aff 100644 --- a/core/src/main/java/eu/excitementproject/eop/core/EditDistanceEDA.java +++ b/core/src/main/java/eu/excitementproject/eop/core/EditDistanceEDA.java @@ -54,17 +54,17 @@ public class EditDistanceEDA * the threshold that has to be learnt on a training set and then used * to annotate examples in the test set */ - private double threshold; + protected double threshold; /** * the accuracy obtained on the training data set */ - private 
double trainingAccuracy; + protected double trainingAccuracy; /** * the edit distance component to be used */ - private FixedWeightEditDistance component; + protected FixedWeightEditDistance component; /** * the logger @@ -74,17 +74,17 @@ public class EditDistanceEDA /** * the language */ - private String language; + protected String language; /** * the training data directory */ - private String trainDIR; + protected String trainDIR; /** * the test data directory */ - private String testDIR; + protected String testDIR; /** * if the model produced during the training phase @@ -92,32 +92,32 @@ public class EditDistanceEDA * if it should stay in the memory for further calculation * (e.g. EditDistancePSOEDA uses this modality) */ - private boolean writeModel; + protected boolean writeModel; /** * weight for match */ - private double mMatchWeight; + protected double mMatchWeight; /** * weight for delete */ - private double mDeleteWeight; + protected double mDeleteWeight; /** * weight for insert */ - private double mInsertWeight; + protected double mInsertWeight; /** * weight for substitute */ - private double mSubstituteWeight; + protected double mSubstituteWeight; /** * measure to optimize: accuracy or f1 measure */ - private String measureToOptimize; + protected String measureToOptimize; /** * if the EDA has to write the learnt model at the end of the training phase @@ -604,7 +604,7 @@ public void startTraining(CommonConfig config) throws ConfigurationException, ED * * @throws ConfigurationException */ - private void checkConfiguration(CommonConfig config) + protected void checkConfiguration(CommonConfig config) throws ConfigurationException { if (config == null) @@ -619,7 +619,7 @@ private void checkConfiguration(CommonConfig config) * * @throws ComponentException, EDAException, Exception */ - private double[] sequentialSearch(List annotationList, String measureToOptimize) + protected double[] sequentialSearch(List annotationList, String measureToOptimize) throws 
ComponentException, EDAException, Exception { //double[0] is the calculated threshold @@ -729,7 +729,7 @@ private double[] sequentialSearch(List annotationList, String measur * * @return the pair identifier */ - private String getPairId(JCas jcas) { + protected String getPairId(JCas jcas) { FSIterator pairIter = jcas.getJFSIndexRepository().getAllIndexedFS(Pair.type); @@ -752,7 +752,7 @@ private String getPairId(JCas jcas) { * * @return a copy of the specified list sorted in increasing order */ - private List sortAnnotationList(List annotationList) { + protected List sortAnnotationList(List annotationList) { List newAnnotationList = new ArrayList(annotationList); @@ -781,7 +781,7 @@ public int compare(Annotation a1, Annotation a2) { * * @throws DistanceComponentException */ - private void getDistanceValues(JCas jcas, List distanceValues) + protected void getDistanceValues(JCas jcas, List distanceValues) throws DistanceComponentException { try { @@ -805,7 +805,7 @@ private void getDistanceValues(JCas jcas, List distanceValues) * * @throws Exception */ - private void getEntailmentAnnotation(JCas jcas, List entailmentValueList) + protected void getEntailmentAnnotation(JCas jcas, List entailmentValueList) throws Exception { try { @@ -833,7 +833,7 @@ private void getEntailmentAnnotation(JCas jcas, List entailmentValueList * * @throws Exception */ - private List merge(List distanceValues, List entailmentValueList) + protected List merge(List distanceValues, List entailmentValueList) throws Exception { List annotationList = new ArrayList(); @@ -856,7 +856,7 @@ private List merge(List distanceValues, List /** * Save the optimized parameters (e.g. 
threshold) into the configuration file itself */ - private void saveModel(CommonConfig config) throws IOException { + protected void saveModel(CommonConfig config) throws IOException { logger.info("Writing model ..."); @@ -890,7 +890,7 @@ private void saveModel(CommonConfig config) throws IOException { * @param config the configuration * */ - private void initializeThreshold(CommonConfig config) { + protected void initializeThreshold(CommonConfig config) { try{ @@ -983,7 +983,7 @@ protected String updateConfigurationFile(CommonConfig config) throws IOException /* * This class represents the entailment annotation between a T/H pair with its edit distance. */ - private class Annotation { + protected class Annotation { DistanceValue distance; String entailmentRelation; diff --git a/core/src/main/java/eu/excitementproject/eop/core/EditDistanceEDAwRedis.java b/core/src/main/java/eu/excitementproject/eop/core/EditDistanceEDAwRedis.java new file mode 100644 index 00000000..77be4ee1 --- /dev/null +++ b/core/src/main/java/eu/excitementproject/eop/core/EditDistanceEDAwRedis.java @@ -0,0 +1,279 @@ +package eu.excitementproject.eop.core; + +import java.io.File; +import java.lang.reflect.Constructor; +import java.util.ArrayList; +import java.util.List; +import java.util.logging.Logger; + +import org.apache.uima.jcas.JCas; + +import eu.excitementproject.eop.common.DecisionLabel; +import eu.excitementproject.eop.common.EDAException; +import eu.excitementproject.eop.common.TEDecision; +import eu.excitementproject.eop.common.component.distance.DistanceComponentException; +import eu.excitementproject.eop.common.component.distance.DistanceValue; +import eu.excitementproject.eop.common.configuration.CommonConfig; +import eu.excitementproject.eop.common.configuration.NameValueTable; +import eu.excitementproject.eop.common.exception.ComponentException; +import eu.excitementproject.eop.common.exception.ConfigurationException; +import 
eu.excitementproject.eop.core.component.distance.FixedWeightEditDistancewRedis; +import eu.excitementproject.eop.lap.PlatformCASProber; + + +/** + * The EditDistanceEDA class implements the EDABasic interface. + * Given a certain configuration, it can be trained over a specific data set in order to optimize its + * performance. In the training phase this class produces a distance model for the data set, which + * includes a distance threshold that best separates the positive and negative examples in the training data. + * During the test phase it applies the calculated threshold, so that pairs resulting in a distance below the + * threshold are classified as ENTAILMENT, while pairs above the threshold are classified as NONENTAILMENT. + * EditDistanceEDA uses FixedWeightEditDistance for calculating edit distance + * between each pair of T and H. + * + * @author Roberto Zanoli + * + */ +public class EditDistanceEDAwRedis + extends EditDistanceEDA { + + /** + * the logger + */ + static Logger logger = Logger.getLogger(EditDistanceEDAwRedis.class.getName()); + + protected FixedWeightEditDistancewRedis component; + + /** + * Construct an edit distance EDA. 
+ */ + public EditDistanceEDAwRedis() { + super(); + } + + /** + * Construct an edit distance EDA with the weights of the edit distance operations set + * + * @param mMatchWeight weight for match + * @param mDeleteWeight weight for delete + * @param mInsertWeight weight for insert + * @param mSubstituteWeight weight for substitute + * + */ + public EditDistanceEDAwRedis(double mMatchWeight, double mDeleteWeight, double mInsertWeight, double mSubstituteWeight) { + + super(mMatchWeight, mDeleteWeight, mInsertWeight, mSubstituteWeight); + } + + @Override + public void initialize(CommonConfig config) throws ConfigurationException, EDAException, ComponentException { + + logger.info("Initialization ..."); + + try { + + //checking the configuration file + checkConfiguration(config); + + logger.info("Getting section: " + this.getType()); + + //getting the name value table of the EDA + NameValueTable nameValueTable = config.getSection(this.getType()); + + logger.info("Setting the train and test dirs for LAP"); + + //setting the training directory + if (this.trainDIR == null) + this.trainDIR = nameValueTable.getString("trainDir"); + + //setting the test directory + if (this.testDIR == null) + this.testDIR = nameValueTable.getString("testDir"); + + //initializing the threshold value + initializeThreshold(config); + + //initializing the weight of the edit distant operations + initializeWeights(config); + + //component initialization + String componentName = nameValueTable.getString("components"); + + logger.info("Will initialize component " + componentName); + + if (component == null) { + + try { + + Class componentClass = Class.forName(componentName); + logger.info("Using:" + componentClass.getCanonicalName()); + Constructor componentClassConstructor = componentClass.getConstructor(CommonConfig.class); + this.component = (FixedWeightEditDistancewRedis) componentClassConstructor.newInstance(config); + this.component.setmMatchWeight(mMatchWeight); + 
this.component.setmDeleteWeight(mDeleteWeight); + this.component.setmInsertWeight(mInsertWeight); + this.component.setmSubstituteWeight(mSubstituteWeight); + + } catch (Exception e) { + throw new ComponentException(e.getMessage()); + } + + } + + //setting the measure to be optimized + if (this.measureToOptimize == null) + this.measureToOptimize = nameValueTable.getString("measure"); + + } catch (ConfigurationException e) { + throw e; + } catch (Exception e) { + throw new EDAException(e.getMessage()); + } + + logger.info("done."); + + } + + @Override + public EditDistanceTEDecision process(JCas jcas) throws EDAException, ComponentException { + + String pairId = getPairId(jcas); + + //the distance between the T-H pair + DistanceValue distanceValue = component.calculation(jcas); + double distance = distanceValue.getDistance(); + + // During the test phase the method applies the threshold, so that + // pairs resulting in a distance below the threshold are classified as ENTAILMENT, while pairs + // above the threshold are classified as NONENTAILMENT. 
+ if (distance <= this.threshold) + return new EditDistanceTEDecision(DecisionLabel.Entailment, pairId, threshold - distance); + + return new EditDistanceTEDecision(DecisionLabel.NonEntailment, pairId, distance - threshold); + + } + + @Override + public void shutdown() { + + logger.info("Shutting down ..."); + logger.info("Number of rules used: " + FixedWeightEditDistancewRedis.ruleCounter); + + if (component != null) + component.shutdown(); + + this.threshold = Double.NaN; + this.component = null; + this.writeModel = true; + this.mMatchWeight = Double.NaN; + this.mDeleteWeight = Double.NaN; + this.mInsertWeight = Double.NaN; + this.mSubstituteWeight = Double.NaN; + this.trainDIR = null; + this.trainDIR = null; + this.language = null; + this.measureToOptimize = null; + + logger.info("done."); + } + + @Override + public void startTraining(CommonConfig config) throws ConfigurationException, EDAException, ComponentException { + + logger.info("Training ..."); + + try { + + initialize(config); + + //contains the distance between each pair of T-H + List distanceValueList = new ArrayList(); + //contains the entailment annotation between each pair of T-H + List entailmentValueList = new ArrayList(); + //contains the entailment annotation and the distance between each pair of T-H + List annotationList; + + File f = new File(trainDIR); + if (f.exists() == false) { + throw new ConfigurationException("trainDIR:" + f.getAbsolutePath() + " not found!"); + } + + logger.info(f.listFiles().length + " files to process from " + f.getName() ); + + int filesCounter = 0; + for (File xmi : f.listFiles()) { + if (!xmi.getName().endsWith(".xmi")) { + continue; + } + + JCas cas = PlatformCASProber.probeXmi(xmi, null); + + getDistanceValues(cas, distanceValueList); + getEntailmentAnnotation(cas, entailmentValueList); + + filesCounter++; + } + + annotationList = merge(distanceValueList, entailmentValueList); + + if (filesCounter == 0) + throw new ConfigurationException("trainDIR:" + 
f.getAbsolutePath() + " empty!"); + + //array of two elements; the first element is the calculated threshold whereas the + //second one is the obtained accuracy + double[] thresholdAndAccuracy = + //sequentialSearch(distanceValueList, entailmentValueList, measureToOptimize); + sequentialSearch(annotationList, measureToOptimize); + + this.threshold = thresholdAndAccuracy[0]; + this.trainingAccuracy = thresholdAndAccuracy[1]; + + //it saves the calculated model into the configuration file itself + if (this.writeModel == true) + saveModel(config); + + } catch (ConfigurationException e) { + throw e; + } catch (EDAException e) { + throw e; + } catch (ComponentException e) { + throw e; + } catch (Exception e) { + throw new EDAException(e.getMessage()); + } + + logger.info("done."); + logger.info("Number of LR rules used in training: " + FixedWeightEditDistancewRedis.ruleCounter); + FixedWeightEditDistancewRedis.ruleCounter = 0; + + } + + + + /** + * Puts distance values calculating for each of the pair T and H + * of the specified list of Cas into the distanceValues list. 
+ * Each of the Cas of the list contains a single pair T-H + * + * @param jcas the list of CAS + * @param distanceValues the list of the distance values + * + * @throws DistanceComponentException + */ + @Override + protected void getDistanceValues(JCas jcas, List distanceValues) + throws DistanceComponentException { + + try { + + DistanceValue distanceValue = component.calculation(jcas); + distanceValues.add(distanceValue); + + } catch(DistanceComponentException e) { + throw e; + } + + } + + +} diff --git a/core/src/main/java/eu/excitementproject/eop/core/MaxEntClassificationEDA.java b/core/src/main/java/eu/excitementproject/eop/core/MaxEntClassificationEDA.java index 7df231fb..429b2d2a 100644 --- a/core/src/main/java/eu/excitementproject/eop/core/MaxEntClassificationEDA.java +++ b/core/src/main/java/eu/excitementproject/eop/core/MaxEntClassificationEDA.java @@ -43,6 +43,8 @@ import eu.excitementproject.eop.lap.LAPException; import eu.excitementproject.eop.lap.PlatformCASProber; +import de.dfki.lt.nemex.a.NEMEX_A; + /** * The MaxEntClassificationEDA class implements the * EDABasic interface. @@ -64,7 +66,7 @@ * 5) (optional) settings for the classifier, the maximum number of iterations * and the cut-off threshold. 
* - * @author Rui Wang + * @author Rui Wang, Madhumita * @since December 2012 */ public class MaxEntClassificationEDA implements @@ -244,6 +246,17 @@ private void initializeComponents(CommonConfig config) } else { initializeLexCompsEN(config); } + } else if (component.equals("NemexA")) { + String gazetteerFilePath = comp.getString("gazetteerFilePath"); + String delimiter = comp.getString("delimiter"); + Boolean delimiterSwitchOff = Boolean.valueOf(comp + .getString("delimiterSwitchOff")); + int nGramSize = Integer.parseInt(comp.getString("nGramSize")); + Boolean ignoreDuplicateNgrams = Boolean.valueOf(comp + .getString("ignoreDuplicateNgrams")); + NEMEX_A.loadNewGazetteer(gazetteerFilePath, delimiter, + delimiterSwitchOff, nGramSize, ignoreDuplicateNgrams); + } else { try { @SuppressWarnings("unchecked") diff --git a/core/src/main/java/eu/excitementproject/eop/core/component/alignment/lexicallink/LexicalAligner.java b/core/src/main/java/eu/excitementproject/eop/core/component/alignment/lexicallink/LexicalAligner.java new file mode 100644 index 00000000..9a0f7ae0 --- /dev/null +++ b/core/src/main/java/eu/excitementproject/eop/core/component/alignment/lexicallink/LexicalAligner.java @@ -0,0 +1,754 @@ +package eu.excitementproject.eop.core.component.alignment.lexicallink; + +import java.lang.reflect.Constructor; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; + +import org.apache.log4j.Logger; +import org.apache.uima.cas.CASException; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.cas.FSArray; +import org.uimafit.util.JCasUtil; + +import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; +import eu.excitement.type.alignment.GroupLabelDomainLevel; +import eu.excitement.type.alignment.GroupLabelInferenceLevel; +import eu.excitement.type.alignment.Link; +import eu.excitement.type.alignment.Link.Direction; +import eu.excitement.type.alignment.Target; +import 
eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException; +import eu.excitementproject.eop.common.component.lexicalknowledge.LexicalResource; +import eu.excitementproject.eop.common.component.lexicalknowledge.LexicalResourceCloseException; +import eu.excitementproject.eop.common.component.lexicalknowledge.LexicalResourceException; +import eu.excitementproject.eop.common.component.lexicalknowledge.LexicalRule; +import eu.excitementproject.eop.common.component.lexicalknowledge.RuleInfo; +import eu.excitementproject.eop.common.configuration.CommonConfig; +import eu.excitementproject.eop.common.configuration.NameValueTable; +import eu.excitementproject.eop.common.exception.ConfigurationException; +import eu.excitementproject.eop.common.representation.partofspeech.ByCanonicalPartOfSpeech; +import eu.excitementproject.eop.common.representation.partofspeech.PartOfSpeech; +import eu.excitementproject.eop.common.representation.partofspeech.UnsupportedPosTagStringException; +import eu.excitementproject.eop.common.utilities.configuration.ConfigurationFile; +import eu.excitementproject.eop.common.utilities.configuration.ConfigurationParams; +import eu.excitementproject.eop.core.component.lexicalknowledge.verb_ocean.VerbOceanRuleInfo; +import eu.excitementproject.eop.core.component.lexicalknowledge.wordnet.WordnetRuleInfo; +import eu.excitementproject.eop.lap.implbase.LAP_ImplBase; + +/** + * Produces alignment links between the text and the hypothesis, + * based on lexical rules: if T contains a phrase t, H contains + * a phrase h and a lexical resource contains one of the rules + * t->h or h->t, then an alignment link between t and h will be + * created. + *

+ * Usage: Align a sentence pair by calling {@link #annotate(JCas)} method. + * Configure the aligner using the LexicalAligner.xml configuration file. + * When the {@linkplain Aligner} object is no longer to be used, the + * {@link #cleanUp()} method should be called. + * + * @author Vered Shwartz + * @since 26/05/2014 + */ +public class LexicalAligner implements AlignmentComponent { + + // Constants + private static final String LEXICAL_RESOURCES_CONF_SECTION = "LexicalResources"; + private static final String GENERAL_PARAMS_CONF_SECTION = "GeneralParameters"; + private static final String MAX_PHRASE_KEY = "maxPhraseLength"; + private static final String WORDNET = "wordnet"; + private static final String USE_LEMMA_PARAM = "useLemma"; + private static final String LEFT_SIDE_POS_PARAM = "leftSidePOS"; + private static final String RIGHT_SIDE_POS_PARAM = "rightSidePOS"; + private static final String VERSION_PARAM = "version"; + + // Private Members + private List textTokens; + private List hypoTokens; + private List> lexicalResources; + private int maxPhrase = 0; + private HashMap lexicalResourcesInformation; + private static final Logger logger = Logger.getLogger(LexicalAligner.class); + private static final HashMap linkInfoToDomainLevel; + private static final HashMap linkInfoToInferenceLevel; + + // Static Initializer + static { + + // Define the specific relations to domain level table + linkInfoToDomainLevel = new HashMap(); + linkInfoToDomainLevel.put("WORDNET__SYNONYM", GroupLabelDomainLevel.SYNONYM); + linkInfoToDomainLevel.put("WORDNET__HYPERNYM", GroupLabelDomainLevel.HYPERNYM); + linkInfoToDomainLevel.put("WORDNET__INSTANCE_HYPERNYM", GroupLabelDomainLevel.HYPERNYM); + linkInfoToDomainLevel.put("VerbOcean__STRONGER_THAN", GroupLabelDomainLevel.HYPERNYM); + linkInfoToDomainLevel.put("WORDNET__HYPONYM", GroupLabelDomainLevel.HYPONYM); + linkInfoToDomainLevel.put("WORDNET__INSTANCE_HYPONYM", GroupLabelDomainLevel.HYPONYM); + 
linkInfoToDomainLevel.put("WORDNET__TROPONYM", GroupLabelDomainLevel.HYPONYM); + linkInfoToDomainLevel.put("WORDNET__MEMBER_MERONYM", GroupLabelDomainLevel.MERONYM); + linkInfoToDomainLevel.put("WORDNET__PART_MERONYM", GroupLabelDomainLevel.MERONYM); + linkInfoToDomainLevel.put("WORDNET__SUBSTANCE_MERONYM", GroupLabelDomainLevel.MERONYM); + linkInfoToDomainLevel.put("WORDNET__MEMBER_HOLONYM", GroupLabelDomainLevel.HOLONYM); + linkInfoToDomainLevel.put("WORDNET__PART_HOLONYM", GroupLabelDomainLevel.HOLONYM); + linkInfoToDomainLevel.put("WORDNET__SUBSTANCE_HOLONYM", GroupLabelDomainLevel.HOLONYM); + linkInfoToDomainLevel.put("WORDNET__CAUSE", GroupLabelDomainLevel.CAUSE); + linkInfoToDomainLevel.put("WORDNET__DERIVATIONALLY_RELATED", GroupLabelDomainLevel.DERIVATIONALLY_RELATED); + linkInfoToDomainLevel.put("VerbOcean__HAPPENS_BEFORE", GroupLabelDomainLevel.HAPPENES_BEFORE); + linkInfoToDomainLevel.put("WORDNET__ANTONYM", GroupLabelDomainLevel.ANTONYM); + linkInfoToDomainLevel.put("VerbOcean__OPPOSITE_OF", GroupLabelDomainLevel.ANTONYM); + + // Define the specific relations to inference level table + linkInfoToInferenceLevel = new HashMap(); + linkInfoToInferenceLevel.put("WORDNET__ANTONYM", GroupLabelInferenceLevel.LOCAL_CONTRADICTION); + linkInfoToInferenceLevel.put("VerbOcean__OPPOSITE_OF", GroupLabelInferenceLevel.LOCAL_CONTRADICTION); + linkInfoToInferenceLevel.put("Wikipedia_Redirect", GroupLabelInferenceLevel.LOCAL_ENTAILMENT); + linkInfoToInferenceLevel.put("WORDNET__SYNONYM", GroupLabelInferenceLevel.LOCAL_ENTAILMENT); + linkInfoToInferenceLevel.put("WORDNET__DERIVATIONALLY_RELATED", GroupLabelInferenceLevel.LOCAL_ENTAILMENT); + linkInfoToInferenceLevel.put("CatVar__local-entailment", GroupLabelInferenceLevel.LOCAL_ENTAILMENT); + linkInfoToInferenceLevel.put("Wordnet__ENTAILMENT", GroupLabelInferenceLevel.LOCAL_ENTAILMENT); + linkInfoToInferenceLevel.put("Wikipedia_BeComp", GroupLabelInferenceLevel.LOCAL_ENTAILMENT); + 
linkInfoToInferenceLevel.put("Wikipedia_Parenthesis", GroupLabelInferenceLevel.LOCAL_ENTAILMENT); + linkInfoToInferenceLevel.put("Wikipedia_Category", GroupLabelInferenceLevel.LOCAL_ENTAILMENT); + linkInfoToInferenceLevel.put("WORDNET__HYPERNYM", GroupLabelInferenceLevel.LOCAL_ENTAILMENT); + linkInfoToInferenceLevel.put("WORDNET__INSTANCE_HYPERNYM", GroupLabelInferenceLevel.LOCAL_ENTAILMENT); + linkInfoToInferenceLevel.put("VerbOcean__STRONGER_THAN", GroupLabelInferenceLevel.LOCAL_ENTAILMENT); + linkInfoToInferenceLevel.put("WORDNET__HYPONYM", GroupLabelInferenceLevel.LOCAL_ENTAILMENT); + linkInfoToInferenceLevel.put("WORDNET__INSTANCE_HYPONYM", GroupLabelInferenceLevel.LOCAL_ENTAILMENT); + linkInfoToInferenceLevel.put("WORDNET__TROPONYM", GroupLabelInferenceLevel.LOCAL_ENTAILMENT); + linkInfoToInferenceLevel.put("WORDNET__MEMBER_MERONYM", GroupLabelInferenceLevel.LOCAL_ENTAILMENT); + linkInfoToInferenceLevel.put("WORDNET__PART_MERONYM", GroupLabelInferenceLevel.LOCAL_ENTAILMENT); + linkInfoToInferenceLevel.put("WORDNET__SUBSTANCE_MERONYM", GroupLabelInferenceLevel.LOCAL_ENTAILMENT); + linkInfoToInferenceLevel.put("WORDNET__MEMBER_HOLONYM", GroupLabelInferenceLevel.LOCAL_ENTAILMENT); + linkInfoToInferenceLevel.put("WORDNET__PART_HOLONYM", GroupLabelInferenceLevel.LOCAL_ENTAILMENT); + linkInfoToInferenceLevel.put("WORDNET__SUBSTANCE_HOLONYM", GroupLabelInferenceLevel.LOCAL_ENTAILMENT); + linkInfoToInferenceLevel.put("WORDNET__CAUSE", GroupLabelInferenceLevel.LOCAL_ENTAILMENT); + linkInfoToInferenceLevel.put("VerbOcean__HAPPENS_BEFORE", GroupLabelInferenceLevel.LOCAL_ENTAILMENT); + linkInfoToInferenceLevel.put("GEO__local-entailment", GroupLabelInferenceLevel.LOCAL_ENTAILMENT); + linkInfoToInferenceLevel.put("WORDNET__SIMILAR_TO", GroupLabelInferenceLevel.LOCAL_SIMILARITY); + linkInfoToInferenceLevel.put("WORDNET__VERB_GROUP", GroupLabelInferenceLevel.LOCAL_SIMILARITY); + linkInfoToInferenceLevel.put("VerbOcean__SIMILAR", GroupLabelInferenceLevel.LOCAL_SIMILARITY); + 
linkInfoToInferenceLevel.put("Wikipedia_AllNouns", GroupLabelInferenceLevel.LOCAL_SIMILARITY); + linkInfoToInferenceLevel.put("Wikipedia_Link", GroupLabelInferenceLevel.LOCAL_SIMILARITY); + linkInfoToInferenceLevel.put("distsim-lin-proximity__local-entailment", GroupLabelInferenceLevel.LOCAL_SIMILARITY); + linkInfoToInferenceLevel.put("distsim-lin-dependency__local-entailment", GroupLabelInferenceLevel.LOCAL_SIMILARITY); + linkInfoToInferenceLevel.put("distsim-bap__local-entailment", GroupLabelInferenceLevel.LOCAL_SIMILARITY); + } + + // Public Methods + + /** + * Initialize a lexical aligner from the configuration + * @param config a CommonConfig instance. The aligner retrieves the lexical + * resources configuration values. + * @throws AlignmentComponentException if initialization failed + */ + public LexicalAligner(CommonConfig config) throws AlignmentComponentException { + + lexicalResourcesInformation = new HashMap(); + + // Initialize the lexical aligner + try { + init(config); + } catch (ConfigurationException | LexicalResourceException e) { + throw new AlignmentComponentException( + "Could not initialize the lexical aligner", e); + } + } + + /** + * Initialize a lexical aligner using parameters + * @param lexicalResources A set of initialized lexical resources + * @param maxPhrase The maximum length of phrase to align + * @param lexicalResourcesInformation Additional information required for the aligner + * about each of the resources, such as whether this resource uses lemma or surface-level tokens, + * and whether to limit the alignments to certain relations only. + * The lexicalResourcesInformation should hold keys of type: resource.getClass().getName() + */ + public LexicalAligner(List> lexicalResources, + int maxPhrase, + HashMap lexicalResourcesInformation) { + + this.lexicalResources = lexicalResources; + this.lexicalResourcesInformation = lexicalResourcesInformation; + this.maxPhrase = maxPhrase; + } + + /** + * Align the text and the hypothesis. + *

+ * This method receives a JCAS object containing two views: + * Hypothesis and text views. The method assumes that the views + * were already annotated with a tokenizer. + *

+ * The lexical aligner looks at every phrase t in the text and every phrase + * h in the hypothesis, and uses the lexical resources to find rules with + * lhs = t and rhs = h. + * @param aJCas the JCAS object with the text and hypothesis view. + * @throws AlignmentComponentException + */ + @Override + public void annotate(JCas aJCas) throws AlignmentComponentException { + + try { + + logger.info("Started annotating a text and hypothesis pair using lexical aligner"); + + // Get the tokens and lemmas of the text and hypothesis + getTokenAnnotations(aJCas); + + // Check in all the resources for rules of type textPhrase -> hypoPhrase + for (LexicalResource resource : lexicalResources) { + + LexicalResourceInformation resourceInfo = + lexicalResourcesInformation.get(resource.getClass().getName()); + + // For every phrase t in T and phrase h in H, check the lexical + // resources if they contain a rule t->h + String textPhrase = "", hypoPhrase = ""; + + for (int textStart = 0; textStart < textTokens.size(); ++textStart) { + for (int textEnd = textStart; textEnd < Math.min(textTokens.size(), + textStart + maxPhrase); ++textEnd) { + + textPhrase = getPhrase(textTokens, textStart, textEnd, + resourceInfo.useLemma()); + + for (int hypoStart = 0; hypoStart < hypoTokens.size(); ++hypoStart) { + for (int hypoEnd = hypoStart; hypoEnd < Math.min(hypoTokens.size(), + hypoStart + maxPhrase); ++hypoEnd) { + + hypoPhrase = getPhrase(hypoTokens, hypoStart, hypoEnd, + resourceInfo.useLemma()); + + // Get the rules textPhrase -> hypoPhrase + List> ruleFromLeft = + getRules(resource, textPhrase, hypoPhrase, + resourceInfo.getLeftSidePOS(), resourceInfo.getRightSidePOS()); + + // Get the rules hypoPhrase -> textPhrase + List> ruleFromRight = + getRules(resource, hypoPhrase, textPhrase, + resourceInfo.getLeftSidePOS(), resourceInfo.getRightSidePOS()); + + // Create the alignment links for the rules + createAlignmentLinks( + aJCas, textStart, textEnd, + hypoStart, hypoEnd, ruleFromLeft, 
ruleFromRight, + resourceInfo.getVersion()); + } + } + } + } + } + + logger.info("Finished annotating a text and hypothesis pair using lexical aligner"); + + } catch (CASException | LexicalResourceException e) { + + throw new AlignmentComponentException( + "LexicalAligner failed aligning the sentence pair.", e); + } + } + + @Override + public String getComponentName() { + + // Name of this component that is used to identify the related configuration section + return this.getClass().getName(); + } + + @Override + public String getInstanceName() { + + // This component does not support instance configuration + return null; + } + + /** + * Cleans up any resources that were used by the aligner. + *

+ * Call this method when the aligner is no longer to be used. + */ + public void cleanUp() { + + // Close the lexical resources + for (LexicalResource lexicalResource : lexicalResources) { + try { + lexicalResource.close(); + } catch (LexicalResourceCloseException e) { + logger.warn("Closing the resource failed.", e); + } + } + } + + // Private Methods + + /** + * Call this method once before starting to align sentence pairs. + * @param config a CommonConfig instance. The aligner retrieves the lexical + * resources configuration values. + * @throws LexicalResourceException if initialization of a resource failed + * @throws ConfigurationException if the configuration is invalid + */ + private void init(CommonConfig config) throws LexicalResourceException, + ConfigurationException { + + // Get the general parameters configuration section + NameValueTable paramsSection = null; + try { + paramsSection = config.getSection(GENERAL_PARAMS_CONF_SECTION); + } catch (ConfigurationException e) { + throw new ConfigurationException(e); + } + + maxPhrase = paramsSection.getInteger(MAX_PHRASE_KEY); + + // Get the Lexical Resources configuration section + NameValueTable lexicalResourcesSection = null; + try { + lexicalResourcesSection = config.getSection(LEXICAL_RESOURCES_CONF_SECTION); + } catch (ConfigurationException e) { + throw new ConfigurationException(e); + } + + lexicalResources = new ArrayList>(); + ConfigurationFile configFile = new ConfigurationFile(config); + + // Get each resource and create it using the configuration section related to it + for (String resourceName : lexicalResourcesSection.keySet()) { + + // Get the class name + String resourceClassName = lexicalResourcesSection.getString(resourceName); + + // Get the configuration params + ConfigurationParams resourceParams = + configFile.getModuleConfiguration(resourceName); + resourceParams.setExpandingEnvironmentVariables(true); + LexicalResource lexicalResource = + createLexicalResource(resourceClassName, 
resourceParams); + + if (lexicalResource != null) { + lexicalResources.add(lexicalResource); + + PartOfSpeech leftSidePOS = null, rightSidePOS = null; + + // Add the information about this resource + + // Get the right and left side POS, in case it's mentioned + if (resourceParams.keySet().contains(LEFT_SIDE_POS_PARAM)) { + try { + leftSidePOS = new ByCanonicalPartOfSpeech(resourceParams.getString(LEFT_SIDE_POS_PARAM)); + } catch (UnsupportedPosTagStringException e) { + logger.warn("Could not load POS for left side: " + + resourceParams.getString(LEFT_SIDE_POS_PARAM) + + ". Alignment links of all POS will be retreived."); + } + } + + if (resourceParams.keySet().contains(RIGHT_SIDE_POS_PARAM)) { + try { + rightSidePOS = new ByCanonicalPartOfSpeech(resourceParams.getString(RIGHT_SIDE_POS_PARAM)); + } catch (UnsupportedPosTagStringException e) { + logger.warn("Could not load POS for right side: " + + resourceParams.getString(RIGHT_SIDE_POS_PARAM) + + ". Alignment links of all POS will be retreived."); + } + } + + lexicalResourcesInformation.put(lexicalResource.getClass().getName(), + new LexicalResourceInformation( + resourceParams.getString(VERSION_PARAM), + resourceParams.getBoolean(USE_LEMMA_PARAM), + leftSidePOS, rightSidePOS)); + } + } + } + + /** + * Uses the annotations in the CAS and extracts the tokens and + * their lemmas from the text and hypothesis views + * @param aJCas The JCas object of the text and hypothesis, + * after tokenization and lemmatization. 
+ * @throws CASException + */ + private void getTokenAnnotations(JCas aJCas) throws CASException { + + // Get the text and hypothesis views + JCas textView = aJCas.getView(LAP_ImplBase.TEXTVIEW); + JCas hypoView = aJCas.getView(LAP_ImplBase.HYPOTHESISVIEW); + + // Get the tokens + textTokens = new ArrayList(JCasUtil.select(textView, Token.class)); + hypoTokens = new ArrayList(JCasUtil.select(hypoView, Token.class)); + } + + /** + * Get a phrase from a list of consecutive tokens + * @param tokens The list of tokens + * @param start The start token index + * @param end The end token index + * @param supportLemma The current lexical resources needs right and left lemmas + * rather than surface words + * @return The phrase containing the tokens from start to end + */ + private String getPhrase(List tokens, int start, int end, boolean supportLemma) { + + StringBuilder phrase = new StringBuilder(); + + for (int tokenIndex = start; tokenIndex < end + 1; ++tokenIndex) { + phrase.append(supportLemma ? 
+ tokens.get(tokenIndex).getLemma().getValue() : + tokens.get(tokenIndex).getCoveredText()); + phrase.append(" "); + } + + // Remove last space + if (phrase.length() > 0) { + phrase.deleteCharAt(phrase.length() - 1); + } + + return phrase.toString(); + } + + /** + * Get rules of type leftSide -> rightSide, using the given lexical resource + * @param resource The lexical resource to use + * @param leftSide The phrase that will be looked for as lhs of a rule + * @param rightSide The phrase that will be looked for as rhs of a rule + * @param partOfSpeech2 + * @param partOfSpeech + * @return The list of rules leftSide -> rightSide + * @throws LexicalResourceException + */ + private List> + getRules(LexicalResource resource, + String leftSide, String rightSide, + PartOfSpeech leftSidePOS, PartOfSpeech rightSidePOS) + throws LexicalResourceException { + + List> rules = + new ArrayList>(); + + try { + + // WordNet workaround: + // Make sure the synsets of the right and left sides of the rule + // are equal to the right and left phrases. 
+ // (WN returns rules associated with any of the words in the phrase) + if (resource.getClass().getName().toLowerCase().contains(WORDNET)) { + + for (LexicalRule rule : + resource.getRules(leftSide, leftSidePOS, rightSide, rightSidePOS)) { + + WordnetRuleInfo ruleInfo = (WordnetRuleInfo)rule.getInfo(); + + if ((ruleInfo.getLeftSense().getWords().contains(leftSide)) && + (ruleInfo.getRightSense().getWords().contains(rightSide))) { + + addRuleToList(rules, rule); + } + } + + } else { + + // Get rules from t to h + for (LexicalRule rule : + resource.getRules(leftSide, leftSidePOS, rightSide, rightSidePOS)) { + + addRuleToList(rules, rule); + } + } + + } catch (Exception e) { + logger.warn("Could not add rules from " + + resource.getClass().getSimpleName() + " for " + + leftSide + "->" + rightSide, e); + } + + return rules; + } + + /** + * Adds a rule to the list of rules, only if there exists no other rule with the + * same rule info and a lower confidence + * @param rules The list of rules + * @param rule The new rule to add + */ + private void addRuleToList(List> rules, + LexicalRule rule) { + + boolean addRule = true; + + for (int otherIndex = 0; otherIndex < rules.size(); ++otherIndex) { + + LexicalRule otherRule = rules.get(otherIndex); + + if (getLinkInfo(rule).equals(getLinkInfo(otherRule))) { + + addRule = false; + + // Replace the rule with the same info and a lower confidence + if (rule.getConfidence() > otherRule.getConfidence()) { + rules.set(otherIndex, rule); + } + + break; + } + } + + if (addRule) { + rules.add(rule); + } + } + + /** + * Add an alignment link from T to H, based on the rule t->h + * in which t is a phrase in T from index textStart to textEnd of the tokens, + * and h is a phrase in H from index hypoStart to hypoEnd of the tokens, + * @param aJCas The JCas object + * @param textStart The index of the first token in T in this alignment link + * @param textEnd The index of the last token in T in this alignment link + * @param hypoStart The 
index of the first token in H in this alignment link + * @param hypoEnd The index of the last token in H in this alignment link + * @param resourceName The lexical resource that this rule came from + * @param lexicalResourceVersion The version of the lexical resource + * @param confidence The confidence of the rule + * @param linkDirection The direction of the link (t to h, h to t or bidirectional). + * @param linkInfo The relation of the rule (Wordnet synonym, Wikipedia redirect etc). + * @throws CASException + */ + private void addAlignmentAnnotations(JCas aJCas, int textStart, int textEnd, + int hypoStart, int hypoEnd, + String resourceName, + String lexicalResourceVersion, + double confidence, + Direction linkDirection, + String linkInfo) + throws CASException { + + // Get the text and hypothesis views + JCas textView = aJCas.getView(LAP_ImplBase.TEXTVIEW); + JCas hypoView = aJCas.getView(LAP_ImplBase.HYPOTHESISVIEW); + + // Prepare the Target instances + Target textTarget = new Target(textView); + Target hypoTarget = new Target(hypoView); + + // Prepare an FSArray instance and put the target annotations in it + FSArray textAnnots = new FSArray(textView, textEnd - textStart + 1); + FSArray hypoAnnots = new FSArray(hypoView, hypoEnd - hypoStart + 1); + + int tokenIndex = 0; + + for (Token token : textTokens.subList(textStart, textEnd + 1)) { + textAnnots.set(tokenIndex++, token); + } + + tokenIndex = 0; + + for (Token token : hypoTokens.subList(hypoStart, hypoEnd + 1)) { + hypoAnnots.set(tokenIndex++, token); + } + + textTarget.setTargetAnnotations(textAnnots); + hypoTarget.setTargetAnnotations(hypoAnnots); + + // Set begin and end value of the Target annotations + textTarget.setBegin(textTokens.get(textStart).getBegin()); + textTarget.setEnd(textTokens.get(textEnd).getEnd()); + hypoTarget.setBegin(hypoTokens.get(hypoStart).getBegin()); + hypoTarget.setEnd(hypoTokens.get(hypoEnd).getEnd()); + + // Add the targets to the indices + textTarget.addToIndexes(); + 
hypoTarget.addToIndexes(); + + // Mark an alignment.Link and add it to the hypothesis view + Link link = new Link(hypoView); + link.setTSideTarget(textTarget); + link.setHSideTarget(hypoTarget); + + // Set the link direction + link.setDirection(linkDirection); + + // Set strength according to the rule data + link.setStrength(confidence); + + // Add the link information + link.setAlignerID(resourceName); + link.setAlignerVersion(lexicalResourceVersion); + link.setLinkInfo(linkInfo); + + // Set the group labels + String relationType = resourceName + "__" + linkInfo; + + if (linkInfoToDomainLevel.containsKey(relationType)) { + link.addGroupLabel(linkInfoToDomainLevel.get(relationType)); + } + + if (linkInfoToInferenceLevel.containsKey(relationType)) { + link.addGroupLabel(linkInfoToInferenceLevel.get(relationType)); + } + + // Mark begin and end according to the hypothesis target + link.setBegin(hypoTarget.getBegin()); + link.setEnd(hypoTarget.getEnd()); + + // Add to index + link.addToIndexes(); + } + + /** + * Receives a rule and return the type of the rule, + * such as "synonym" or "hypernym" for WordNet, "redirect" + * for Wikipedia, etc. The default value is "local-entailment".
+ * A better solution is to add an abstract class implementing RuleInfo, + * that all the concrete RuleInfos will extend. This class will contain a + * field "relation" with a default of "local-entailment". + * Then we can call: rule.getInfo().getRelation() without having to + * know which resource the rule belongs to. + * @param rule + * @return The type of the rule + */ + private String getLinkInfo(LexicalRule rule) { + + String type = "local-entailment"; + + // WordNet + if (rule.getResourceName().equals("WORDNET")) { + type = ((WordnetRuleInfo)rule.getInfo()).getTypedRelation().name(); + } + + // VerbOcean + else if (rule.getResourceName().equals("VerbOcean")) { + type = ((VerbOceanRuleInfo)rule.getInfo()).getRelationType().name(); + } + + // Wikipedia + if (rule.getResourceName().equals("Wikipedia")) { + type = rule.getRelation(); + } + + return type; + } + + /** + * Constructs a {@link LexicalResource} for the given class name + * and a configuration subsection with parameters related to it. + * + * This function is allowed to return null.
+ * The caller must check if the return value is null. + * + * @param resourceClassName The class name of the lexical resource to load + * @param configurationParams The {@link ConfigurationParams} object related + * to the specific lexical resources. + * @return + * @throws ConfigurationException + * @throws LexicalResourceException + */ + @SuppressWarnings("unchecked") + private LexicalResource + createLexicalResource(String resourceClassName, + ConfigurationParams configurationParams) + throws ConfigurationException, LexicalResourceException + { + LexicalResource lexicalResource = null; + + // Load the class using reflection + Class> resourceClass; + Constructor> ctor; + + try { + resourceClass = (Class>) + Class.forName(resourceClassName); + ctor = resourceClass.getConstructor(ConfigurationParams.class); + lexicalResource = ctor.newInstance(configurationParams); + logger.info("Loaded resource: " + resourceClassName); + } catch (Exception e) { + logger.error("Could not instantiate the lexical resource " + + resourceClassName, e); + return null; + } + + return lexicalResource; + } + + /** + * Receives a list of rules of type t->h and h->t and creates the + * alignment links for them + * @param aJCas The JCas object + * @param textStart The index of the first token in T in this alignment link + * @param textEnd The index of the last token in T in this alignment link + * @param hypoStart The index of the first token in H in this alignment link + * @param hypoEnd The index of the last token in H in this alignment link + * @param rulesFromLeft The list of rules t->h + * @param rulesFromRight The list of rules h->t + * @param lexicalResourceVersion The lexical resource version + * @throws CASException + */ + private void createAlignmentLinks(JCas aJCas, int textStart, int textEnd, + int hypoStart, int hypoEnd, + List> rulesFromLeft, + List> rulesFromRight, + String lexicalResourceVersion) throws CASException { + + // Find rules that match by rule info and make them 
bidirectional + for (int leftRuleIndex = rulesFromLeft.size() - 1; + leftRuleIndex >= 0; --leftRuleIndex) { + for (int rightRuleIndex = rulesFromRight.size() - 1; + rightRuleIndex >= 0; --rightRuleIndex) { + + if (areOppositeLinks(rulesFromLeft.get(leftRuleIndex), + rulesFromRight.get(rightRuleIndex))) { + + // Remove these rules from the list + LexicalRule rightRule = + rulesFromRight.remove(rightRuleIndex); + LexicalRule leftRule = + rulesFromLeft.remove(leftRuleIndex); + + // Add the annotation + addAlignmentAnnotations(aJCas, textStart, textEnd, hypoStart, hypoEnd, + rightRule.getResourceName(), lexicalResourceVersion, + Math.max(rightRule.getConfidence(), leftRule.getConfidence()), + Direction.Bidirection, getLinkInfo(rightRule)); + + break; + } + } + } + + // Add rules from t to h + for (LexicalRule rule : rulesFromLeft) { + + addAlignmentAnnotations(aJCas, textStart, textEnd, hypoStart, hypoEnd, + rule.getResourceName(), lexicalResourceVersion, + rule.getConfidence(), Direction.TtoH, getLinkInfo(rule)); + } + + // Add rules from h to t + for (LexicalRule rule : rulesFromRight) { + + addAlignmentAnnotations(aJCas, textStart, textEnd, hypoStart, hypoEnd, + rule.getResourceName(), lexicalResourceVersion, + rule.getConfidence(), Direction.HtoT, getLinkInfo(rule)); + } + } + + /** + * Returns true if these two rules are opposite, meaning that: + * the first rule is w1->w2, with confidence c and relation r + * the second rule is w2->w1, with confidence c and relation r + * @param firstRule The first rule + * @param secondRule The second rule + * @return Whether the rules are opposite + */ + private boolean areOppositeLinks( + LexicalRule firstRule, + LexicalRule secondRule) { + + return ((getLinkInfo(firstRule).equals(getLinkInfo(secondRule))) && + ((Math.abs(firstRule.getConfidence() - + secondRule.getConfidence()) <= 0.000001))); + } +} diff --git a/core/src/main/java/eu/excitementproject/eop/core/component/alignment/lexicallink/LexicalResourceInformation.java 
b/core/src/main/java/eu/excitementproject/eop/core/component/alignment/lexicallink/LexicalResourceInformation.java new file mode 100644 index 00000000..1ab5949a --- /dev/null +++ b/core/src/main/java/eu/excitementproject/eop/core/component/alignment/lexicallink/LexicalResourceInformation.java @@ -0,0 +1,48 @@ +package eu.excitementproject.eop.core.component.alignment.lexicallink; + +import eu.excitementproject.eop.common.representation.partofspeech.PartOfSpeech; + +/** + * Contains general information about a lexical resource, + * such as the version, and whether to use lemma or surface level + * tokens to search for rules. + * + * @author Vered Shwartz + * @since 25/06/2014 + */ +public class LexicalResourceInformation { + + // Private Members + private String version; + private boolean useLemma; + private PartOfSpeech leftSidePOS, rightSidePOS; + + // Properties + + public String getVersion() { + return version; + } + + public boolean useLemma() { + return useLemma; + } + + public PartOfSpeech getRightSidePOS() { + return rightSidePOS; + } + + public PartOfSpeech getLeftSidePOS() { + return leftSidePOS; + } + + // Constructors + + public LexicalResourceInformation(String version, boolean useLemma, + PartOfSpeech leftSidePOS, PartOfSpeech rightSidePOS) { + super(); + this.version = version; + this.useLemma = useLemma; + this.leftSidePOS = leftSidePOS; + this.rightSidePOS = rightSidePOS; + } +} diff --git a/core/src/main/java/eu/excitementproject/eop/core/component/alignment/lexicallink/wrapped/DerivBaseDELinker.java b/core/src/main/java/eu/excitementproject/eop/core/component/alignment/lexicallink/wrapped/DerivBaseDELinker.java new file mode 100644 index 00000000..0b298b2f --- /dev/null +++ b/core/src/main/java/eu/excitementproject/eop/core/component/alignment/lexicallink/wrapped/DerivBaseDELinker.java @@ -0,0 +1,76 @@ +package eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped; + +import org.apache.uima.jcas.JCas; + +import 
eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException; +import eu.excitementproject.eop.common.exception.ComponentException; +import eu.excitementproject.eop.common.exception.ConfigurationException; +import eu.excitementproject.eop.core.component.alignment.lexicallink.LexicalAligner; +import eu.excitementproject.eop.core.component.lexicalknowledge.derivbase.DerivBaseResource; + +/** + * + * A lexical aligner class that links tokens based on DerivBase German resource. + * This is a convenience class. The class utilizes DerivBaseResource class and + * LexicalAligner class to make an aligner. + * @author Tae-Gil Noh + * + */ +public class DerivBaseDELinker implements AlignmentComponent { + + /** + * + * Default parameters. note that this won't work when you use EOP as library. + * + */ + public DerivBaseDELinker() throws AlignmentComponentException + { + this(true, 20); + } + + + /** + * + * @param wordNetPath + * @throws AlignmentComponentException + */ + public DerivBaseDELinker(boolean useDerivBaseScore, Integer derivSteps) throws AlignmentComponentException { + + try + { + DerivBaseResource lex = new DerivBaseResource(useDerivBaseScore, derivSteps); + LexicalAligner theAligner = LexicalAlignerFactory.getLexicalAlignerFromLexicalResource(lex, 1, "1.3", true, null, null); + worker = theAligner; + } + catch (ComponentException e) + { + throw new AlignmentComponentException ("Underlying resource thrown an exception: " + e.getMessage(), e); + } + catch (ConfigurationException ce) + { + throw new AlignmentComponentException ("Underlying resource thrown an exception: " + ce.getMessage(), ce); + } + + + + } + + public void annotate(JCas aJCas) throws AlignmentComponentException + { + worker.annotate(aJCas); + } + + // private variable + private final LexicalAligner worker; + + public String getComponentName() + { + return this.getClass().getName(); + } + + public 
String getInstanceName() + { + return null; + } +} diff --git a/core/src/main/java/eu/excitementproject/eop/core/component/alignment/lexicallink/wrapped/GermaNetDELinker.java b/core/src/main/java/eu/excitementproject/eop/core/component/alignment/lexicallink/wrapped/GermaNetDELinker.java new file mode 100644 index 00000000..bf647d15 --- /dev/null +++ b/core/src/main/java/eu/excitementproject/eop/core/component/alignment/lexicallink/wrapped/GermaNetDELinker.java @@ -0,0 +1,67 @@ +package eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped; + +import org.apache.uima.jcas.JCas; + +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException; +import eu.excitementproject.eop.common.exception.ComponentException; +import eu.excitementproject.eop.common.exception.ConfigurationException; +import eu.excitementproject.eop.core.component.alignment.lexicallink.LexicalAligner; +import eu.excitementproject.eop.core.component.lexicalknowledge.germanet.GermaNetWrapper; + +/** + * + * A lexical aligner class that links tokens based on GermaNet. + * This is a convenience class. The class utilizes GermaNetWrapper class and + * LexicalAligner class to make an aligner. + * @author Tae-Gil Noh + * + */ +public class GermaNetDELinker implements AlignmentComponent { + + /** + * + * @param germaNetPath + * @throws AlignmentComponentException + */ + public GermaNetDELinker(String germaNetPath) throws AlignmentComponentException { + + try + { + // Filepath, weights (causes, entails, hypernym, synonym, hyponym, and antonym) + // 0 weight will make that relation not added as alignment.link. 
+ GermaNetWrapper lex = new GermaNetWrapper(germaNetPath, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0); + LexicalAligner theAligner = LexicalAlignerFactory.getLexicalAlignerFromLexicalResource(lex, 1, "8.0", true, null, null); + worker = theAligner; + } + catch (ComponentException ee) + { + throw new AlignmentComponentException ("Underlying resource thrown an exception: " + ee.getMessage(), ee); + } + catch (ConfigurationException ce) + { + throw new AlignmentComponentException ("Underlying resource thrown an exception: " + ce.getMessage(), ce); + } + + + + } + + public void annotate(JCas aJCas) throws AlignmentComponentException + { + worker.annotate(aJCas); + } + + // private variable + private final LexicalAligner worker; + + public String getComponentName() + { + return this.getClass().getName(); + } + + public String getInstanceName() + { + return null; + } +} diff --git a/core/src/main/java/eu/excitementproject/eop/core/component/alignment/lexicallink/wrapped/GermanTransDMDELinker.java b/core/src/main/java/eu/excitementproject/eop/core/component/alignment/lexicallink/wrapped/GermanTransDMDELinker.java new file mode 100644 index 00000000..a1967507 --- /dev/null +++ b/core/src/main/java/eu/excitementproject/eop/core/component/alignment/lexicallink/wrapped/GermanTransDMDELinker.java @@ -0,0 +1,71 @@ +package eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped; + +import org.apache.uima.jcas.JCas; + +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException; +import eu.excitementproject.eop.common.exception.ConfigurationException; +import eu.excitementproject.eop.core.component.alignment.lexicallink.LexicalAligner; +import eu.excitementproject.eop.core.component.lexicalknowledge.transDm.GermanTransDmResource; + +/** + * + * A lexical aligner class that links tokens based on TransDM German resource. + * This is a convenience class. 
The class utilizes GermanTransDMResource class and + * LexicalAligner class to make an aligner. + * @author Tae-Gil Noh + * + */ +public class GermanTransDMDELinker implements AlignmentComponent { + + /** + * + * Default parameters. note that this won't work when you use EOP as library. + * + */ + public GermanTransDMDELinker() throws AlignmentComponentException + { + this("all"); + } + + + /** + * + * @param wordNetPath + * @throws AlignmentComponentException + */ + public GermanTransDMDELinker(String simMeasureChoice) throws AlignmentComponentException { + + try + { + GermanTransDmResource lex = new GermanTransDmResource(simMeasureChoice); + LexicalAligner theAligner = LexicalAlignerFactory.getLexicalAlignerFromLexicalResource(lex, 1, "1.3", true, null, null); + worker = theAligner; + } + catch (ConfigurationException ce) + { + throw new AlignmentComponentException ("Underlying resource thrown an exception: " + ce.getMessage(), ce); + } + + + + } + + public void annotate(JCas aJCas) throws AlignmentComponentException + { + worker.annotate(aJCas); + } + + // private variable + private final LexicalAligner worker; + + public String getComponentName() + { + return this.getClass().getName(); + } + + public String getInstanceName() + { + return null; + } +} diff --git a/core/src/main/java/eu/excitementproject/eop/core/component/alignment/lexicallink/wrapped/LexicalAlignerFactory.java b/core/src/main/java/eu/excitementproject/eop/core/component/alignment/lexicallink/wrapped/LexicalAlignerFactory.java new file mode 100644 index 00000000..0ec18ac5 --- /dev/null +++ b/core/src/main/java/eu/excitementproject/eop/core/component/alignment/lexicallink/wrapped/LexicalAlignerFactory.java @@ -0,0 +1,58 @@ +package eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped; + +import java.util.ArrayList; +import java.util.HashMap; + +import eu.excitementproject.eop.common.component.lexicalknowledge.LexicalResource; +import 
package eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped;

import java.util.ArrayList;
import java.util.HashMap;

import eu.excitementproject.eop.common.component.lexicalknowledge.LexicalResource;
import eu.excitementproject.eop.common.component.lexicalknowledge.RuleInfo;
import eu.excitementproject.eop.common.representation.partofspeech.PartOfSpeech;
import eu.excitementproject.eop.core.component.alignment.lexicallink.LexicalAligner;
import eu.excitementproject.eop.core.component.alignment.lexicallink.LexicalResourceInformation;

/**
 * A static factory for lexical resource developers. Given one initialized
 * lexical resource and its parameters, the method returns a LexicalAligner
 * that aligns using only that resource.
 *
 * @author Tae-Gil Noh
 */
public class LexicalAlignerFactory {

    /**
     * Wraps a single lexical resource as a LexicalAligner.
     *
     * @param aLexicalResource an initialized lexical resource instance
     * @param maxPhrase maximum phrase length to align; use 1 if the resource
     *        does not support multi-word phrases
     * @param resourceVersion version string recorded on the links this
     *        resource generates
     * @param useLemma true to match on lemmas, false to match surface tokens
     * @param leftPos POS restriction for the left side, or null for none
     * @param rightPos POS restriction for the right side, or null for none
     * @return a LexicalAligner backed by the given resource only
     */
    // NOTE(review): generic parameters reconstructed from the imported types
    // (LexicalResource, RuleInfo, LexicalResourceInformation) — confirm they
    // match the LexicalAligner constructor's declared signature.
    static LexicalAligner getLexicalAlignerFromLexicalResource(
            LexicalResource<? extends RuleInfo> aLexicalResource, int maxPhrase,
            String resourceVersion, Boolean useLemma,
            PartOfSpeech leftPos, PartOfSpeech rightPos)
    {
        ArrayList<LexicalResource<? extends RuleInfo>> lexicalResourceArr =
                new ArrayList<LexicalResource<? extends RuleInfo>>();
        lexicalResourceArr.add(aLexicalResource);

        // The aligner looks up per-resource options by the resource's class name.
        HashMap<String, LexicalResourceInformation> optionMap =
                new HashMap<String, LexicalResourceInformation>();
        LexicalResourceInformation option =
                new LexicalResourceInformation(resourceVersion, useLemma, leftPos, rightPos);
        optionMap.put(aLexicalResource.getClass().getName(), option);

        return new LexicalAligner(lexicalResourceArr, maxPhrase, optionMap);
    }
}
package eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped;

import java.io.File;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import org.apache.uima.jcas.JCas;

import eu.excitementproject.eop.common.component.alignment.AlignmentComponent;
import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException;
import eu.excitementproject.eop.common.component.lexicalknowledge.LexicalResourceException;
import eu.excitementproject.eop.core.component.alignment.lexicallink.LexicalAligner;
import eu.excitementproject.eop.core.component.lexicalknowledge.verb_ocean.RelationType;
import eu.excitementproject.eop.core.component.lexicalknowledge.verb_ocean.VerbOceanLexicalResource;

/**
 * A lexical aligner class that links tokens based on VerbOcean.
 * Convenience class. The class utilizes VerbOcean class and LexicalAligner
 * class to make the aligner.
 *
 * @author Tae-Gil Noh
 */
public class VerbOceanENLinker implements AlignmentComponent {

    // Wrapped aligner; configured once in the constructor.
    private final LexicalAligner worker;

    // Default values. Wouldn't work when running from within a Jar:
    // the path points into the source tree.
    private static final String verbOceanDefaultPath =
            "../core/src/main/resources/VerbOcean/verbocean.unrefined.2004-05-20.txt";
    private static final HashSet<RelationType> defaultRelations =
            new HashSet<RelationType>(Arrays.asList(RelationType.STRONGER_THAN));

    /**
     * Default constructor with no param. Will initiate VerbOcean with default
     * params. Note that this won't work when you use EOP as a library.
     * In such a case, use the other constructor (with path, and allowed
     * relation types).
     *
     * @throws AlignmentComponentException if the underlying resource fails to load
     */
    public VerbOceanENLinker() throws AlignmentComponentException
    {
        this(new File(verbOceanDefaultPath), defaultRelations);
    }

    /**
     * Main constructor.
     *
     * @param verbOceanFile the VerbOcean text file
     * @param allowedRelationTypes VerbOcean relation types that you want to be
     *        added as alignment.Links
     * @throws AlignmentComponentException if the underlying resource fails to load
     */
    public VerbOceanENLinker(File verbOceanFile, Set<RelationType> allowedRelationTypes) throws AlignmentComponentException {

        try
        {
            VerbOceanLexicalResource lex =
                    new VerbOceanLexicalResource(1.0, verbOceanFile, allowedRelationTypes);
            // maxPhrase=1: VerbOcean is a verb-to-verb (single word) resource.
            LexicalAligner theAligner = LexicalAlignerFactory.getLexicalAlignerFromLexicalResource(lex, 1, "1.0", true, null, null);
            worker = theAligner;
        }
        catch (LexicalResourceException e)
        {
            throw new AlignmentComponentException ("Underlying resource thrown an exception: " + e.getMessage(), e);
        }
    }

    /** Delegates annotation of alignment links to the wrapped LexicalAligner. */
    public void annotate(JCas aJCas) throws AlignmentComponentException
    {
        worker.annotate(aJCas);
    }

    public String getComponentName()
    {
        return this.getClass().getName();
    }

    public String getInstanceName()
    {
        return null;
    }
}
package eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped;

import java.io.File;
import org.apache.uima.jcas.JCas;

import eu.excitementproject.eop.common.component.alignment.AlignmentComponent;
import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException;
import eu.excitementproject.eop.common.exception.ConfigurationException;
import eu.excitementproject.eop.common.utilities.configuration.ImplCommonConfig;
import eu.excitementproject.eop.core.component.alignment.lexicallink.LexicalAligner;

/**
 * This is a useful, but broken piece of code.
 * (e.g. won't work within a JAR)
 *
 * TODO Update ASAP, after updating LexicalAligner to accept a list of lexical
 * resources in its constructor. It works as is, but only within the source
 * tree trunk, because it reads a configuration XML via a relative path.
 *
 * @author Tae-Gil Noh
 */
public class WordNetENLinker implements AlignmentComponent {

    // Path of the underlying WordNet resource. Currently unused: the
    // CommonConfig implementation cannot be updated in code, so the path
    // from the fixed XML configuration is used instead (see constructor).
    @SuppressWarnings("unused")
    private final String wordNetPath;

    // Wrapped aligner and the configuration it was built from.
    private final LexicalAligner worker;
    private final ImplCommonConfig config;

    /**
     * WARN: Broken code; wordNetPath doesn't work --- update either commonconfig
     * or LexicalAligner. Thus, it will rely on a fixed XML file path --- which
     * won't work within a Jar (used as a library).
     *
     * @param wordNetPath intended WordNet directory (currently ignored, see above)
     * @throws AlignmentComponentException if the base configuration cannot be read
     */
    public WordNetENLinker(String wordNetPath) throws AlignmentComponentException {

        this.wordNetPath = wordNetPath;
        // Fixed in-tree configuration file: the reason this class cannot run
        // from within a Jar.
        File configFile = new File("../core/src/main/resources/configuration-file/lexlinkers/WordNetENLinker.xml");
        try {
            config = new ImplCommonConfig(configFile);
        }
        catch (ConfigurationException ce)
        {
            throw new AlignmentComponentException("Reading base configuration file failed!", ce);
        }
        // NOTE: overriding "wordnet-dir" in the config programmatically was
        // attempted and abandoned; CommonConfig does not support updates yet.
        worker = new LexicalAligner(config);
    }

    /** Delegates annotation of alignment links to the wrapped LexicalAligner. */
    public void annotate(JCas aJCas) throws AlignmentComponentException
    {
        worker.annotate(aJCas);
    }

    public String getComponentName()
    {
        return this.getClass().getName();
    }

    public String getInstanceName()
    {
        return null;
    }
}
package eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped;

import java.io.File;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import org.apache.uima.jcas.JCas;

import eu.excitementproject.eop.common.component.alignment.AlignmentComponent;
import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException;
import eu.excitementproject.eop.common.component.lexicalknowledge.LexicalResourceException;
import eu.excitementproject.eop.core.component.alignment.lexicallink.LexicalAligner;
import eu.excitementproject.eop.core.component.lexicalknowledge.wordnet.WordnetLexicalResource;
import eu.excitementproject.eop.core.utilities.dictionary.wordnet.WordNetRelation;

/**
 * A lexical aligner class that links tokens based on Italian WordNet.
 * This is a convenience class. The class utilizes WordNetLexicalResource class
 * and LexicalAligner class to make the aligner.
 *
 * @author Tae-Gil Noh
 */
public class WordNetITLinker implements AlignmentComponent {

    // Wrapped aligner; configured once in the constructor.
    private final LexicalAligner worker;

    // Default relation set: synonyms only.
    private static final HashSet<WordNetRelation> defaultRelations =
            new HashSet<WordNetRelation>(Arrays.asList(WordNetRelation.SYNONYM));

    /**
     * Default parameters: first-sense restrictions off, synonym relation only,
     * chaining length 1. Note that this won't work when you use EOP as library.
     *
     * @param wordNetITPath path to the Italian WordNet dictionary directory
     * @throws AlignmentComponentException if the underlying resource fails to load
     */
    public WordNetITLinker(File wordNetITPath) throws AlignmentComponentException
    {
        this(wordNetITPath, false, false, defaultRelations, 1);
    }

    /**
     * Full constructor.
     *
     * @param wordNetITPath path to the Italian WordNet dictionary directory
     * @param useFirstSenseLeftOnly restrict the left side to the first sense
     * @param useFirstSenseRightOnly restrict the right side to the first sense
     * @param allowedRelationTypes WordNet relations to add as alignment.Links
     * @param chainingLength maximum rule-chaining length
     * @throws AlignmentComponentException if the underlying resource fails to load
     */
    public WordNetITLinker(File wordNetITPath, boolean useFirstSenseLeftOnly,
            boolean useFirstSenseRightOnly, Set<WordNetRelation> allowedRelationTypes,
            int chainingLength) throws AlignmentComponentException {

        try
        {
            // BUGFIX: previously the caller-supplied parameters were ignored
            // and the hard-coded defaults (false, false, defaultRelations, 1)
            // were always used. Pass the actual arguments through.
            WordnetLexicalResource lex = new WordnetLexicalResource(wordNetITPath,
                    useFirstSenseLeftOnly, useFirstSenseRightOnly,
                    allowedRelationTypes, chainingLength);
            LexicalAligner theAligner = LexicalAlignerFactory.getLexicalAlignerFromLexicalResource(lex, 1, "1.0", true, null, null);
            worker = theAligner;
        }
        catch (LexicalResourceException e)
        {
            throw new AlignmentComponentException ("Underlying resource thrown an exception: " + e.getMessage(), e);
        }
    }

    /** Delegates annotation of alignment links to the wrapped LexicalAligner. */
    public void annotate(JCas aJCas) throws AlignmentComponentException
    {
        worker.annotate(aJCas);
    }

    public String getComponentName()
    {
        return this.getClass().getName();
    }

    public String getInstanceName()
    {
        return null;
    }
}
package eu.excitementproject.eop.core.component.alignment.nemex;

import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.log4j.Logger;
import org.apache.uima.cas.CASException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.jcas.cas.StringArray;
import org.uimafit.util.JCasUtil;

import eu.excitementproject.eop.common.component.alignment.AlignmentComponent;
import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException;
import eu.excitementproject.eop.common.component.alignment.PairAnnotatorComponentException;
import eu.excitementproject.eop.lap.implbase.LAP_ImplBase;
import eu.excitement.type.alignment.Link;
import eu.excitement.type.alignment.Link.Direction;
import eu.excitement.type.alignment.Target;
import eu.excitement.type.nemex.*;
import de.dfki.lt.nemex.a.*;
import de.dfki.lt.nemex.a.data.GazetteerNotLoadedException;

/**
 * This class provides nemex.NemexType annotations for a given JCas pair and
 * adds alignment.Link on nemex.NemexType.
 *
 * Adds alignment from hypothesis to text annotations. An online Gazetteer is
 * created based on queries from the hypothesis. The queries in text are
 * annotated based on the online Gazetteer.
 *
 * (This is the first version of the aligner and does not do efficient query
 * generation and disambiguation. It simply creates all possible substrings as
 * queries from hypothesis and text and adds all possible approximate
 * alignments.)
 *
 * Resource it is based on: Nemex-A tool developed at DFKI.
 *
 * @author Madhumita
 * @since June 2014
 */
public class NemexAligner implements AlignmentComponent {

    private final static Logger logger = Logger.getLogger(NemexAligner.class);

    // Gazetteer configuration, passed verbatim to NEMEX_A.
    private String gazetteerFilePath;
    private String delimiter;
    private Boolean delimiterSwitchOff;
    private int nGramSize;
    private Boolean ignoreDuplicateNgrams;

    // Approximate-matching configuration for checkSimilarity().
    private double similarityThreshold;
    private String similarityMeasure;

    public NemexAligner(String gazetteerFilePath, String delimiter,
            Boolean delimiterSwitchOff, int nGramSize,
            Boolean ignoreDuplicateNgrams, String similarityMeasure,
            double similarityThreshold) {

        this.gazetteerFilePath = gazetteerFilePath;
        this.delimiter = delimiter;
        this.delimiterSwitchOff = delimiterSwitchOff;
        this.nGramSize = nGramSize;
        this.ignoreDuplicateNgrams = ignoreDuplicateNgrams;
        this.similarityMeasure = similarityMeasure;
        this.similarityThreshold = similarityThreshold;
        // The gazetteer is (re)built per pair in createDictionary(), so it is
        // not loaded here.
    }

    /**
     * Adds nemex.NemexType annotations on hypothesis and text for the given
     * JCas pair, then adds alignment.Link between matching nemex.NemexType
     * targets (direction H to T).
     *
     * @param aJCas the pair view holding the sentence(s) to be analyzed
     * @throws PairAnnotatorComponentException if the JCas is null or a view
     *         cannot be accessed
     */
    public void annotate(JCas aJCas) throws PairAnnotatorComponentException {
        logger.info("annotate() called with a JCas with the following T and H; ");

        if (aJCas == null) {
            logger.info("Null JCas object");
            throw new AlignmentComponentException(
                    "annotate() got a null JCas object.");
        }

        // queryMap: unique query id -> query string (the gazetteer entries).
        HashMap<Integer, String> queryMap = new HashMap<Integer, String>();
        // queryIndex: query string -> hypothesis offsets that generated it.
        HashMap<String, ArrayList<QueryOffset>> queryIndex =
                new HashMap<String, ArrayList<QueryOffset>>();

        try {
            JCas hypoView = aJCas.getView(LAP_ImplBase.HYPOTHESISVIEW);
            createDictionary(hypoView, queryMap, queryIndex);
        } catch (CASException e) {
            throw new AlignmentComponentException(
                    "Failed to access the hypothesis view", e);
        }

        try {
            JCas textView = aJCas.getView(LAP_ImplBase.TEXTVIEW);
            logger.info("TEXT: " + textView.getDocumentText());
            annotateSubstring(textView, queryMap, queryIndex);
        } catch (CASException e) {
            throw new AlignmentComponentException(
                    "Failed to access the text view", e);
        }
    }

    /**
     * Creates an online Gazetteer from all substrings ("queries") of the
     * hypothesis, writes it to gazetteerFilePath, loads it into NEMEX_A, and
     * annotates each hypothesis query with nemex.NemexType.
     *
     * @param hypoView the hypothesis view to be analyzed
     * @param queryMap map of unique query id to query (dictionary entry) string;
     *        filled by this method
     * @param queryIndex inverted index from queries to the hypothesis offsets
     *        which generate the query; filled by this method
     */
    public void createDictionary(JCas hypoView,
            HashMap<Integer, String> queryMap,
            HashMap<String, ArrayList<QueryOffset>> queryIndex)
            throws PairAnnotatorComponentException {
        logger.info("HYPO: " + hypoView.getDocumentText());

        // NEMEX treats '#' as the internal word separator.
        String hypothesis = hypoView.getDocumentText().toLowerCase();
        hypothesis = hypothesis.replaceAll(" ", "#");

        int index = 0;
        double totalNoOfQueries = 0;
        logger.info("Creating queries from hypothesis");
        for (int i = 0; i < hypothesis.length(); i++) {
            for (int j = i + 1; j <= hypothesis.length(); j++) {

                String query = hypothesis.substring(i, j);
                QueryOffset curOffset = new QueryOffset(hypoView, i, j);

                ArrayList<QueryOffset> offsets;
                // BUGFIX/perf: queryIndex.containsKey(query) is equivalent to
                // the former queryMap.containsValue(query) (queryIndex keys
                // are exactly the queryMap values) but O(1) instead of O(n).
                if (queryIndex.containsKey(query)) {
                    offsets = queryIndex.get(query);
                } else {
                    offsets = new ArrayList<QueryOffset>();
                    index++;
                    queryMap.put(index, query);
                }

                totalNoOfQueries++;
                offsets.add(curOffset);
                queryIndex.put(query, offsets);
            }
        }
        logger.info("Finished creating queries");

        logger.info("Adding queries to dictionary");
        try {
            // First pass: truncate the file and write the gazetteer header
            // line: "0 <charset> <lang> <#queries> <#distinct entries>".
            try (PrintWriter headerWriter =
                    new PrintWriter(new FileWriter(this.gazetteerFilePath))) {
                headerWriter.println("0 utf-8 EN " + (int) totalNoOfQueries
                        + " " + queryMap.size());
            }

            // Second pass: append one entry line per distinct query, and
            // annotate every hypothesis occurrence of that query.
            try (PrintWriter fw =
                    new PrintWriter(new FileWriter(this.gazetteerFilePath, true))) {
                for (Map.Entry<Integer, String> queryEntry : queryMap.entrySet()) {

                    int idx = queryEntry.getKey();
                    String queryText = queryEntry.getValue();
                    ArrayList<QueryOffset> value = queryIndex.get(queryText);

                    logger.info("Creating dictionary entry from hypothesis query");

                    List<String> values = new ArrayList<String>();
                    values.add(queryText);

                    // Entry format: "<id> <logProb> <query> NG:1:<logProb>"
                    String entry = idx + " "
                            + Math.log(value.size() / totalNoOfQueries) + " "
                            + queryText + " " + "NG:" + "1:"
                            + Math.log(value.size() / totalNoOfQueries);

                    logger.info("Adding entry to dictionary," + entry);
                    fw.println(entry);
                    logger.info("Finished adding entry to dictionary");

                    for (QueryOffset hQuery : value) {
                        int start = hQuery.getStartOffset();
                        int end = hQuery.getEndOffset();
                        logger.info("Adding NemexType annotation on hypothesis query");
                        addNemexAnnotation(hypoView, values, start, end);
                        logger.info("Finished adding NemexType annotation on hypothesis query");
                    }
                }
            }

            NEMEX_A.loadNewGazetteer(this.gazetteerFilePath, this.delimiter,
                    this.delimiterSwitchOff, this.nGramSize,
                    this.ignoreDuplicateNgrams);
        } catch (IOException e) {
            // Best-effort: keep original behavior of logging and continuing.
            logger.info("Error updating the Gazetteer file");
            e.printStackTrace();
        }
    }

    /**
     * Adds nemex.NemexType annotations on text queries.
     *
     * Generates all substrings of the text and looks up approximate matches
     * in the Gazetteer built from the hypothesis; matches are annotated and
     * linked back to the generating hypothesis offsets.
     *
     * @param textView the text view to be analyzed
     * @param queryMap map of unique query id to query string
     * @param queryIndex inverted index from queries to hypothesis offsets
     */
    private void annotateSubstring(JCas textView,
            HashMap<Integer, String> queryMap,
            HashMap<String, ArrayList<QueryOffset>> queryIndex) {

        String content = textView.getDocumentText().toLowerCase();
        content = content.replaceAll(" ", "#");

        for (int i = 0; i < content.length(); i++) {
            for (int j = i + 1; j <= content.length(); j++) {
                String str = content.substring(i, j);
                try {
                    List<String> values = NEMEX_A.checkSimilarity(str,
                            gazetteerFilePath, similarityMeasure,
                            similarityThreshold);
                    if (values.size() > 0) {
                        NemexType textAnnot = addNemexAnnotation(textView,
                                values, i, j);
                        addAlignmentLink(textAnnot, textView, i, j, queryMap,
                                queryIndex);
                    }
                } catch (GazetteerNotLoadedException e) {
                    logger.info("Could not load the gazetteer");
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Adds a nemex.NemexType annotation with the given values on a view at
     * [startOffset, endOffset) and indexes it.
     *
     * @param view the view which contains the text to be annotated
     * @param entry the values of the nemex.NemexType annotation
     * @param startOffset the start offset of the annotation
     * @param endOffset the end offset of the annotation
     * @return the added annotation, or null if creation failed
     */
    private NemexType addNemexAnnotation(JCas view, List<String> entry,
            int startOffset, int endOffset) {

        logger.info("Within addNemexAnnotation function, adding annotation on view: "
                + view.getDocumentText()
                + " ,and adding entries "
                + entry
                + " as values from start offset "
                + startOffset
                + " to end offset " + endOffset);

        try {
            NemexType annot = new NemexType(view, startOffset, endOffset);
            StringArray valuesArray = new StringArray(view, entry.size());
            String[] entryArray = entry.toArray(new String[entry.size()]);
            valuesArray.copyFromArray(entryArray, 0, 0, entryArray.length);

            logger.info("Setting values of annotation");
            annot.setValues(valuesArray);
            annot.addToIndexes();
            return annot;
        } catch (Exception e) {
            logger.info("Could not generate NemexType");
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Adds alignment.Link annotations between the given text annotation and
     * every hypothesis occurrence of each of its values, direction H to T.
     *
     * @param textAnnot the text annotation for the text Target
     * @param textView the view which contains the text of the pair
     * @param textStart start offset of textAnnot
     * @param textEnd end offset of textAnnot
     * @param queryMap map of unique query id to query string
     * @param queryIndex inverted index from queries to hypothesis offsets
     */
    private void addAlignmentLink(NemexType textAnnot, JCas textView,
            int textStart, int textEnd, HashMap<Integer, String> queryMap,
            HashMap<String, ArrayList<QueryOffset>> queryIndex) {
        String[] values = textAnnot.getValues().toStringArray();
        for (int i = 0; i < values.length; i++) {
            String query = values[i];
            ArrayList<QueryOffset> hypotheses = queryIndex.get(query);
            for (QueryOffset hypothesis : hypotheses) {
                JCas hypoView = hypothesis.getHypothesisView();
                int hypoStart = hypothesis.getStartOffset();
                int hypoEnd = hypothesis.getEndOffset();
                addLink(textView, textStart, textEnd, hypoView, hypoStart,
                        hypoEnd);
            }
        }
    }

    /**
     * Adds one alignment.Link between a text Target and a hypothesis Target.
     *
     * @param tView the view which contains the text of the pair
     * @param tStart start offset of the text annotation used as Target
     * @param tEnd end offset of the text annotation used as Target
     * @param hView the view which contains the hypothesis of the pair
     * @param hStart start offset of the hypothesis annotation used as Target
     * @param hEnd end offset of the hypothesis annotation used as Target
     */
    private void addLink(JCas tView, int tStart, int tEnd, JCas hView,
            int hStart, int hEnd) {

        // Prepare the Target instances
        Target textTarget = new Target(tView);
        Target hypoTarget = new Target(hView);

        for (NemexType ntype : JCasUtil.select(tView, NemexType.class)) {
            if ((ntype.getBegin() == tStart) && (ntype.getEnd() == tEnd)) {
                Target tg = new Target(tView);
                FSArray tAnnots = new FSArray(tView, 1);
                tAnnots.set(0, ntype);
                tg.setTargetAnnotations(tAnnots);
                tg.setBegin(ntype.getBegin());
                tg.setEnd(ntype.getEnd());
                tg.addToIndexes();
                textTarget = tg;
            }
        }

        for (NemexType ntype : JCasUtil.select(hView, NemexType.class)) {
            // BUGFIX: previously compared against tStart/tEnd (copy-paste),
            // so hypothesis annotations were matched with text offsets.
            if ((ntype.getBegin() == hStart) && (ntype.getEnd() == hEnd)) {
                Target tg = new Target(hView);
                FSArray hAnnots = new FSArray(hView, 1);
                hAnnots.set(0, ntype);
                tg.setTargetAnnotations(hAnnots);
                tg.setBegin(ntype.getBegin());
                tg.setEnd(ntype.getEnd());
                tg.addToIndexes();
                hypoTarget = tg;
            }
        }

        // Mark an alignment.Link and add it to the hypothesis view
        Link link = new Link(hView);
        link.setTSideTarget(textTarget);
        link.setHSideTarget(hypoTarget);

        // Set the link direction
        link.setDirection(Direction.HtoT);

        // Set strength according to the nemex-a threshold
        link.setStrength(this.similarityThreshold);

        // Add the link information
        link.setAlignerID("NemexA");
        link.setAlignerVersion("1.0");
        link.setLinkInfo("nemex-results");

        // Mark begin and end according to the hypothesis target
        link.setBegin(hypoTarget.getBegin());
        link.setEnd(hypoTarget.getEnd());

        // Add to index
        link.addToIndexes();
    }

    @Override
    public String getComponentName() {
        return this.getClass().getName();
    }

    @Override
    public String getInstanceName() {
        return this.gazetteerFilePath;
    }
}
+ * + * @author Madhumita + * @since June 2014 + * + */ + +public class QueryOffset { + + JCas hypoView; + int startOffset; // inclusive + int endOffset; // exclusive + + public QueryOffset(JCas hView, int start, int end) { + hypoView = hView; + startOffset = start; + endOffset = end; + } + + public JCas getHypothesisView() { + return hypoView; + } + + public int getStartOffset() { + return startOffset; + } + + public int getEndOffset() { + return endOffset; + } + +} diff --git a/core/src/main/java/eu/excitementproject/eop/core/component/alignment/phraselink/IdenticalLemmaPhraseLinker.java b/core/src/main/java/eu/excitementproject/eop/core/component/alignment/phraselink/IdenticalLemmaPhraseLinker.java new file mode 100644 index 00000000..698951bd --- /dev/null +++ b/core/src/main/java/eu/excitementproject/eop/core/component/alignment/phraselink/IdenticalLemmaPhraseLinker.java @@ -0,0 +1,381 @@ +package eu.excitementproject.eop.core.component.alignment.phraselink; + +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +import org.apache.log4j.Logger; +import org.apache.uima.cas.CASException; +import org.apache.uima.jcas.JCas; +import org.uimafit.util.JCasUtil; + +import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS; +import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma; +import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; +import eu.excitement.type.alignment.Link; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException; +//import eu.excitementproject.eop.common.component.alignment.PairAnnotatorComponentException; +import eu.excitementproject.eop.lap.implbase.LAP_ImplBase; + +/** + * This is a surface level aligner that aligns "identical lemma sequences" found in + * TextView and HypothesisView. 
+ * + * The module add Alignment.Link instances where its target holds token sequences (longer than 1 + * tokens). The two token sequences (one in T, the other in H) are linked only if they have + * identical lemma sequences. + * + * The module is language-free (you can pass CAS with any language): it simply trust the + * annotated lemma annotations to identify "same word". + * + * Note that, the module will annotate only the "longest lemma sequence". That is, + * if the CAS has T: "I have a dog.", H: "She has a dog too". + * It module will add only *one link* that connects three tokens of T ([have a dog]) to + * three tokens of H ([has a dog]). It won't link (have -> has), or (dog -> dog). + * TODO: FIXIT - this is only partially true -- each word in H side are checked again and again. + * + * ( Also note that, the module does not annotate "function words only" sequences. That is + * it will add links between "to emphasize" -> "to emphasize", but not "to the" -> "to the". ) + * + * Naturally, the module depends on the existence of "Lemma" annotations. If there is no Lemma + * in the give CAS, it will raise an exception. 
+ * + * @author Tae-Gil Noh + * + */ +/** + * @author tailblues + * + */ +public class IdenticalLemmaPhraseLinker implements AlignmentComponent { + + public IdenticalLemmaPhraseLinker() throws AlignmentComponentException + { +// // initialize nonContentPOS map +// isNonContentPos = new HashMap(); +// for (String s : nonContentPOSes) +// { +// isNonContentPos.put(s, true); +// } + } + + + @Override + public void annotate(JCas aJCas) throws AlignmentComponentException { + + + if (aJCas == null) + throw new AlignmentComponentException("annotate() got a null JCas object."); + + JCas textView; + JCas hypoView; + try { + textView = aJCas.getView(LAP_ImplBase.TEXTVIEW); + hypoView = aJCas.getView(LAP_ImplBase.HYPOTHESISVIEW); + } + catch (CASException e) + { + throw new AlignmentComponentException("Failed to access the Two views (TEXTVIEW, HYPOTHESISVIEW)", e); + } + + logger.info("annotate() called with a JCas with the following T and H; "); + logger.info("TEXT: " + textView.getDocumentText()); + logger.info("HYPO: " + hypoView.getDocumentText()); + + // Note: we *do not* do any language check in this module. + // (But we do check the existence of lemma and token, since that's what we need) + + // Get Token lists, TextTokens and HypoTokens. in order of appearance. + // ok. work. first; + // get all Tokens (by appearing orders...) + Collection t; + t = JCasUtil.select(textView, Token.class); + Token[] tTokens = t.toArray(new Token[t.size()]); + t = JCasUtil.select(hypoView, Token.class); + Token[] hTokens = t.toArray(new Token[t.size()]); + + // matching result will be written here... + int[] matchingPhraseStartLocations = new int[hTokens.length]; // a value represent n-th token of TSide. . + int[] matchingPhraseLengths = new int[hTokens.length]; // again, a value here means token length. + + // Okay, we have two list of tokens (that has access to lemma & pos) + // T and H. + // Okay, we start on H sequence, pos = 0 (first word). 
+ // we start finding "longest identical sequence" from the position this pos. + + for (int i=0; i < hTokens.length; i++) + // loop on H tokens, i is each possible "start" position for phrase. + { + int bestMatchTextPosition = -1; // -1 == we have no match + int bestMatchLength = 0; + + for (int j=0; j < tTokens.length; j++) + { // j iterates on text tokens ... + + int currentMatchLen = 0; + + // call, matchOnPositions(). This utility method returns + // the length of "maximum" identical sequence. + // 0 if, match didn't even start on the position. + currentMatchLen = maxMatchOnPositions(i,j, hTokens, tTokens); + if (currentMatchLen > bestMatchLength) + { + bestMatchTextPosition = j; + bestMatchLength = currentMatchLen; + } + } + + // record the best match for this position (i) + matchingPhraseStartLocations[i] = bestMatchTextPosition; + matchingPhraseLengths[i] = bestMatchLength; + + } + + // post processing: let's save only the "maximum coverage" cases. (e.g. + // if this token has already been covered with identical (or longer) pattern, + // let's ignore "less-covered term". (e.g. when we have [this very moment -> this very moment] + // we ignore [very moment -> very moment]. ) + + int lastBestMatchPos = -1; + int lastBestMatchLen = 0; + int[] finalMatchingPhraseStartLocations = new int[hTokens.length]; // a value represent n-th token of TSide. . + int[] finalMatchingPhraseLengths = new int[hTokens.length]; // again, a value here means token-sequence length. + + for(int i=0; i < hTokens.length; i++) + { + int bestMatchPos = matchingPhraseStartLocations[i]; + int bestMatchLen = matchingPhraseLengths[i]; + + if ( (bestMatchPos == (lastBestMatchPos + 1)) && (lastBestMatchLen == (bestMatchLen + 1)) ) // essentially, previous one covered this, with exactly same sequence... 
+ { // if that's the case, we ignore this link + finalMatchingPhraseStartLocations[i] = -1; + finalMatchingPhraseLengths[i] = 0; + } + else + { // otherwise, use it as is + finalMatchingPhraseStartLocations[i] = matchingPhraseStartLocations[i]; + finalMatchingPhraseLengths[i] = matchingPhraseLengths[i]; + } + + lastBestMatchPos = bestMatchPos; + lastBestMatchLen = bestMatchLen; + } + + // Okay. we have the full information in the two arrays. + // matchingPhraseStartLocation and matchingPhraseLength + // -1 means none matching. + + // Part two. annotating match with alignment.Link. + // We do this by calling a utility method with the above information. + addLinkAnnotations(aJCas, finalMatchingPhraseStartLocations, finalMatchingPhraseLengths, tTokens, hTokens); + + } + + /** + * A utility method that matches the "longest" lemma match + * on the given positions (on the two arrays) + * + * A call with (i,j, iArray, jArray) means + * "compare and find me the longest lemma match + * that starts on position i of iArray, and on position j of jArray" + * + * @param i + * @param j + * @param iArray + * @param jArray + * @return + * @throws AlignmentComponentException + */ + public static int maxMatchOnPositions(int i, int j, Token[] iArray, Token[] jArray) throws AlignmentComponentException + { + // boundary check, (is it valid?) + // if not, return 0. (no match) + if ( (i >= iArray.length) || (j >= jArray.length) ) + { + return 0; + } + + // well, try match. + Lemma iLemma = iArray[i].getLemma(); + Lemma jLemma = jArray[j].getLemma(); +// POS iPos = iArray[i].getPos(); +// POS jPos = jArray[j].getPos(); + + // sanity chcek + //if ((iLemma == null) || (jLemma == null) || (iPos == null) || (jPos == null)) + if ((iLemma == null) || (jLemma == null)) + { + throw new AlignmentComponentException("The JCas must have Lemmas and POSes annotated (connected) to Tokens."); + } + + if (iLemma.getValue().equals(jLemma.getValue())) + { // we got a match. - add 1, and recurse. 
+ return (1 + maxMatchOnPositions(i+1, j+1, iArray, jArray)); + } + else + { // no match. + return 0; + } + } + + /** + * A utility method, that adds Alignment.Link instances for the given + * information. + * + * @param aJCas + * @param matchingPhraseStartLocationsOnText index "n" of this array is for n-th token of HSide. The value means m-th token on Tside. -1 means, no match. + * @param matchingPhraseLengths index "n" of this array is for n-th token of HSide. The value means length of matching tokens. + */ + private static void addLinkAnnotations(JCas aJCas, int[] matchingPhraseStartLocationsOnText, int[] matchingPhraseLengths, Token[] tTokens, Token hTokens[]) throws AlignmentComponentException + { + logger.debug("addLinnkAnnotations() called with the following info:"); + logger.debug("matchingPhraseStartingLocationsOnText:" + Arrays.toString(matchingPhraseStartLocationsOnText)); + logger.debug("matchingPhraseLengths:" + Arrays.toString(matchingPhraseLengths)); + + int countNewLinks = 0; + int ignoredNoncontentMatches = 0; + // Okay, we have enough information. + // Add alignment.Link annotations by utilizing static method + // MeteorPhraseResourceAligner.addOneAlignmentLinkOnTokenLevel(JCas textView, JCas hypoView, int fromBegin, int fromEnd, int toBegin, int toEnd, Link.Direction dir) throws CASException + + for (int i=0; i < matchingPhraseStartLocationsOnText.length; i++) + { + // i-th Token of Hypothesis, has no matching identical lemma word/phrase on Text. Pass. + if (matchingPhraseStartLocationsOnText[i] == -1) + continue; + + // The best (longest) "identically matching" lemma-sequence of current token (ith, on Hypothesis) + // is starting on "startingTokenIdx" on TextTokens, and ends on "endingTokenIdx". 
+ int startingTokenIdx = matchingPhraseStartLocationsOnText[i]; + int endingTokenIdx = startingTokenIdx + matchingPhraseLengths[i] - 1; + + String logstring = ""; + for(int j=0; j < matchingPhraseLengths[i]; j++) + { + logstring += hTokens[i+j].getCoveredText() + " "; + } + + logger.debug("addLinkAnnotations: considering the following sequence" + "\"" + logstring + "\""); + + // check exclusion case. + if (containsOnlyNonContentPOSes (Arrays.copyOfRange(tTokens, startingTokenIdx, endingTokenIdx + 1))) + { + logger.debug("will not add an alignment.Link for this sequence."); + ignoredNoncontentMatches ++; + continue; + } + + // consider: remove punctuations at the ending of a sequence? hmm. maybe not. if to do so, here is the place + // yet another a boolean asking method.. + + logger.debug("Adding an alignment.Link for the sequence."); + + // Okay. it is normal, so let's prepare to add Token level alignment.Link + int tSideBegin; + int tSideEnd; + int hSideBegin; + int hSideEnd; + + try { + tSideBegin = tTokens[startingTokenIdx].getBegin(); + tSideEnd = tTokens[endingTokenIdx].getEnd(); + hSideBegin = hTokens[i].getBegin(); + hSideEnd = hTokens[i + matchingPhraseLengths[i] -1].getEnd(); + } catch (ArrayIndexOutOfBoundsException e ) + { + throw new AlignmentComponentException("Internal integrity failure: internal logic of annotate() generated wrong parameter for the utility static method.", e); + } + + // Now we can add Link itself... Borrowing a public static utility method from another module. + + try { + JCas textView = aJCas.getView(LAP_ImplBase.TEXTVIEW); + JCas hypoView = aJCas.getView(LAP_ImplBase.HYPOTHESISVIEW); + Link.Direction d = Link.Direction.Bidirection; // since it is "identical". 
+ + Link aNewLink = MeteorPhraseResourceAligner.addOneAlignmentLinkOnTokenLevel(textView, hypoView, tSideBegin, tSideEnd, hSideBegin, hSideEnd, d); + + // as the Javadoc of the above utility method says, we need to add + // The caller must do after the call .setStrength() .setAlignerID() .setAlignerVersion() .setLinkInfo(). (Also groupLabel, if using that) - (But this method does add the new Link to CAS annotation index) + aNewLink.setStrength(DEFAULT_LINK_STR); + aNewLink.setAlignerID(ALIGNER_ID); + aNewLink.setAlignerVersion(ALIGNER_VER); + aNewLink.setLinkInfo(ALIGNER_LINK_INFO); + + countNewLinks++; + + } catch (CASException e) + { + throw new AlignmentComponentException("Adding link instance failed with a CAS Exception. Something wasn't right on the input CAS.", e); + } + } + + logger.info("added " + countNewLinks + " new links on the CAS" + " (ignored " + ignoredNoncontentMatches + " function-word only possible links)"); + } + + + @Override + public String getComponentName() { + return this.getClass().getName(); // return class name as the component name + } + + @Override + public String getInstanceName() { + return null; // this module does not support multiple-instances (e.g. with different configurations) + } + + private static Boolean containsOnlyNonContentPOSes(Token[] tokenArr) throws AlignmentComponentException + { + logger.debug("checking non content POSes only or not: "); + + String logline=""; + Boolean nonContentPOSesOnly = true; + for(Token t : tokenArr) + { + POS p = t.getPos(); + if (p == null) + { + throw new AlignmentComponentException("Unable to Process this CAS: There is one (or more) token without POS annotation. The process requires POS and Lemma annotated."); + } + String s = p.getType().toString(); + String typeString = s.substring(s.lastIndexOf(".") + 1); + logline += t.getCoveredText() + "/" + typeString + ", "; + if (!(isNonContentPos.containsKey(typeString)) ) + { + nonContentPOSesOnly = false; + // break; // no need to continue. 
+ } + } + logger.debug(logline + " => " + nonContentPOSesOnly.toString()); + + return nonContentPOSesOnly; + } + + // logger + private final static Logger logger = Logger.getLogger(IdenticalLemmaPhraseLinker.class); + + + // Non-configurable, (hard-coded) settings. + // non Content POS types. (among DKPro POS types that we use) + // Punctuation, Preposition, Others, Conjunction, and Articles. + final private static String[] nonContentPOSes = {"PUNC", "PP", "O", "CONJ", "ART"}; + + public static Map isNonContentPos = new HashMap(); + static { + // initialize nonContentPOS map + isNonContentPos = new HashMap(); + for (String s : nonContentPOSes) + { + isNonContentPos.put(s, true); + } + } + + // meta-information that will be added on link instances added by the module. + final private static double DEFAULT_LINK_STR = 1.0; + final private static String ALIGNER_ID = "IdenticalLemmas"; + final private static String ALIGNER_VER = "1.0"; + final private static String ALIGNER_LINK_INFO = "SameLemma"; + +} diff --git a/core/src/main/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseLinkerDE.java b/core/src/main/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseLinkerDE.java new file mode 100644 index 00000000..95e54999 --- /dev/null +++ b/core/src/main/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseLinkerDE.java @@ -0,0 +1,70 @@ +package eu.excitementproject.eop.core.component.alignment.phraselink; + +import org.apache.uima.cas.CASException; +import org.apache.uima.jcas.JCas; + +import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException; +import eu.excitementproject.eop.lap.implbase.LAP_ImplBase; + + +/** + * + * This class provides alignment.Link for phrases in the given JCas. + * From TextView phrases to Hypothesis phrases. + * (This is a step-1 aligner -- that is, lookup-aligner, adds all links, does not select/resolve the best one. 
+ * It simply adds everything the underlying resource knows.) + * The resource it is based on is: the German Meteor Paraphrase table. (for the moment, from Meteor 1.5 release). + * + * A AlignmentAnnotator component, that will add Token level alignment.Links (the two Targets of + * the link instance hold one, or more tokens.) + * + * + * @author Tae-Gil Noh + * @since June 2014 + * + */ + +public class MeteorPhraseLinkerDE extends MeteorPhraseResourceAligner { + + public MeteorPhraseLinkerDE() throws AlignmentComponentException + { + // zero configuration --- this component loads Meteor English resource and + // there is nothing to configure. + // German paraphrase table from Meteor 1.5, where the maximum length phrase has 7 words + super("/meteor-1.5/data/paraphrase-de", 7); // Note that this data file is already provided and added in CORE POM dependency. + + // set language ID for language check + languageId = "DE"; + + // override link metadata + this.alignerID = "MeteorPhraseLink"; + this.alignerVersion = "MeteorGermanPP15"; + this.linkInfo = "paraphrase"; + } + + public void annotate(JCas aJCas) throws AlignmentComponentException + { + // language check + String tViewLangId; // language Ids in two views of the given CAS + String hViewLangId; + try + { + tViewLangId = aJCas.getView(LAP_ImplBase.HYPOTHESISVIEW).getDocumentLanguage(); + hViewLangId = aJCas.getView(LAP_ImplBase.TEXTVIEW).getDocumentLanguage(); + } + catch(CASException e) + { + throw new AlignmentComponentException("Accessing text/hypothesis view failed: CAS object might not be a correct one."); + } + + if (! ( languageId.equalsIgnoreCase(tViewLangId) && languageId.equalsIgnoreCase(hViewLangId)) ) + { + throw new AlignmentComponentException("Language ID mismatch: this component provides service for " + languageId + ", but received a JCas with " + tViewLangId + "/" + hViewLangId); + } + + // call super, which does the actual work. 
+ super.annotate(aJCas); + } + + private final String languageId; +} diff --git a/core/src/main/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseLinkerEN.java b/core/src/main/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseLinkerEN.java new file mode 100644 index 00000000..c74fa6b9 --- /dev/null +++ b/core/src/main/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseLinkerEN.java @@ -0,0 +1,69 @@ +package eu.excitementproject.eop.core.component.alignment.phraselink; + +import org.apache.uima.cas.CASException; +import org.apache.uima.jcas.JCas; + +import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException; +import eu.excitementproject.eop.lap.implbase.LAP_ImplBase; + +/** + * + * This class provides alignment.Link for phrases in the given JCas. + * From TextView phrases to Hypothesis phrases. + * (This is a step-1 aligner -- that is, lookup-aligner, adds all links, does not select/resolve the best one. + * It simply adds everything the underlying resource knows.) + * The resource it is based on is: the English Meteor Paraphrase table. (for the moment, from Meteor 1.5 release). + * + * A AlignmentAnnotator component, that will add Token level alignment.Links (the two Targets of + * the link instance hold one, or more tokens.) + * + * + * @author Tae-Gil Noh + * @since June 2014 + * + */ +public class MeteorPhraseLinkerEN extends MeteorPhraseResourceAligner { + + public MeteorPhraseLinkerEN() throws AlignmentComponentException + { + // zero configuration --- this component loads Meteor English resource and + // there is nothing to configure. + // English paraphrase table from Meteor 1.5, where the maximum length phrase has 7 words + super("/meteor-1.5/data/paraphrase-en", 7); // Note that this data file is already provided and added in CORE POM dependency. 
+ + // set language ID for language check + languageId = "EN"; + + // override link metadata + this.alignerID = "MeteorPhraseLink"; + this.alignerVersion = "MeteorEnglishPP15"; + this.linkInfo = "paraphrase"; + } + + public void annotate(JCas aJCas) throws AlignmentComponentException + { + // language check + String tViewLangId; // language Ids in two views of the given CAS + String hViewLangId; + try + { + tViewLangId = aJCas.getView(LAP_ImplBase.HYPOTHESISVIEW).getDocumentLanguage(); + hViewLangId = aJCas.getView(LAP_ImplBase.TEXTVIEW).getDocumentLanguage(); + } + catch(CASException e) + { + throw new AlignmentComponentException("Accessing text/hypothesis view failed: CAS object might not be a correct one."); + } + + if (! ( languageId.equalsIgnoreCase(tViewLangId) && languageId.equalsIgnoreCase(hViewLangId)) ) + { + throw new AlignmentComponentException("Language ID mismatch: this component provides service for " + languageId + ", but received a JCas with " + tViewLangId + "/" + hViewLangId); + } + + // call super, which does the actual work. + super.annotate(aJCas); + } + + private final String languageId; + +} diff --git a/core/src/main/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseLinkerIT.java b/core/src/main/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseLinkerIT.java new file mode 100644 index 00000000..66332541 --- /dev/null +++ b/core/src/main/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseLinkerIT.java @@ -0,0 +1,71 @@ +package eu.excitementproject.eop.core.component.alignment.phraselink; + +import org.apache.uima.cas.CASException; +import org.apache.uima.jcas.JCas; + +import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException; +import eu.excitementproject.eop.lap.implbase.LAP_ImplBase; + +/** + * + * This class provides alignment.Link for phrases in the given JCas. + * From TextView phrases to Hypothesis phrases. 
+ * + * (This is a step-1 aligner -- that is, lookup-aligner, adds all links, does not select/resolve the best one. + * It simply adds everything the underlying resource knows.) + * + * The underlying resource for this class: the Italian Paraphrase table, extracted from parallel corpus + * by Viviana Antonela Nastase (http://hlt.fbk.eu/people/profile/nastase) + * + * A AlignmentAnnotator component, that will add Token level alignment.Links (the two Targets of + * the link instance hold one, or more tokens.) + * + * @author Tae-Gil Noh + * @since June 2014 + * + */ +public class MeteorPhraseLinkerIT extends MeteorPhraseResourceAligner { + + public MeteorPhraseLinkerIT() throws AlignmentComponentException + { + // zero configuration --- this component loads Meteor English resource and + // there is nothing to configure. + // Italian paraphrase table from Vivi's resource, here we set maximum length of phrase as 7 words (tokens) + super("/vivi-paraphrase/data/paraphrase-it", 7); // Note that this data file is already provided and added in CORE POM dependency. + + // set language ID for language check + languageId = "IT"; + + // override link metadata + this.alignerID = "MeteorPhraseLink"; + this.alignerVersion = "FBKViviItlianPP10"; + this.linkInfo = "paraphrase"; + } + + public void annotate(JCas aJCas) throws AlignmentComponentException + { + // language check + String tViewLangId; // language Ids in two views of the given CAS + String hViewLangId; + try + { + tViewLangId = aJCas.getView(LAP_ImplBase.HYPOTHESISVIEW).getDocumentLanguage(); + hViewLangId = aJCas.getView(LAP_ImplBase.TEXTVIEW).getDocumentLanguage(); + } + catch(CASException e) + { + throw new AlignmentComponentException("Accessing text/hypothesis view failed: CAS object might not be a correct one."); + } + + if (! 
( languageId.equalsIgnoreCase(tViewLangId) && languageId.equalsIgnoreCase(hViewLangId)) ) + { + throw new AlignmentComponentException("Language ID mismatch: this component provides service for " + languageId + ", but received a JCas with " + tViewLangId + "/" + hViewLangId); + } + + // call super, which does the actual work. + super.annotate(aJCas); + } + + private final String languageId; + +} diff --git a/core/src/main/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseResourceAligner.java b/core/src/main/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseResourceAligner.java new file mode 100644 index 00000000..dce59885 --- /dev/null +++ b/core/src/main/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseResourceAligner.java @@ -0,0 +1,372 @@ +package eu.excitementproject.eop.core.component.alignment.phraselink; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; + +import org.apache.log4j.Logger; +import org.apache.uima.cas.CASException; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.cas.FSArray; +import org.uimafit.util.JCasUtil; +import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; + +import eu.excitement.type.alignment.Link; +import eu.excitement.type.alignment.Target; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException; +import eu.excitementproject.eop.lap.implbase.LAP_ImplBase; + +/** + * This component annotates a JCas based on Meteor (or Meteor-like) paraphrase + * resource. This class itself is not supposed to be used by end-users. + * A thin-wrapper per each language + resource will be provided for end users, and that + * end user class would be extending this class. + * + * Actual look up depends on MeteorPhraseTable class. 
+ * annotate() adds alignment.Link/Target instances that link Token annotations. + * Phrase-to-Phrase link is represented by alignment.Targets that holds more than one Token. + * + * The class has some static methods that might be useful for other aligners. For example, + * "addOneAlignmentLinkOnTokenLevel()" is public, and might be useful. + * + * All comparison / match is done on surface (Sofa text) level, and then the matching + * positive links are established on CAS Tokens. Also note that all comparisons are done + * as lower-case, as the underlying Meteor resource requires. + * + * TODO: groupLabel part is ignored in the class yet. To be added. + * + * Note: You can easily add a paraphrase linker, from any "Meteor-like paraphrase data file" in resource path. + * For an usage example, check MeteorPhraseLinkerEN class, which provides end-user component with Meteor English table. + * + * @author Tae-Gil Noh + * @since June 2014 + */ +public class MeteorPhraseResourceAligner implements AlignmentComponent { + + public MeteorPhraseResourceAligner(String resourcePath, int maxPhraseLength) throws AlignmentComponentException + { + // initialize private final variables + //logger = Logger.getLogger(this.getClass().toString()); // we use a static instance + this.resourcePath = resourcePath; + this.maxPhraseLength = maxPhraseLength; + + // load table. 
+ try { + this.table = new MeteorPhraseTable(resourcePath); + } + catch (IOException e) + { + throw new AlignmentComponentException("Loading the paraphrase table with the following resource path have failed: " + resourcePath, e); + } + } + + public void annotate(JCas aJCas) throws AlignmentComponentException + { + // intro log + logger.info("annotate() called with a JCas with the following T and H; "); + + if (aJCas == null) + throw new AlignmentComponentException("annotate() got a null JCas object."); + + JCas textView; + JCas hypoView; + try { + textView = aJCas.getView(LAP_ImplBase.TEXTVIEW); + hypoView = aJCas.getView(LAP_ImplBase.HYPOTHESISVIEW); + + // note - language check should be done by class that extends this class + } + catch (CASException e) + { + throw new AlignmentComponentException("Failed to access the Two views (TEXTVIEW, HYPOTHESISVIEW)", e); + } + + logger.info("TEXT: " + textView.getDocumentText()); + logger.info("HYPO: " + hypoView.getDocumentText()); + + int countAnnotatedLinks = 0; + + // get all candidates on Text view + List phraseCandidatesInTextView = getPhraseCandidatesFromSOFA(textView, maxPhraseLength); + String hypoViewSofaText = hypoView.getDocumentText().toLowerCase(); + String textViewSofaText = textView.getDocumentText().toLowerCase(); + + // for each candidate, query table. + for(String cand : phraseCandidatesInTextView) + { + List rhsAndProbList = table.lookupParaphrasesFor(cand); + + // if there is match (candidate is on table as LHS); + if (rhsAndProbList.size() > 0) + { + // check each RHS if that RHS does exists on Hypothesis view SOFA text. + for(MeteorPhraseTable.ScoredString tuple : rhsAndProbList) + { + String lhs = cand; + String rhs = tuple.getString(); + if (hypoViewSofaText.contains(rhs)) + { + // Okay. rhs seems to exist in HYPOTHESIS view SOFA. (as a token, or as a sub-token -> we will add only if it is a full Token) + + // Find out locations and annotate one (or more) phrase Links. 
+ // (note that multiple link is only possible if same lhs or rhs occurrs multiple + // time on text or hypothesis) + List rhsOccurrences = getOccurrencePoints(hypoViewSofaText, rhs); + for(int rhsBegin : rhsOccurrences) + { + List lhsOccurrences = getOccurrencePoints(textViewSofaText, lhs); + for (int lhsBegin : lhsOccurrences) + { + try { + // generate a new Link with Two Targets + Link aLink = addOneAlignmentLinkOnTokenLevel(textView, hypoView, lhsBegin, lhsBegin + lhs.length(), rhsBegin, rhsBegin + rhs.length(), Link.Direction.TtoH); + + // Do we have tokens for them? --- check aLink created. + // note that the link is created only RHS exist as a token in Hypothesis View. + if (aLink == null) + { // pass. Hypothesis Text does include RHS, but only as a sub-token. (say, rhs is "Jew", within token "Jewish". ) + continue; + } + + // add Meta-information on the Link. + // setStrength() .setAlignerID() .setAlignerVersion() .setLinkInfo(). (Also groupLabel, if using that) + aLink.setStrength(tuple.getScore()); + aLink.setAlignerID(this.alignerID); + aLink.setAlignerVersion(this.alignerVersion); + aLink.setLinkInfo(this.linkInfo); + countAnnotatedLinks ++; + } + catch(CASException e) + { + throw new AlignmentComponentException("JCas access failed while adding Links", e); + } + } + } + } + } + } + } + + // outro log + logger.info("annotate() added " + countAnnotatedLinks + " links to the CAS." ); + + } + + /** + * This method is a helper utility that is required to look up Meteor Phrase tables. + * + * Basically, returns all possible phrase candidates up to N words in a List + * + * The method uses Token annotation in JCas to generate possible candidates. Thus, + * a tokenization annotator should have annotated this JCas before. + * + * @param JCas aJCas The view, that holds the sentence(s) to be analyzed. 
+ * @param int uptoN The maximum number of + * @return + */ + public static List getPhraseCandidatesFromSOFA(JCas aJCas, int uptoN) + { + // sanity check + assert(aJCas !=null); + assert(uptoN > 0); + + // list for result, + List result = new ArrayList(); + + // ok. work. first; + // get all Tokens (by appearing orders...) + Collection t = JCasUtil.select(aJCas, Token.class); + Token[] tokens = t.toArray(new Token[t.size()]); + + // then; + // for each Token, start uptoN process. + for(int i=0; i < tokens.length; i++) + { + for(int j=0; (j < uptoN) && (i+j < tokens.length); j++ ) + { + Token leftEnd = tokens[i]; + Token rightEnd = tokens[i+j]; + String text = aJCas.getDocumentText().substring(leftEnd.getBegin(), rightEnd.getEnd()); + // and store in lower case. + result.add(text.toLowerCase()); + } + } + + // done + // all candidates are store here. + return result; + } + + /** + * Utility method that adds alignment.Links on Token level. + * Usage is about like this. + * + * + * example + *

+	 *                          1         2         3         4
+	 *                012345678901234567890123456789012345678901234567890
+	 * TEXTVIEW SOFA  He went there in person to dwell on the importance.  
+	 * HYPOVIEW SOFA  He went there to explain the significance. 
+	 * 
+ * + *

+ * And let's assume that we want to link "to dwell on the importance" (27 to 49 on TEXTVIEW) + * to "to explain the significance" (14 to 40 on HYPOVIEW). Then a call like the following will + * make it happen. + * + * method(hypoview, textview, 27, 49, 14, 40) + * + * With this call, all tokens that are covering TEXT SOFA text position 24 - 49 will be grouped in alignment.Group, + * and all tokens that are covering HYPO SOFA text position 14 - 40 will be grouped in another alignment.Group + * and they will be linked by alignment.Link. + * + *

+ * NOTE + * - This method *does not* add any "meta-level" information, such as aligner ID, etc. Those has to be added by the caller on the returned new Link instance. + * - The caller must do after the call .setStrength() .setAlignerID() .setAlignerVersion() .setLinkInfo(). (Also groupLabel, if using that) + * - (But this method does add the new Link to CAS annotation index) + * + * @param viewFrom The view where tokens will be grouped as a target for alignment.link From. + * @param viewTo The view where tokens will be grouped as a target for alignment.link Target. + * @param fromStart start position of Tokens in viewFrom + * @param fromEnd end position of Tokens in viewFrom + * @param toStart start position of Tokens in viewTo + * @param toEnd end position of Tokens in viewTo + * @return Link the successful call will return the newly generated Link instance. + */ + public static Link addOneAlignmentLinkOnTokenLevel(JCas textView, JCas hypoView, int fromBegin, int fromEnd, int toBegin, int toEnd, Link.Direction dir) throws CASException + { + // declare what is being done on log ... 
+ logger.debug("got request to add link from TEXT -> HYPO group"); + logger.debug("TEXT group: " + fromBegin + " to " + fromEnd + ":" + textView.getDocumentText().substring(fromBegin, fromEnd)); + logger.debug("HYPO group: " + toBegin + " to " + toEnd +":" + hypoView.getDocumentText().substring(toBegin, toEnd)); + //logger.debug("TEXT SOFA: " + viewFrom.getDocumentText()); + //logger.debug("HYPO SOFA: " + viewTo.getDocumentText()); + + // prepared two alignment Targets + // FROM side + //List tokens = JCasUtil.selectCovering(textView, Token.class, fromBegin, fromEnd); + List tokens = tokensBetween(textView, fromBegin, fromEnd); + Target textTarget = prepareOneTarget(textView, tokens); + + // TO side + //tokens = JCasUtil.selectCovering(hypoView, Token.class, toBegin, toEnd); + tokens = tokensBetween(hypoView, toBegin, toEnd); + Target hypoTarget = prepareOneTarget(hypoView, tokens); + + if ((textTarget == null) || (hypoTarget == null)) + { + logger.debug("no matching Tokens (probably rhs exist only as sub-token, not a full token). --- not making Link instance and returning null."); + return null; + } + + // Okay. we have two targets. Make one Link. 
+ Link theLink = new Link(hypoView); + theLink.setTSideTarget(textTarget); + theLink.setHSideTarget(hypoTarget); + theLink.setDirection(dir); + + logger.debug("TSideTarget, " + textTarget.getTargetAnnotations().size() + " tokens, covers: " + textTarget.getCoveredText()); + logger.debug("HSideTarget, " + hypoTarget.getTargetAnnotations().size() + " tokens, covers: " + hypoTarget.getCoveredText()); + + theLink.setBegin(hypoTarget.getBegin()); + theLink.setEnd(hypoTarget.getEnd()); + + theLink.addToIndexes(); + + // The caller must do after the call + // .setStrength() + // .setAlignerID() + // .setAlignerVersion() + // .setLinkInfo(); + + return theLink; + } + + // a utility method used by addAlignmentLinnksOnTokenLevel + // Gets one View and a set of Tokens (of that view) and makes one alignment.Target + private static Target prepareOneTarget(JCas view, Collection tokens) + { + int countTokens = tokens.size(); + if (countTokens == 0) // check; null means no Tokens for target. + return null; + + Target aTarget = new Target(view); + + FSArray annots = new FSArray(view, countTokens); + aTarget.setTargetAnnotations(annots); + Iterator itr = tokens.iterator(); + int begin = -1; // I am using -1 as "not set yet". + int end=0; + for(int i=0; i < countTokens; i++) + { + Token t = itr.next(); + if (begin == -1) // if not set. + begin = t.getBegin(); + end = t.getEnd(); // we are assuming that collection tokens is ordered. 
+ annots.set(i, t); + } + + aTarget.setBegin(begin); + aTarget.setEnd(end); + aTarget.addToIndexes(); + + return aTarget; + } + + private static List tokensBetween(JCas aJCas, int from, int to) + { + List tokenList = new ArrayList(); + + for (Token token: JCasUtil.select(aJCas, Token.class)) + { + if ( (token.getBegin() >= from) && (token.getEnd() <= to)) + { + tokenList.add(token); + } + } + return tokenList; + } + + public String getComponentName() + { + return this.getClass().getName(); + } + + public String getInstanceName() + { + return resourcePath; + } + + // a utility method + static List getOccurrencePoints(String holder, String substring) + { + List result = new ArrayList(); + int searchFrom = 0; + int begin; + while((begin = holder.indexOf(substring, searchFrom)) > 0) + { + int end = begin + substring.length(); + result.add(begin); + searchFrom = end; + } + return result; + } + + + private final String resourcePath; + private final MeteorPhraseTable table; + private final int maxPhraseLength; + + private final static Logger logger = Logger.getLogger(MeteorPhraseResourceAligner.class); + + // default link metadata, can be (or should be) overridden by subclasses. 
+ protected String alignerID = "PhraseLink"; + protected String alignerVersion = "MeteorPhraseTable"; + protected String linkInfo = "paraphrase"; + +} diff --git a/core/src/main/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseTable.java b/core/src/main/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseTable.java new file mode 100644 index 00000000..14ceff28 --- /dev/null +++ b/core/src/main/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseTable.java @@ -0,0 +1,154 @@ +package eu.excitementproject.eop.core.component.alignment.phraselink; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.HashMap; + +import org.apache.log4j.Logger; + +/** + * This class represents Meteor Phrase Table. + * + * Memo: + * - one interesting aspect is that Meteor Phrase Table is *not* symmetric. + * - (e.g. "12 candidate countries -> 12 candidates does exist", but not the other way around, etc) + * + * This table provides the capability to load and look up meteor-like phrase table + * by querying "LHS phrase" and get [(rhs, probability of lhs->rhs), ... 
] + * + * + * @author Tae-Gil Noh + * @since June 2014 + * + */ +public class MeteorPhraseTable { + + public MeteorPhraseTable(String resourcePath) throws IOException + { + // initialize private final variables + logger = Logger.getLogger(this.getClass().toString()); + entryPairsAsMap = new HashMap>(); + + // start loading the table text from resource path + logger.info("Loading Meteor Paraphrase table from resource path: " + resourcePath); + final long loadStart = System.currentTimeMillis(); + + InputStream is = getClass().getResourceAsStream(resourcePath); + BufferedReader tableReader = new BufferedReader(new InputStreamReader(is)); + + int ec = 0; + String line1 = null; + while((line1 = tableReader.readLine()) != null) + { + Float prob = Float.parseFloat(line1); + String lhs = tableReader.readLine(); + String rhs = tableReader.readLine(); + + // no lhs yet in the map ? + if (!entryPairsAsMap.containsKey(lhs)) + { + // then, make the entry in the first-level map, + Map map = new HashMap(); + entryPairsAsMap.put(lhs, map); + } + + // add the (lhs, rhs, prob) + // lhs as the key for outer map, rhs as the key for inner map, + // and probability value is in the value of the inner map. + entryPairsAsMap.get(lhs).put(rhs, prob); + + ec++; + } + final long loadEnd = System.currentTimeMillis(); + final long duration = ( loadEnd - loadStart ) / 1000; + + logger.info("loading complelte, " + ec + " entries. (in " + duration + " seconds)") ; + + } + + /** + * Query the table; return possible paraphrases for the given phrase with score. + * + * Give phrase is treated as "Left". (LHS ->) and the returning values are + * (RHS, probability) + * + * @param phrase + * @return + */ + public List lookupParaphrasesFor(String phrase) + { + ArrayList phrList = new ArrayList(); + + // check if the phrase exist as LHS in the paraphrase table + if (!entryPairsAsMap.containsKey(phrase)) + { + // if not, no need to look into it. + // returning an empty list. 
+ return phrList; + } + + // Okay. there is paraphrase rules applicable to this give LHS. + // return them all, as a list of (String, Score) tuples. + // [ (rhs1, probability for rhs1), (rhs2, probability for rhs2), ...] + Map mapForLhs = entryPairsAsMap.get(phrase); + Iterator itr = mapForLhs.keySet().iterator(); + while(itr.hasNext()) + { + String rhs = itr.next(); + Float prob = mapForLhs.get(rhs); + ScoredString rhsAndItsProb = new ScoredString(rhs, prob); + phrList.add(rhsAndItsProb); + } + + return phrList; + } + + /** + * A simple class that represents a tuple of String (that holds a phrase) + * and its score. + * + * ("A phrase", double value) + * + * @author Tae-Gil Noh + * + */ + public class ScoredString + { + public ScoredString(String phrase, double score) + { + this.string = phrase; + this.score = score; + } + + public double getScore() + { + return score; + } + + public String getString() + { + return string; + } + + private final double score; + private final String string; + } + + // internal data structures and variables + + // entryPairsAsMap holds all entries. + // single entry is: (lhs string, rhs string, probability float) + // + // lhs as the key for outer map, + // rhs as the key for inner map, + // and probability value is in the value of the inner map. + private final Map> entryPairsAsMap; + private final Logger logger; + +} diff --git a/core/src/main/java/eu/excitementproject/eop/core/component/alignment/phraselink/package-info.java b/core/src/main/java/eu/excitementproject/eop/core/component/alignment/phraselink/package-info.java new file mode 100644 index 00000000..46dcb5cc --- /dev/null +++ b/core/src/main/java/eu/excitementproject/eop/core/component/alignment/phraselink/package-info.java @@ -0,0 +1,13 @@ +/** + * + * This package holds various phrase-level aligners that adds Alignment.Link instances + * that connects phrases (including single words) on TEXTVIEW and HYPOTHESISVIEW. 
+ * + * Currently (as of July 2014) it holds the following two types. + * - a set of phrase aligners from Meteor-like paraphrase tables: MeteorPhraseLinker[XX]. @see MeteorPhraseResourceAligner. + * - A phrase aligner that aligns based on Lemma-identity (links are added on the same sequence of lemmas). @see IdenticalLemmaPhraseLinker. + * + * @author Tae-Gil Noh + * @since June 2014 + */ +package eu.excitementproject.eop.core.component.alignment.phraselink; \ No newline at end of file diff --git a/core/src/main/java/eu/excitementproject/eop/core/component/distance/FixedWeightEditDistance.java b/core/src/main/java/eu/excitementproject/eop/core/component/distance/FixedWeightEditDistance.java index 5fb43412..2f840d62 100644 --- a/core/src/main/java/eu/excitementproject/eop/core/component/distance/FixedWeightEditDistance.java +++ b/core/src/main/java/eu/excitementproject/eop/core/component/distance/FixedWeightEditDistance.java @@ -71,49 +71,49 @@ */ public abstract class FixedWeightEditDistance implements DistanceCalculation { - private static final String STOP_WORD_POS = "POS"; - private static final String STOP_WORD_LIST = "LIST"; - private static final String STOP_WORD_POS_LIST = "POS,LIST"; - private static final String IGNORE_CASE = "ignoreCase"; - private static final String NORMALIZATION_TYPE = "normalizationType"; - private static final String DEFAULT = "default"; - private static final String LONG = "long"; - private static final String PATH_STOP_WORD = "pathStopWordFile"; - private static final String STOP_WORD_TYPE = "stopWordRemoval"; + protected static final String STOP_WORD_POS = "POS"; + protected static final String STOP_WORD_LIST = "LIST"; + protected static final String STOP_WORD_POS_LIST = "POS,LIST"; + protected static final String IGNORE_CASE = "ignoreCase"; + protected static final String NORMALIZATION_TYPE = "normalizationType"; + protected static final String DEFAULT = "default"; + protected static final String LONG = "long"; + protected static 
final String PATH_STOP_WORD = "pathStopWordFile"; + protected static final String STOP_WORD_TYPE = "stopWordRemoval"; /** * weight for match */ - private double mMatchWeight; + protected double mMatchWeight; /** * weight for delete */ - private double mDeleteWeight; + protected double mDeleteWeight; /** * weight for insert */ - private double mInsertWeight; + protected double mInsertWeight; /** * weight for substitute */ - private double mSubstituteWeight; + protected double mSubstituteWeight; /** * the activated instance */ - private String instances; + protected String instances; /** * the resource */ @SuppressWarnings("rawtypes") - private List lexR; + protected List lexR; /** * stop word removal */ - private boolean stopWordRemovalPOS; - private boolean ignoreCase; + protected boolean stopWordRemovalPOS; + protected boolean ignoreCase; Set relations = new HashSet(); - private HashSet ignoreSet = null; - private String normalizationType; + protected HashSet ignoreSet = null; + protected String normalizationType; static Logger logger = Logger.getLogger(FixedWeightEditDistance.class.getName()); @@ -294,7 +294,7 @@ else if (language.equals("EN")) { } - private void initializeStopWordList(String path) { + protected void initializeStopWordList(String path) { File sourceFile = new File(path); try { BufferedReader br = new BufferedReader(new FileReader(sourceFile)); @@ -590,7 +590,7 @@ public Vector calculateScores(JCas jcas) throws ScoringComponentExceptio * @return the list of tokens in the CAS. * */ - private List getTokenSequences(JCas jcas) { + protected List getTokenSequences(JCas jcas) { List tokensList = new ArrayList(); @@ -782,10 +782,10 @@ public DistanceValue distance(List source, List target ) throws Ar // the entire source sequence. It is used to normalize distance values. 
double norm; - if(normalizationType.equalsIgnoreCase(DEFAULT)){ - norm = distanceTable[source.size()][0] + distanceTable[0][target.size()]; - }else{ + if(LONG.equalsIgnoreCase(normalizationType)){ norm = source.size() + target.size(); + }else { + norm = distanceTable[source.size()][0] + distanceTable[0][target.size()]; } // the normalizedDistanceValue score has a range from 0 (when source is identical to target), to 1 // (when source is completely different form target). @@ -796,7 +796,7 @@ public DistanceValue distance(List source, List target ) throws Ar } - private boolean compare(String tokenBaseForm, String tokenBaseForm2) { + protected boolean compare(String tokenBaseForm, String tokenBaseForm2) { if(ignoreCase){ return tokenBaseForm.equalsIgnoreCase(tokenBaseForm2); } @@ -825,7 +825,7 @@ private boolean compare(String tokenBaseForm, String tokenBaseForm2) { * @return the smaller of three double values * */ - private double minimum(double a, double b, double c) { + protected double minimum(double a, double b, double c) { return Math.min(Math.min(a, b), c); @@ -839,7 +839,7 @@ private double minimum(double a, double b, double c) { * * @throws LexicalResourceException */ - private void initializeWordnet(String path) throws LexicalResourceException { + protected void initializeWordnet(String path) throws LexicalResourceException { logger.info("Wordnet initialization ..."); @@ -869,7 +869,7 @@ private void initializeWordnet(String path) throws LexicalResourceException { * * @throws LexicalResourceException */ - private void initializeGermaNet(String path) throws LexicalResourceException { + protected void initializeGermaNet(String path) throws LexicalResourceException { logger.info("GermaNet initialization ..."); @@ -897,7 +897,7 @@ private void initializeGermaNet(String path) throws LexicalResourceException { * * @throws LexicalResourceException */ - private void initializeEnglishWikipedia(String dbConnection, String dbUser, String dbPasswd) throws 
LexicalResourceException { + protected void initializeEnglishWikipedia(String dbConnection, String dbUser, String dbPasswd) throws LexicalResourceException { logger.info("English Wikipedia initialization ..."); @@ -931,7 +931,7 @@ private void initializeEnglishWikipedia(String dbConnection, String dbUser, Stri * * @throws LexicalResourceException */ - private void initializeItalianWikipedia(String dbConnection, String dbUser, String dbPasswd) throws LexicalResourceException { + protected void initializeItalianWikipedia(String dbConnection, String dbUser, String dbPasswd) throws LexicalResourceException { logger.info("Italian Wikipedia initialization ..."); @@ -973,7 +973,7 @@ private void initializeItalianWikipedia(String dbConnection, String dbUser, Stri * @throws LexicalResourceException */ @SuppressWarnings("unchecked") - private boolean getRulesFromResource(String leftLemma, PartOfSpeech leftPos, + protected boolean getRulesFromResource(String leftLemma, PartOfSpeech leftPos, String rightLemma, PartOfSpeech rightPos) throws LexicalResourceException { //logger.info("leftLemma:" + leftLemma + " leftPos:" + leftPos + "\t" + "rightLemma:" + rightLemma + " " + "rightPos:" + rightPos); @@ -1007,7 +1007,7 @@ private boolean getRulesFromResource(String leftLemma, PartOfSpeech leftPos, * The EditDistanceValue class extends the DistanceValue * to hold the distance calculation result. 
*/ - private class EditDistanceValue extends DistanceValue { + protected class EditDistanceValue extends DistanceValue { public EditDistanceValue(double distance, boolean simBased, double rawValue) { diff --git a/core/src/main/java/eu/excitementproject/eop/core/component/distance/FixedWeightEditDistancewRedis.java b/core/src/main/java/eu/excitementproject/eop/core/component/distance/FixedWeightEditDistancewRedis.java new file mode 100644 index 00000000..cd4160fd --- /dev/null +++ b/core/src/main/java/eu/excitementproject/eop/core/component/distance/FixedWeightEditDistancewRedis.java @@ -0,0 +1,216 @@ + +package eu.excitementproject.eop.core.component.distance; + +import java.util.List; +import java.util.Map; +import java.util.logging.Logger; + +import eu.excitementproject.eop.common.component.lexicalknowledge.LexicalResource; +import eu.excitementproject.eop.common.component.lexicalknowledge.LexicalResourceException; +import eu.excitementproject.eop.common.component.lexicalknowledge.LexicalRule; +import eu.excitementproject.eop.common.configuration.CommonConfig; +import eu.excitementproject.eop.common.exception.ComponentException; +import eu.excitementproject.eop.common.exception.ConfigurationException; +import eu.excitementproject.eop.common.representation.partofspeech.PartOfSpeech; +import eu.excitementproject.eop.common.utilities.configuration.ConfigurationFile; +import eu.excitementproject.eop.common.utilities.configuration.ConfigurationParams; +import eu.excitementproject.eop.distsim.resource.SimilarityStorageBasedLexicalResource; + + +/** + * The FixedWeightedEditDistance class implements the DistanceCalculation interface. + * Given a pair of T-H, each of them represented as a sequences of tokens, the edit distance between + * T and H is the minimum number of operations required to convert T to H. 
+ * FixedWeightedEditDistance implements the simplest form of weighted edit distance that simply uses a + * constant cost for each of the edit operations: match, substitute, insert, delete. + * + *

Relation to Simple Edit Distance

+ * Weighted edit distance agrees with edit distance as a distance assuming the following weights: + * match weight is 0, substitute, insert and delete weights are 1. + * + *

Symmetry

+ * If the insert and delete costs of a character are equal, then weighted edit distance will be + * symmetric. + * + *

Metricity

+ * If the match weight of all tokens is zero, then the distance between a token sequence + * and itself will be zero. + * + * @author Roberto Zanoli /w Redis part by Vivi + * + * + */ +public abstract class FixedWeightEditDistancewRedis extends FixedWeightEditDistance { + + + public static int ruleCounter = 0; + + static Logger logger = Logger.getLogger(FixedWeightEditDistancewRedis.class.getName()); + + + /** + * Construct a fixed weight edit distance with the following constant + * weights for edits: + * match weight is 0, substitute, insert and delete weights are + */ + public FixedWeightEditDistancewRedis() { + + super(); + } + + + /** + * Constructor used to create this object. + * + * @param config the configuration + * + */ + public FixedWeightEditDistancewRedis(CommonConfig config) throws ConfigurationException, ComponentException { + + super(config); + + logger.info("Creating an instance of " + this.getComponentName() + " ..."); + + String[] instancesList = instances.split(","); + + for (int i = 0; i < instancesList.length; i++) { + + String instance = instancesList[i]; + + if (instance.contains("redis")) { + try { + + initializeRedisResource(instance, config); + + } catch (LexicalResourceException e) { + logger.info("Problem initializing redis resource"); + throw new ComponentException(e.getMessage()); + } + } + } + + logger.info("done."); + + } + + + + public FixedWeightEditDistancewRedis(double mMatchWeight, + double mDeleteWeight, double mInsertWeight, + double mSubstituteWeight, boolean stopWordRemoval, String language, + Map resources) throws ConfigurationException, ComponentException { + // TODO Auto-generated constructor stub + super(mMatchWeight, mDeleteWeight, mInsertWeight, mSubstituteWeight, stopWordRemoval, language, resources); + } + + + /** + * Generates an instance of a Redis-based lexical resource, using the corresponding section from the configuration file + * + * @param config the EDA's configuration file + * @throws 
LexicalResourceException + */ + private void initializeRedisResource(String resourceType, CommonConfig config) throws LexicalResourceException { + + logger.info("Redis-based resource initialization : " + resourceType); + + try { + + ConfigurationFile confFile = new ConfigurationFile(config); + + ConfigurationParams confParams = confFile.getModuleConfiguration(resourceType); + + logger.info("Parameters: " + confParams.get("resource-name")); + + @SuppressWarnings("rawtypes") + LexicalResource resource = new SimilarityStorageBasedLexicalResource(confParams); + lexR.add(resource); + +/* try { + logger.info("LR test: "); + + PartOfSpeech pos = new ByCanonicalPartOfSpeech("V"); + + logger.info("Part of speech created: " + pos.getCanonicalPosTag()); + + @SuppressWarnings("rawtypes") + +// List rules = resource.getRulesForLeft("vedere", new ByCanonicalPartOfSpeech("V")); + List rules = new ArrayList(); +// List rules = resource.getRulesForLeft("vedere", null); + + rules = resource.getRulesForLeft("vedere", pos); + + if (rules != null) { + for (LexicalRule r: rules) { + logger.info("Rules for left: " + r.toString()); + } + } + + rules = resource.getRulesForRight("vedere", new ByCanonicalPartOfSpeech("V")); + if (rules != null) { + for (LexicalRule r: rules) { + logger.info("Rules for right: " + r.toString()); + } + } + + } //catch (LexicalResourceException lre) { + catch (Exception lre) { + logger.info("Testing the resource didn't work out"); + lre.printStackTrace(); + } +*/ + } catch (Exception e) { + throw new LexicalResourceException(e.getMessage()); + } + + logger.info("done."); + } + + + /** + * Return true if it exists a relation between leftLemma and rightLemma + * in the lexical resource. 
+ * + * @param leftLemma + * @param leftPos + * @param rightLemma + * @param rightPos + * + * @return true if the rule exists; false otherwise + * + * @throws LexicalResourceException + */ + @Override + @SuppressWarnings("unchecked") + protected boolean getRulesFromResource(String leftLemma, PartOfSpeech leftPos, + String rightLemma, PartOfSpeech rightPos) throws LexicalResourceException { + + //logger.info("leftLemma:" + leftLemma + " leftPos:" + leftPos + "\t" + "rightLemma:" + rightLemma + " " + "rightPos:" + rightPos); + + List> rules = null; + + try { + + for (int i = 0; i < lexR.size(); i++) { + rules = lexR.get(i).getRules(leftLemma, leftPos, rightLemma, rightPos); + if (rules != null && rules.size() > 0) { + ruleCounter++; + return true; + } + } + + } catch (LexicalResourceException e) { + logger.severe(e.getMessage()); + //logger.severe("leftLemma:" + leftLemma + " leftPos:" + leftPos + "\t" + "rightLemma:" + rightLemma + " " + "rightPos:" + rightPos); + //throw new LexicalResourceException(e.getMessage()); + } catch (Exception e) { + logger.severe(e.getMessage()); + //logger.severe("leftLemma:" + leftLemma + " leftPos:" + leftPos + "\t" + "rightLemma:" + rightLemma + " " + "rightPos:" + rightPos); + //throw new LexicalResourceException(e.getMessage()); + } + + return false; + } + +} diff --git a/core/src/main/java/eu/excitementproject/eop/core/component/distance/FixedWeightLemmaEditDistancewRedis.java b/core/src/main/java/eu/excitementproject/eop/core/component/distance/FixedWeightLemmaEditDistancewRedis.java new file mode 100644 index 00000000..d1af3c9a --- /dev/null +++ b/core/src/main/java/eu/excitementproject/eop/core/component/distance/FixedWeightLemmaEditDistancewRedis.java @@ -0,0 +1,110 @@ +package eu.excitementproject.eop.core.component.distance; + +import java.util.Map; + +import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; + +import eu.excitementproject.eop.common.configuration.CommonConfig; +import 
eu.excitementproject.eop.common.exception.ComponentException; +import eu.excitementproject.eop.common.exception.ConfigurationException; + + +/** + * The FixedWeightedLemmaEditDistance class extends FixedWeightedEditDistance. + * Given a pair of T-H, each of them represented as a sequences of tokens (i.e. the lemma of the tokens), the edit distance between + * T and H is the minimum number of operations required to convert T to H. + * + * @author Roberto Zanoli & Vivi@fbk + * + */ +public class FixedWeightLemmaEditDistancewRedis extends FixedWeightEditDistancewRedis { + + /** + * Construct a fixed weight edit distance with the following constant + * weights for edits: + * match weight is 0, substitute, insert and delete weights are + */ + public FixedWeightLemmaEditDistancewRedis() { + + super(); + + } + + + /** + * Constructor used to create this object. All the main parameters of the component are + * exposed in the constructor. Here is an example on how it can be used. + * + *
+	 * {@code
+	 * 
+	 * //setting the weights of the edit distance operations
+	 * double mMatchWeight = 0.0;
+	 * double mDeleteWeight = 0.0;
+	 * double mInsertWeight = 1.0;
+	 * double mSubstituteWeight = 1.0;
+	 * //enable stop word removal so that stop words will be removed.
+	 * boolean stopWordRemoval = true;
+	 * //the component has to work on a data set for Italian language
+	 * String language = "IT";
+	 * //setting the resources: wikipedia and wordnet will be used
+	 * Map resources = new HashMap();
+	 * resources.put("wordnet", "/tmp/wordnet/");
+	 * resources.put("wikipedia", "jdbc:mysql://nathrezim:3306/wikilexresita#johnsmith#mypasswd");
+	 * //creating an instance of the FixedWeightLemmaEditDistancewRedis component
+	 * FixedWeightEditDistance fwed = 
+	 * new FixedWeightLemmaEditDistancewRedis(mMatchWeight, mDeleteWeight, mInsertWeight, mSubstituteWeight, stopWordRemoval, language, resources);
+	 * 
+	 * }
+	 * 
+ * + * @param mMatchWeight weight for match + * @param mDeleteWeight weight for delete + * @param mInsertWeight weight for insert + * @param mSubstituteWeight weight for substitute + * @param stopWordRemoval if stop words has to be removed or not; Possible values are: true, false + * @param language the language of the data the component has to deal with; Possible values are: DE, EN, IT + * @param resources the external resources the component has to use; it is a key/value pairs table. + * The supported resources with their parameters are (reported as key/value pairs): + * wordnet, path of the resource residing in the file system, e.g. /tmp/wordnet/ + * wikipedia, dbConnection#dbUser#dbPasswd, e.g. jdbc:mysql://nathrezim:3306/wikilexresita#johnsmith#mypasswd + * + */ + public FixedWeightLemmaEditDistancewRedis(double mMatchWeight, double mDeleteWeight, double mInsertWeight, double mSubstituteWeight, boolean stopWordRemoval, String language, Map resources) throws ConfigurationException, ComponentException { + + super(mMatchWeight, mDeleteWeight, mInsertWeight, mSubstituteWeight, stopWordRemoval, language, resources); + + } + + + /** + * Constructor used to create this object. 
+ * + * @param config the configuration + * + */ + public FixedWeightLemmaEditDistancewRedis(CommonConfig config) throws ConfigurationException, ComponentException { + + super(config); + + } + + + @Override + public String getComponentName() { + + return "FixedWeightLemmaEditDistancewRedis"; + + } + + + @Override + public String getTokenBaseForm(Token token) { + + return token.getLemma().getValue(); + + } + + +} + diff --git a/core/src/main/java/eu/excitementproject/eop/core/component/lexicalknowledge/geo/ConvertGeoSQL2Redis.java b/core/src/main/java/eu/excitementproject/eop/core/component/lexicalknowledge/geo/ConvertGeoSQL2Redis.java index 2c2ff414..da09cea0 100644 --- a/core/src/main/java/eu/excitementproject/eop/core/component/lexicalknowledge/geo/ConvertGeoSQL2Redis.java +++ b/core/src/main/java/eu/excitementproject/eop/core/component/lexicalknowledge/geo/ConvertGeoSQL2Redis.java @@ -27,14 +27,14 @@ public static void main(String[] args) throws Exception { System.exit(0); } - int lPort = BasicRedisRunner.getInstance().run(args[1]); + int lPort = BasicRedisRunner.getInstance().run(args[1],false); JedisPool lPool = new JedisPool(new JedisPoolConfig(), "localhost",lPort,10000); Jedis lJedis = lPool.getResource(); lJedis.connect(); lJedis.getClient().setTimeoutInfinite(); lJedis.flushAll(); - int rPort = BasicRedisRunner.getInstance().run(args[2]); + int rPort = BasicRedisRunner.getInstance().run(args[2],false); JedisPool rPool = new JedisPool(new JedisPoolConfig(), "localhost",rPort,10000); Jedis rJedis = rPool.getResource(); rJedis.connect(); diff --git a/core/src/main/java/eu/excitementproject/eop/core/component/lexicalknowledge/geo/RedisBasedGeoLexicalResource.java b/core/src/main/java/eu/excitementproject/eop/core/component/lexicalknowledge/geo/RedisBasedGeoLexicalResource.java index e35adc2f..39113813 100644 --- a/core/src/main/java/eu/excitementproject/eop/core/component/lexicalknowledge/geo/RedisBasedGeoLexicalResource.java +++ 
b/core/src/main/java/eu/excitementproject/eop/core/component/lexicalknowledge/geo/RedisBasedGeoLexicalResource.java @@ -55,11 +55,16 @@ public RedisBasedGeoLexicalResource(ConfigurationParams params) throws Configura portRight = params.getInt(Configuration.R2L_REDIS_PORT); } catch (ConfigurationException e) { } - + + boolean bVM = false; + try { + bVM = params.getBoolean(Configuration.REDIS_VM); + } catch (ConfigurationException e) { + } if (hostLeft == null || portLeft == -1 || hostRight == null || portRight == -1) { try { - leftRules = (redisDir == null ? new RedisBasedStringListBasicMap(params.get(Configuration.L2R_REDIS_DB_FILE)) : new RedisBasedStringListBasicMap(params.get(Configuration.L2R_REDIS_DB_FILE),redisDir)); - rightRules = (redisDir == null ? new RedisBasedStringListBasicMap(params.get(Configuration.R2L_REDIS_DB_FILE)) : new RedisBasedStringListBasicMap(params.get(Configuration.R2L_REDIS_DB_FILE), redisDir)); + leftRules = (redisDir == null ? new RedisBasedStringListBasicMap(params.get(Configuration.L2R_REDIS_DB_FILE),bVM) : new RedisBasedStringListBasicMap(params.get(Configuration.L2R_REDIS_DB_FILE),redisDir,bVM)); + rightRules = (redisDir == null ? 
new RedisBasedStringListBasicMap(params.get(Configuration.R2L_REDIS_DB_FILE),bVM) : new RedisBasedStringListBasicMap(params.get(Configuration.R2L_REDIS_DB_FILE), redisDir,bVM)); } catch (Exception e) { throw new RedisRunException(e); } @@ -76,8 +81,8 @@ public RedisBasedGeoLexicalResource(ConfigurationParams params) throws Configura public RedisBasedGeoLexicalResource(String leftRedisDBFile, String rightRedisDBFile) throws UnsupportedPosTagStringException, FileNotFoundException, RedisRunException, LexicalResourceException{ - leftRules = new RedisBasedStringListBasicMap(leftRedisDBFile); - rightRules = new RedisBasedStringListBasicMap(rightRedisDBFile); + leftRules = new RedisBasedStringListBasicMap(leftRedisDBFile, false); + rightRules = new RedisBasedStringListBasicMap(rightRedisDBFile,false); try { NOUN = new BySimplerCanonicalPartOfSpeech(SimplerCanonicalPosTag.NOUN); } catch (UnsupportedPosTagStringException e) { throw new LexicalResourceException("Bug: couldn't construct a new UnspecifiedPartOfSpeech(SimplerCanonicalPosTag.NOUN)",e); } } diff --git a/core/src/main/java/eu/excitementproject/eop/core/component/lexicalknowledge/germanet/GermaNetWrapper.java b/core/src/main/java/eu/excitementproject/eop/core/component/lexicalknowledge/germanet/GermaNetWrapper.java index a3c35737..48833835 100644 --- a/core/src/main/java/eu/excitementproject/eop/core/component/lexicalknowledge/germanet/GermaNetWrapper.java +++ b/core/src/main/java/eu/excitementproject/eop/core/component/lexicalknowledge/germanet/GermaNetWrapper.java @@ -657,6 +657,17 @@ private Set> collectConceptualRules( } } } + + // remove 0-confidence rules. + // (setting 0 confidence on a relation will remove that relation from forming a rule.) + // -- Gil, 2014 September + Set> check = new HashSet>(result); + for(LexicalRule l : check) + { + if (l.getConfidence() == 0) + result.remove(l); + } + return result; } @@ -710,6 +721,17 @@ private Set> collectLexicalRules( } } } + + // remove 0-confidence rules. 
+ // (setting 0 confidence on a relation will remove that relation from forming a rule.) + // -- Gil, 2014 September + Set> check = new HashSet>(result); + for(LexicalRule l : check) + { + if (l.getConfidence() == 0) + result.remove(l); + } + return result; } diff --git a/core/src/main/java/eu/excitementproject/eop/core/component/lexicalknowledge/verb_ocean/VerbOceanLexicalResourceDemo.java b/core/src/main/java/eu/excitementproject/eop/core/component/lexicalknowledge/verb_ocean/VerbOceanLexicalResourceDemo.java index 98ae1150..c62f47ab 100644 --- a/core/src/main/java/eu/excitementproject/eop/core/component/lexicalknowledge/verb_ocean/VerbOceanLexicalResourceDemo.java +++ b/core/src/main/java/eu/excitementproject/eop/core/component/lexicalknowledge/verb_ocean/VerbOceanLexicalResourceDemo.java @@ -33,14 +33,14 @@ public class VerbOceanLexicalResourceDemo { public static void main(String[] args) throws UnsupportedPosTagStringException, LexicalResourceException, ConfigurationException, InitException { System.out.println("Start \n*****************************\n"); - String lLemma = "abandon"; - PartOfSpeech pos2 = new BySimplerCanonicalPartOfSpeech(SimplerCanonicalPosTag.NOUN); - String rLemma = "reconsider"; + String lLemma = "loathe"; + PartOfSpeech pos2 = new BySimplerCanonicalPartOfSpeech(SimplerCanonicalPosTag.VERB); + String rLemma = "dislike"; System.out.println("Looking for all rules from \"" + lLemma + "\" to \"" + rLemma + "\""); Set allowedRelationTypes = Utils.arrayToCollection(new RelationType[]{RelationType.STRONGER_THAN, RelationType.HAPPENS_BEFORE , RelationType.CAN_RESULT_IN}, new LinkedHashSet()); - VerbOceanLexicalResource verbOceanLexR = new VerbOceanLexicalResource(1, new File("//qa-srv/Data/RESOURCES/VerbOcean/verbocean.unrefined.2004-05-20.txt"), + VerbOceanLexicalResource verbOceanLexR = new VerbOceanLexicalResource(1, new File("C:/Java/Packages/biutee/eop_2014.03.31/biutee/data/VerbOcean/verbocean.unrefined.2004-05-20.txt"), 
allowedRelationTypes); verbOceanLexR = new VerbOceanLexicalResource(verbOceanLexR); diff --git a/core/src/main/java/eu/excitementproject/eop/core/metaeda/MetaTEDecision.java b/core/src/main/java/eu/excitementproject/eop/core/metaeda/MetaTEDecision.java new file mode 100644 index 00000000..8397c2f0 --- /dev/null +++ b/core/src/main/java/eu/excitementproject/eop/core/metaeda/MetaTEDecision.java @@ -0,0 +1,42 @@ +package eu.excitementproject.eop.core.metaeda; + +import eu.excitementproject.eop.common.DecisionLabel; +import eu.excitementproject.eop.common.TEDecision; + +public class MetaTEDecision implements TEDecision { + + private DecisionLabel decision; + private double confidence; + private String pairID; + + public MetaTEDecision(DecisionLabel dLabel) { + super(); + this.decision = dLabel; + } + + public MetaTEDecision(DecisionLabel dLabel, Double confidence, String pairID) { + this(dLabel, pairID); + this.confidence = confidence; + } + + public MetaTEDecision(DecisionLabel dLabel, String pairID) { + this(dLabel); + this.pairID = pairID; + } + + @Override + public DecisionLabel getDecision() { + return this.decision; + } + + @Override + public double getConfidence() { + return this.confidence; + } + + @Override + public String getPairID() { + return this.pairID; + } + +} diff --git a/core/src/main/java/eu/excitementproject/eop/core/metaeda/SimpleMetaEDAConfidenceFeatures.java b/core/src/main/java/eu/excitementproject/eop/core/metaeda/SimpleMetaEDAConfidenceFeatures.java new file mode 100644 index 00000000..2b4ac989 --- /dev/null +++ b/core/src/main/java/eu/excitementproject/eop/core/metaeda/SimpleMetaEDAConfidenceFeatures.java @@ -0,0 +1,948 @@ +package eu.excitementproject.eop.core.metaeda; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; 
+import java.util.List; + +import org.apache.log4j.Logger; +import org.apache.uima.jcas.JCas; +import org.uimafit.util.JCasUtil; + +//import weka.classifiers.Classifier; +import weka.classifiers.functions.Logistic; +import weka.core.Attribute; +import weka.core.FastVector; +import weka.core.Instance; +import weka.core.Instances; +import weka.core.SparseInstance; + +import eu.excitement.type.entailment.Pair; +import eu.excitementproject.eop.common.DecisionLabel; +import eu.excitementproject.eop.common.EDABasic; +import eu.excitementproject.eop.common.EDAException; +import eu.excitementproject.eop.common.TEDecision; +import eu.excitementproject.eop.common.configuration.CommonConfig; +import eu.excitementproject.eop.common.configuration.NameValueTable; +import eu.excitementproject.eop.common.exception.ComponentException; +import eu.excitementproject.eop.common.exception.ConfigurationException; +import eu.excitementproject.eop.lap.LAPException; +import eu.excitementproject.eop.lap.PlatformCASProber; + +/** + * The MetaEDA performs as a higher level EDA. It implements the + * EDABasic interface. + * + * It uses multiple initialized EDABasic instances and their classifying results as features to make its own + * decision. + * It has two modes: + * 1) voting: each EDA's DecisionLabel counts as vote for NonEntailment or Entailment. + * SimpleMetaEDAConfidenceFeatures goes with the majority. In case of a tie, it decides NonEntailment. + * Note that there is no training in this mode. + * 2) confidences as features: each EDA's decision and its confidence on this decision is taken as a feature + * for a classifier which is then trained on the input pairs. + * If the decision is "NonEntailment", the numerical feature is the confidence*(-1), if it is "Entailment", the feature is simply the confidence. + * The trained model is stored and can be loaded again to use it for classifying new data. + * Training is performed with a weka classifier. 
+ * + * SimpleMetaEDAConfidenceFeatures is initialized with a configuration file, where the following parameters need to be set: + * - "activatedEDA": the activated EDA, has to be eu.excitementproject.eop.core.SimpleMetaEDAConfidenceFeatures + * - "language": "EN", "DE" or any other language supported in internal EDABasics + * - "confidenceAsFeature": defines the mode (1 or 2), see above + * - "overwrite": whether to overwrite an existing model with the same name or not + * - "modelFile": path to model file + * - "trainDir": path to training data directory + * - "testDir": path to test data directory + * A sample configuration file can be found in core/src/test/resources/configuration-file/MetaEDATest1_DE.xml + * + * Alternatively, it can be initialized with the parameters parameters listed above directly, + * calling initialize(String language, boolean confidenceAsFeatures, boolean overwrite, String modelFile, String trainDir, String testDir). + * Note that we assume here that the activatedEDA is this SimpleMetaEDAConfidenceFeatures and does therefore not require passing the parameter. + * + * Please note that the following steps need to be done before initializing a SimpleMetaEDAConfidenceFeatures instance: + * 1) All EDABasic instances used for the MetaEDA must have been initialized correctly. + * The MetaEDA does not check whether they are correctly initialized. + * Details about how to initialize an EDABasic correctly can be found in their documentation. + * 2) Calling process() or startTraining() requires LAP annotations on test and training data (specified in testDir and trainDir) for the given EDABasic instances. + * Again, the MetaEDA does not check whether the required annotation layers are there. + * For details about the annotation layers required by each EDABasic, refer to the specific EDABasic's documentation. + * + * For usage examples see SimpleMetaEDAConfidenceFeaturesUsageExample.java. 
+ * + * @author Julia Kreutzer + * + */ +public class SimpleMetaEDAConfidenceFeatures implements EDABasic{ + + /** + * the logger, "info" level just reports EDA statuses like "initializing", "training", etc.; + * "debug" level also reports TEDecisions from EDABasic instances and the SimpleMetaEDAConfidenceFeatures's decisions for classified data + */ + public final static Logger logger = Logger.getLogger(SimpleMetaEDAConfidenceFeatures.class.getName()); + + /** + * Constructs a new SimpleMetaEDAConfidenceFeatures instance with a list of already initialized + * basic EDAs. + * @param edas list of already initialized EDABasic instances + */ + public SimpleMetaEDAConfidenceFeatures(ArrayList> edas){ + this.edas = edas; + logger.info("new SimpleMetaEDAConfidenceFeatures with "+edas.size()+" internal EDABasics"); + } + + /** + * returns the classification results + * @return + */ + public HashMap getResults(){ + return this.results; + } + + /** + * returns the classifier + * @return + */ + public Logistic getClassifier(){ + return this.classifier; + } + + /** + * Initializes a SimpleMetaEDAConfidenceFeatures instance with a configuration file, + * where training and decision mode, overwrite mode, + * path to model file, training data and test data directory are defined + * @param config a CommonConfig where parameters and directories for SimpleMetaEDAConfidenceFeatures are defined + */ + @Override + public void initialize(CommonConfig config) throws ConfigurationException, + EDAException, ComponentException { + logger.info("initialize SimpleMetaEDAConfidenceFeatures with configuration file"); + initializeEDA(config); + initializeData(config); + if (!this.confidenceAsFeature){ + // mode 1: do nothing + } + else { + // mode 2: + // load and initialize pre-trained model + initializeModel(config); + } + } + + /** + * Initializes a SimpleMetaEDAConfidenceFeatures instance with parameters (without configuration file) + * that define training and decision mode, overwrite mode, 
+ * path to model file, training data and test data directory + * @param language String, e.g. "EN" or "DE" + * @param confidenceAsFeatures if true: use confidence features, do majority vote otherwise + * @param modelFile String path to model file + * @param trainDir String path to training data directory + * @param testDir String path to test data directory + */ + public void initialize(String language, boolean confidenceAsFeatures, boolean overwrite, String modelFile, String trainDir, String testDir) throws ConfigurationException, + EDAException, ComponentException { + logger.info("initialize SimpleMetaEDAConfidenceFeatures with given parameters"); + initializeEDA(language, confidenceAsFeatures); + initializeData(trainDir, testDir); + if (!this.confidenceAsFeature){ + // mode 1: do nothing + } + else { + // mode 2: + // load and initialize pre-trained model + initializeModel(overwrite, modelFile); + } + } + + + /** + * Starts training on the EDABasic instances' confidence features with the given configuration. + * SimpleMetaEDAConfidenceFeatures initialization is included in this method. + * Note that training is only performed in mode 2 (confidence as features). + * In mode 2) a Logistic classifier is trained on the EDABasic decisions and confidences. + * Training and testing data directories are defined in the configuration file. 
+ */ + @Override + public void startTraining(CommonConfig c) throws EDAException, LAPException { + this.isTrain = true; //set train flag + this.isTest = false; + try { + this.initialize(c); + } catch (ConfigurationException | EDAException | ComponentException e) { + e.printStackTrace(); + } + if (!this.confidenceAsFeature){ + //do nothing: no training in mode 1 + return; + } + else { + //mode 2 + logger.info("Start training with confidences from EDABasic instances as features."); + + ArrayList goldAnswers = new ArrayList(); //stores gold answers + + //xmi files in training directory + File [] xmis = new File(this.trainDir).listFiles(); + + //create attributes: for each EDABasic instance use their name and index as attribute name + FastVector attrs = getAttributes(); + + //build up the dataset from training data + Instances instances = new Instances("EOP", attrs, xmis.length); + + for (File xmi : xmis) { + if (!xmi.getName().endsWith(".xmi")) { + continue; + } + // The annotated pair is added into the CAS. 
+ JCas jcas = PlatformCASProber.probeXmi(xmi, null); + Pair pair = JCasUtil.selectSingle(jcas, Pair.class); + logger.debug("processing pair "+pair.getPairID()); + int pairID = Integer.parseInt(pair.getPairID()); + + String goldAnswer = pair.getGoldAnswer(); //get gold annotation + logger.debug("gold answer: "+goldAnswer); + + //get features from BasicEDAs' confidence scores + ArrayList scores = getFeatures(jcas, pairID); + + //Store gold answer + goldAnswers.add(goldAnswer); + + //add new instance to dataset + Instance instance = new Instance(scores.size()); + instance.setDataset(instances); + for (int j = 0; j < scores.size(); j++){ + Double score = scores.get(j); + instance.setValue((Attribute) attrs.elementAt(j), score); + } + instances.add(instance); + } + + //last attribute is class prediction (either nonentailment or entailment) + FastVector values = new FastVector(); + values.addElement("NONENTAILMENT"); + values.addElement("ENTAILMENT"); + Attribute gold = new Attribute("gold", values); + instances.insertAttributeAt(gold, instances.numAttributes()); + instances.setClassIndex(instances.numAttributes()-1); // set class attribute -> last attribute (gold label) + + //set gold labels for instances + logger.info(instances.numInstances()+" training instances loaded with "+instances.numAttributes()+" attributes"); + for (int k = 0; k goldAnswers = new ArrayList(); //stores gold answers + + //xmi files in training directory + File [] xmis = new File(this.trainDir).listFiles(); + + //create attributes: for each EDABasic instance use their name and index as attribute name + FastVector attrs = getAttributes(); + + //build up the dataset from training data + Instances instances = new Instances("EOP", attrs, xmis.length); + + for (File xmi : xmis) { + if (!xmi.getName().endsWith(".xmi")) { + continue; + } + // The annotated pair is added into the CAS. 
+ JCas jcas = PlatformCASProber.probeXmi(xmi, null); + Pair pair = JCasUtil.selectSingle(jcas, Pair.class); + int pairID = Integer.parseInt(pair.getPairID()); + logger.debug("processing pair "+pairID); + String goldAnswer = pair.getGoldAnswer(); //get gold annotation + logger.debug("gold answer: "+goldAnswer); + //get features from BasicEDAs' confidence scores + ArrayList scores = getFeatures(jcas, pairID); + + //Store gold answer + goldAnswers.add(goldAnswer); + + //add new instance to dataset + Instance instance = new Instance(scores.size()); + instance.setDataset(instances); + for (int j = 0; j < scores.size(); j++){ + Double score = scores.get(j); + instance.setValue((Attribute) attrs.elementAt(j), score); + } + instances.add(instance); + } + + //last attribute is class prediction (either nonentailment or entailment) + FastVector values = new FastVector(); + values.addElement("NONENTAILMENT"); + values.addElement("ENTAILMENT"); + Attribute gold = new Attribute("gold", values); + instances.insertAttributeAt(gold, instances.numAttributes()); + instances.setClassIndex(instances.numAttributes()-1); // set class attribute -> last attribute (gold label) + + //set gold labels for instances + logger.info(instances.numInstances()+" training instances loaded with "+instances.numAttributes()+" attributes"); + for (int k = 0; k in mode 1) just collect decisions from EDABasic instances and go with the majority (or NonEntailment in case of a tie) + * -> in mode 2) collect features from EDABasic instances for the JCas text and classify the data with this SimpleMetaEDAConfidenceFeatures's trained weka classifier + * @param aCas the JCas to process + * @return a MetaTEDecision with decision label, confidence, and pairID for the classified input JCas + */ + @Override + public MetaTEDecision process(JCas aCas) throws EDAException, + ComponentException { + + Pair pair = JCasUtil.selectSingle(aCas, Pair.class); + int pairID = Integer.parseInt(pair.getPairID()); + + //generate the 
confidence features + List features = getFeatures(aCas, pairID); + + + DecisionLabel dLabel; + double[] distribution = new double[2]; //at index 0: probability for NonEntailment, index 1: probability for Entailment + + + //mode 2: classify on features collected from BasicEDAs' decisions + if (this.confidenceAsFeature){ + //create attributes: for each EDABasic instance use their name and index as attribute name + FastVector attrs = getAttributes(); + + //build up the dataset, here only a single instance + Instances instances = new Instances("EOP", attrs, 1); + + //last attribute is class prediction (either nonentailment or entailment) + FastVector values = new FastVector(); + values.addElement("NONENTAILMENT"); + values.addElement("ENTAILMENT"); + instances.insertAttributeAt(new Attribute("prediction", values), instances.numAttributes()); + instances.setClassIndex(edas.size()); // set class attribute -> last attribute which is prediction + + //add new instance to dataset + Instance instance = new SparseInstance(features.size() + 1); + instance.setDataset(instances); + for (int i = 0; i < features.size(); i++){ + Double score = features.get(i); + instance.setValue((Attribute) attrs.elementAt(i), score); + } + instances.add(instance); + + logger.debug("classifying pair no. 
"+pairID); + + //classify instance + double result = 0.0; + try { + result = this.classifier.classifyInstance(instances.firstInstance()); + distribution = this.classifier.distributionForInstance(instances.firstInstance()); + instances.firstInstance().setClassValue(result); + } catch (Exception e) { + e.printStackTrace(); + } + + //determine the result label + String label = instances.firstInstance().classAttribute().value((int)result); + + //convert to a DecisionLabel instance + if (label.toUpperCase().equals(DecisionLabel.Entailment.toString().toUpperCase())) + dLabel = DecisionLabel.Entailment; + else + dLabel = DecisionLabel.NonEntailment; + + logger.debug("DecisionLabel: "+dLabel); + + } else { + //mode 1: majority vote + int decision = 0; + int nonEntCount = 0; //count how often NonEntailment is voted + int entCount = 0; //same for Entailment + for (Double feature : features) { + if (feature < 0){ + decision -= 1; + nonEntCount += 1; + } else { + decision += 1; + entCount += 1; + } + } + distribution[0]=nonEntCount/features.size(); + distribution[1]=entCount/features.size(); + + if (decision <= 0){ + dLabel = DecisionLabel.NonEntailment; + } else { + dLabel = DecisionLabel.Entailment; + } + logger.debug("DecisionLabel after voting: "+dLabel); + } + double confidence = 0; + if (dLabel == DecisionLabel.Entailment){ + confidence = distribution[1]; + this.results.get(pairID)[edas.size()+1] = confidence; //on last position in array comes meta decision + } + else if (dLabel == DecisionLabel.NonEntailment){ + confidence = distribution[0]; + this.results.get(pairID)[edas.size()+1] = -1* confidence; + } + + return new MetaTEDecision(dLabel, confidence, pair.getPairID()); + } + + /** + * shuts down SimpleMetaEDAConfidenceFeatures and disengage all resources + */ + @Override + public void shutdown() { + //disengage resources or reset to default value + this.confidenceAsFeature = false; + this.edas = null; + this.language = ""; + this.modelFile = ""; + this.trainDir = ""; 
+ this.testDir = ""; + this.classifier = null; + this.isTrain = false; + this.isTest = false; + this.overwrite = false; + } + + /** + * get the model file + * @return the path to the modelFile (String) + */ + public String getModelFile() { + return modelFile; + } + + //both isTest and isTrain needed, as initialization can take place before testing or training is defined + /** + * whether SimpleMetaEDAConfidenceFeatures is in testing mode or not + * @return true if SimpleMetaEDAConfidenceFeatures is used for testing, false otherwise + */ + public boolean isTest() { + return isTest; + } + + /** + * whether SimpleMetaEDAConfidenceFeatures is in training mode or not + * @return true if SimpleMetaEDAConfidenceFeatures is used for training, false otherwise + */ + public boolean isTrain() { + return isTrain; + } + + /** + * wether SimpleMetaEDAConfidenceFeatures is used in mode 2) (use EDABasics' decision confidences for training) + * @return true if SimpleMetaEDAConfidenceFeatures is in mode 2) (confidence as features), false otherwise + */ + public boolean isConfidenceAsFeature() { + return confidenceAsFeature; + } + + /** + * get the list of initialized EDABasic instances this SimpleMetaEDAConfidenceFeatures is based on + * @return ArrayList of EDABasic instances + */ + public ArrayList> getEdas() { + return edas; + } + + /** + * get the language, e.g. "DE" for German, "EN" for English + * @return language String + */ + public String getLanguage() { + return language; + } + + /** + * whether SimpleMetaEDAConfidenceFeatures is in "overwrite" mode, i.e. 
if model file does already exist, it is overwritten + * @return true if already existing model files should get overwritten, false otherwise + */ + public boolean isOverwrite() { + return overwrite; + } + + /** + * set the isTest parameter to distinguish between test and training mode + * @param true or false + */ + public void setTest(boolean b) { + this.isTest = b; + } + + /** + * get the path to the test data directory + * @return path to test data directory (String) + */ + public String getTestDir() { + return this.testDir; + } + + /** + * get the path to the training data directory + * @return path to training data directory (String) + */ + public String getTrainDir() { + return this.trainDir; + } + + /** + * the mode: use the EDABasics' confidence scores as features for training (2) or just decide via majority vote (1) + */ + private boolean confidenceAsFeature; //if true: 2nd mode + + /* + * store classification results in hashmap, one entry for each pair, especially useful for testing + */ + private HashMap results = new HashMap(); + + /** + * contains all internal basic EDAs + */ + private ArrayList> edas; + + /** + * the language, e.g. 
"EN" for English + */ + private String language; + + /** + * the path to model file + */ + private String modelFile; + + /** + * the path to training data + */ + private String trainDir; + + /** + * the path to testing data + */ + private String testDir; + + /** + * the weka classifier + */ + private Logistic classifier; + + //both isTest and isTrain needed, as initialization can take place before testing or training is defined + /** + * when true: in training mode + */ + private boolean isTrain = false; + + /** + * when true: in testing mode + */ + private boolean isTest = false; + + /** + * when true: overwrites existing models while training, false: appends "_old" to existing model file + */ + private boolean overwrite = false; + + /** + * Initializes the EDA: + * initializes the language flag from the configuration, + * checks that the correct activated EDA is specified in config, + * and sets the mode (use confidence as features or not). + * @param config the CommonConfig configuration + * @throws ConfigurationException + */ + private void initializeEDA(CommonConfig config) + throws ConfigurationException { + NameValueTable top = config.getSection("PlatformConfiguration"); + if (null == top || !top.getString("activatedEDA").equals(this.getClass().getName())) { + throw new ConfigurationException("Please specify the (correct) EDA."); + } + this.language = top.getString("language"); + if (!(this.language != null)) { + this.language = "EN"; // default language is English + } + + NameValueTable EDA = null; + try { + EDA = config.getSection(this.getClass().getName()); + } catch (ConfigurationException e) { + throw new ConfigurationException(e.getMessage() + " No EDA section."); + } + + if (EDA.getString("confidenceAsFeature") != null){ //default is false + this.confidenceAsFeature = Boolean.parseBoolean(EDA.getString("confidenceAsFeature")); + if (this.confidenceAsFeature){ + logger.info("mode 2: use confidence scores as features"); + } + else { + logger.info("mode 
1: majority vote"); + } + } + else { + throw new ConfigurationException("Please specify SimpleMetaEDAConfidenceFeatures's mode: use confidence scores as features or not."); + } + } + + /** + * Initializes the EDA: + * initializes the language flag + * and sets the mode (use confidence as features or not). + * @param language the language used, e.g. "DE" + * @param confidenceAsFeatures mode 2 (using confidence features for training) if true, mode 1 (majority vote) otherwise + */ + private void initializeEDA(String language, boolean confidenceAsFeature){ + this.language = language; + if (!(this.language != null)) { + this.language = "EN"; // default language is English + } + this.confidenceAsFeature = confidenceAsFeature; + if (this.confidenceAsFeature){ + logger.info("mode 2: use confidence scores as features"); + } + else { + logger.info("mode 1: majority vote"); + } + } + + + /** + * Initializes the model by either reading existing model or creating a new model file. + * If "overwrite" is set to true and SimpleMetaEDAConfidenceFeatures is in training mode, an existing model file with the same name is overwritten. + * If it is set to false, the old model file is renamed with the ending "_old". 
+ * + * @param config the CommonConfig configuration + * @throws ConfigurationException + */ + private void initializeModel(CommonConfig config) + throws ConfigurationException { + + NameValueTable EDA = null; + try { + EDA = config.getSection(this.getClass().getName()); + } catch (ConfigurationException e) { + throw new ConfigurationException(e.getMessage() + + " No EDA section."); + } + + if (EDA.getString("overwrite") != null){ //default is false + this.overwrite = Boolean.parseBoolean(EDA.getString("overwrite")); + } + else { + if (this.isTrain){ + throw new ConfigurationException("Please specify SimpleMetaEDAConfidenceFeatures's overwrite mode."); + } + } + + this.modelFile = EDA.getString("modelFile"); + if (null == this.modelFile){ + throw new ConfigurationException("No model directory specified in config file."); + } + + File file = new File(modelFile); + + if (file.exists()){ + if (this.isTrain && !this.isTest){ + if (this.overwrite){ + logger.info("The existing model will be overwritten."); + } + else{ + String oldModelFile = modelFile + "_old"; + logger.info("The existing model is renamed to "+file.getAbsolutePath()+"_old"); + File oldFile = new File(oldModelFile); + if (oldFile.exists()) + oldFile.delete(); + file.renameTo(oldFile); + } + } + else if (this.isTest){ + logger.info("Reading model from "+file.getAbsolutePath()); + //deserialize model to classifier + ObjectInputStream ois; + try { + ois = new ObjectInputStream(new FileInputStream(this.modelFile)); + this.classifier = (Logistic) ois.readObject(); + ois.close(); + } catch (IOException e) { + e.printStackTrace(); + } catch (ClassNotFoundException e) { + e.printStackTrace(); + } + } + } + else { + if (this.isTrain && !this.isTest){ + logger.info("The trained model will be stored in "+ file.getAbsolutePath()); + try { + file.createNewFile(); + } catch (IOException e) { + e.printStackTrace(); + } + } + else if (this.isTest){ + throw new ConfigurationException("The model specified in the 
configuration does NOT exist! Please give the correct file path."); + } + } + } + + /** + * Initializes the model by either reading existing model or creating a new model file. + * If "overwrite" is set to true and SimpleMetaEDAConfidenceFeatures is in training mode, an existing model file with the same name is overwritten. + * If it is set to false, the old model file is renamed with the ending "_old". + * + * @param overwrite if true: overwrite possibly existing model file with same name, rename it otherwise + * @param modelFile String path to model file + * @throws ConfigurationException + */ + private void initializeModel(boolean overwrite, String modelFile) + throws ConfigurationException { + + this.overwrite = overwrite; + this.modelFile = modelFile; + if (null == this.modelFile){ + throw new ConfigurationException("No model directory specified."); + } + + File file = new File(modelFile); + + if (file.exists()){ + if (this.isTrain && !this.isTest){ + if (this.overwrite){ + logger.info("The existing model will be overwritten."); + } + else{ + String oldModelFile = modelFile + "_old"; + logger.info("The existing model is renamed to "+file.getAbsolutePath()+"_old"); + File oldFile = new File(oldModelFile); + if (oldFile.exists()) + oldFile.delete(); + file.renameTo(oldFile); + } + } + else if (this.isTest){ + logger.info("Reading model from "+file.getAbsolutePath()); + //deserialize model to classifier + ObjectInputStream ois; + try { + ois = new ObjectInputStream(new FileInputStream(this.modelFile)); + this.classifier = (Logistic) ois.readObject(); + ois.close(); + } catch (IOException e) { + e.printStackTrace(); + } catch (ClassNotFoundException e) { + e.printStackTrace(); + } + } + } + else { + if (this.isTrain && !this.isTest){ + logger.info("The trained model will be stored in "+ file.getAbsolutePath()); + try { + file.createNewFile(); + } catch (IOException e) { + e.printStackTrace(); + } + } + else if (this.isTest){ + throw new ConfigurationException("The 
model specified in the configuration does NOT exist! Please give the correct file path."); + } + } + } + + + /** + * Initializes the data, for training and/or testing + * @param config the CommonConfig configuration + * @throws ConfigurationException + */ + private void initializeData(CommonConfig config) + throws ConfigurationException { + NameValueTable EDA = null; + try { + EDA = config.getSection(this.getClass().getName()); + } catch (ConfigurationException e) { + throw new ConfigurationException(e.getMessage() + + " No EDA section."); + } + this.trainDir = EDA.getString("trainDir"); + if (null == trainDir) { + if (this.isTrain && !this.isTest) { + throw new ConfigurationException("Please specify the training data directory."); + } else { + logger.warn("Warning: Please specify the training data directory."); + } + } + this.testDir = EDA.getString("testDir"); + if (null == testDir) { + if (this.isTest && !this.isTrain) { + throw new ConfigurationException("Please specify the testing data directory."); + } else { + logger.warn("Warning: Please specify the testing data directory."); + } + } + } + + /** + * Initializes the data, for training and/or testing + * @throws ConfigurationException + * @param trainDir directory for training data + * @param testDir directory for test data + */ + private void initializeData(String trainDir, String testDir) throws ConfigurationException { + this.trainDir = trainDir; + if (null == trainDir) { + if (this.isTrain && !this.isTest) { + throw new ConfigurationException("Please specify the training data directory."); + } else { + logger.warn("Warning: Please specify the training data directory."); + } + } + this.testDir = testDir; + if (null == testDir) { + if (this.isTest && !this.isTrain) { + throw new ConfigurationException("Please specify the testing data directory."); + } else { + logger.warn("Warning: Please specify the testing data directory."); + } + } + + } + + + /** + * Gets the attributes for all internal EDAs (n internal 
EDAs -> n attributes). + * The attributes are named after the internal EDAs and an additional index + * to prevent ambiguities if more than one EDABasic of the same type is used. + * @return a FastVector with the attributes + */ + private FastVector getAttributes(){ + FastVector attrs = new FastVector(); + for (int i = 0; i < this.edas.size(); i++){ + EDABasic eda = this.edas.get(i); + attrs.addElement(new Attribute(eda.getClass().getSimpleName()+i)); + } + return attrs; + } + + /** + * Retrieves confidence scores from all BasicEDAs for one given JCas. + * Each BasicEDA instance produces one feature each. N edas -> N features. + * @param jcas the JCas to process + * @param pairID the according pairID + * @return an ArrayList of features for the given JCas. + */ + private ArrayList getFeatures(JCas jcas, int pairID) { + ArrayList features = new ArrayList(); + double[] resultsForPair = new double[this.edas.size()+2]; + for (int i=0; i eda = this.edas.get(i); + //process aCas and get confidence + TEDecision decision = null; + try { + decision = eda.process(jcas); + logger.debug(eda.getClass().getSimpleName()+i+"'s decision: "+decision.getDecision()+" "+decision.getConfidence()); + + if (decision.equals(null)){ + throw new EDAException("The internal EDA "+eda.getClass().getSimpleName()+i+"could not process the data." 
+ + "Please check the internal EDA's configuration"); + } + } catch (EDAException | ComponentException e) { + e.printStackTrace(); + } + double confidence = decision.getConfidence(); + DecisionLabel label = decision.getDecision(); + + //use a negative sign for non entailment confidence scores + if (label == DecisionLabel.NonEntailment){ + confidence = confidence * -1; + } + features.add(confidence); + resultsForPair[i+1]= confidence; + this.results.put(pairID, resultsForPair); + } + logger.debug("SimpleMetaEDAConfidenceFeatures features from EDABasic confidences: "+features.toString()); + return features; + } + +} diff --git a/core/src/main/java/eu/excitementproject/eop/core/metaeda/SimpleMetaEDAConfidenceFeaturesUsageExample.java b/core/src/main/java/eu/excitementproject/eop/core/metaeda/SimpleMetaEDAConfidenceFeaturesUsageExample.java new file mode 100644 index 00000000..b84bcaad --- /dev/null +++ b/core/src/main/java/eu/excitementproject/eop/core/metaeda/SimpleMetaEDAConfidenceFeaturesUsageExample.java @@ -0,0 +1,978 @@ +package eu.excitementproject.eop.core.metaeda; + +import java.io.File; +import java.util.ArrayList; +import org.apache.uima.analysis_engine.AnalysisEngineProcessException; +import java.util.Arrays; +import java.util.HashMap; + +import org.apache.commons.io.FileUtils; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.uima.jcas.JCas; +import org.junit.Assume; +import org.uimafit.util.JCasUtil; + +import de.tudarmstadt.ukp.dkpro.core.maltparser.MaltParser; + +import eu.excitement.type.entailment.Pair; +import eu.excitementproject.eop.common.DecisionLabel; +import eu.excitementproject.eop.common.EDABasic; +import eu.excitementproject.eop.common.EDAException; +import eu.excitementproject.eop.common.TEDecision; +import eu.excitementproject.eop.common.configuration.CommonConfig; +import eu.excitementproject.eop.common.exception.ComponentException; +import 
eu.excitementproject.eop.common.exception.ConfigurationException; +import eu.excitementproject.eop.common.utilities.configuration.ImplCommonConfig; +import eu.excitementproject.eop.core.ClassificationTEDecision; +import eu.excitementproject.eop.core.EditDistancePSOEDA; +import eu.excitementproject.eop.core.MaxEntClassificationEDA; +import eu.excitementproject.eop.core.metaeda.SimpleMetaEDAConfidenceFeatures; +import eu.excitementproject.eop.lap.LAPAccess; +import eu.excitementproject.eop.lap.LAPException; +import eu.excitementproject.eop.lap.PlatformCASProber; +import eu.excitementproject.eop.lap.dkpro.MaltParserDE; +import eu.excitementproject.eop.lap.dkpro.MaltParserEN; + +/** + * Caution: this example requires TreeTagger for successful running. see the following file in the source tree, + * /Excitement-Open-Platform/lap/src/scripts/treetagger/README.txt + * or see the following URL + * https://github.com/hltfbk/Excitement-Open-Platform/wiki/Step-by-Step,-TreeTagger-Installation + * === + * === + * + * This class performs as a usage example with tests for MetaEDA. + * The user can test some sample configurations, modify them or create and test a MetaEDA with their own configurations. + * + * MetaEDA performs as a higher level EDA. It implements the + * EDABasic interface. + * + * It uses multiple initialized EDABasic instances and their classifying results as features to make its own + * decision. + * It has two modes: + * 1) voting: each EDA's DecisionLabel counts as vote for NonEntailment or Entailment. + * MetaEDA goes with the majority. In case of a tie, it decides NonEntailment. + * Note that there is no training in this mode. + * 2) confidences as features: each EDA's decision and its confidence on this decision is taken as a feature + * for a classifier which is then trained on the input pairs. + * If the decision is "NonEntailment", the numerical feature is the confidence*(-1), if it is "Entailment", the feature is simply the confidence. 
+ * The trained model is stored and can be loaded again to use it for classifying new data. + * Training is performed with a weka classifier. + * + * MetaEDA is initialized with a configuration file, where the following parameters need to be set: + * - "activatedEDA": the activated EDA, has to be eu.excitementproject.eop.core.MetaEDA + * - "language": "EN", "DE" or any other language supported in internal EDABasics + * - "confidenceAsFeature": defines the mode (1 or 2), see above + * - "overwrite": whether to overwrite an existing model with the same name or not + * - "modelFile": path to model file + * - "trainDir": path to training data directory + * - "testDir": path to test data directory + * A sample configuration file can be found in core/src/test/resources/configuration-file/MetaEDATest1_DE.xml + * + * Alternatively, it can be initialized with the parameters parameters listed above directly, + * calling initialize(String language, boolean confidenceAsFeatures, boolean overwrite, String modelFile, String trainDir, String testDir). + * We assume here that the activatedEDA is this SimpleMetaEDAConfidenceFeatures and does therefore not require passing the parameter. + * + * Please note that the following steps need to be done before initializing a SimpleMetaEDAConfidenceFeatures instance: + * 1) All EDABasic instances used for the MetaEDA must have been initialized correctly. + * The MetaEDA does not check whether they are correctly initialized. + * Details about how to initialize an EDABasic correctly can be found in their documentation. + * 2) Calling process() or startTraining() requires LAP annotations on test and training data (specified in testDir and trainDir) for the given EDABasic instances. + * Again, the MetaEDA does not check whether the required annotation layers are there. + * For details about the annotation layers required by each EDABasic, refer to the specific EDABasic's documentation. 
+ * + * Although the examples in this class do only cover English and German, the usage of SimpleMetaEDAConfidenceFeatures is not restricted to any language. + * In order to use SimpleMetaEDAConfidenceFeatures for a language, the user needs all EDABasic instances to be able to handle the given language, and corresponding test and training data in RTE-format. + * + * @author Julia Kreutzer + * + */ +public class SimpleMetaEDAConfidenceFeaturesUsageExample { + static Logger logger = Logger.getLogger(SimpleMetaEDAConfidenceFeatures.class + .getName()); + + /** + * The main method calls the two methods "testDE" and "testEN", methods for running German or English tests respectively. + * The user can add own test methods to run, or comment out test methods which they wish not to run. + * @param args + */ + public static void main(String[] args){ + logger.setLevel(Level.DEBUG); //change level to "INFO" if you want to skip detailed information about processes + SimpleMetaEDAConfidenceFeaturesUsageExample test = new SimpleMetaEDAConfidenceFeaturesUsageExample(); + + //perform tests contained in testDE method for German + test.testDE(); + //perform tests contained in testEN method for English + test.testEN(); + } + + /** + * tests for German with sample configurations + * - comment out tests you wish not to run + * - or: modify code in called test classed + * test1DE: majority vote, training and testing with TIE: Base and Edits: PSO + * test2DE: confidence as features, training and testing with TIE: Base+DB and Edits: PSO + * test3DE: confidence as features, loading the model created in test2DE and testing + */ + public void testDE(){ + test1DE(); + test2DE(); //running all three tests takes a long time + test3DE(); //test 2 has to run before 3 + } + + /** + * tests for English with sample configurations + * - comment out tests you wish not to run! 
+ * - or: modify code in called test classed + * test1EN: similar to test1DE (majority vote), training and testing, TIE: Base and Edits: PSO + * test2EN: similar to test2DE (confidence as features), training and testing with TIE: Base and Edits: PSO + * test3EN: similar to test3DE (confidence as features), loading the model created in test2En and testing + */ + public void testEN(){ + test1EN(); + test2EN(); //running all three tests takes a long time + test3EN(); //test 2 has to run before 3 + } + + /** + * Tests MetaEDA in mode 1 (majority vote) with two internal EDAs for German: 1)TIE:Base and 2)Edits:PSO. + * + * 1) loads MetaEDA configuration file + * 2) initializes TIE and Edits instance + * 3) constructs a MetaEDA with the two EDABasics + * 4) preprocess test data by calling preprocess(MetaEDA) method + * 5) process test data with MetaEDA (majority vote) by calling testMetaEDA(MetaEDA) method and prints results to stdout + * + * The sample configuration file is loaded from "./src/test/resources/configuration-file/MetaEDATest1_DE.xml" + * Test and training data, model file directory, overwrite mode are defined there. + * + * Each testing data sample is first processed by both internal EDAs. + * Their decisions are consequently used for the MetaEDA to find a meta decision, which goes with the majority. + * E.g. Both EDABasics return "NonEntailment", so MetaEDA will return "NonEntailment" as well. + * Or the opposite case: both return "Entailment", so MetaEDA decides "Entailment" as well. + * If one of them votes for "Entailment", and one for "NonEntailment", MetaEDA will decide "NonEntailment", as reaction to a tie. + * + * Of course, more than two EDABasic instances can be included. + * Note that they need to be initialized before constructing a MetaEDA. + * + * Note that some EDABasics require certain linguistic preprocessing steps. 
+ * The user has to provide these annotation layers on training and test data by calling linguistic preprocessing tools in the preprocess(MetaEDA) method. + * For this example, data is preprocessed with TreeTagger. + * + * @throws EDAException + * @throws ComponentException + */ + public void test1DE(){ + + logger.info("SimpleMetaEDAConfidenceFeatures test (mode 1) started"); + File metaconfigFile = new File("./src/test/resources/configuration-file/MetaEDATest1_DE.xml"); + + Assume.assumeTrue(metaconfigFile.exists()); + CommonConfig metaconfig = null; + try { + //read in the configuration from the file + metaconfig = new ImplCommonConfig(metaconfigFile); + } catch (ConfigurationException e) { + e.printStackTrace(); + } + Assume.assumeNotNull(metaconfig); + + ArrayList> edas = new ArrayList>(); + + //initialize TIE instance + EDABasic eda1 = new MaxEntClassificationEDA(); + EDABasic meceda = eda1; + File mecedaconfigfile = new File("./src/main/resources/configuration-file/MaxEntClassificationEDA_Base_DE.xml"); + CommonConfig mecedaconfig = null; + try { + // read in the configuration from the file + mecedaconfig = new ImplCommonConfig(mecedaconfigfile); + logger.info("MaxEntClassification EDA config file read"); + } catch (ConfigurationException e) { + e.printStackTrace(); + } + Assume.assumeNotNull(mecedaconfig); + + logger.info("initialize MaxEntClassification and load model"); + try { + meceda.initialize(mecedaconfig); + } catch (ConfigurationException | EDAException | ComponentException e1) { + e1.printStackTrace(); + } + edas.add(meceda); + + //initialize EditDistancePSOEDA + EditDistancePSOEDA eda2 = new EditDistancePSOEDA(); + EDABasic edpsoeda = eda2; + File edpsoedaconfigfile = new File("./src/main/resources/configuration-file/EditDistancePSOEDA_DE.xml"); + CommonConfig edpsoedaconfig = null; + try { + // read in the configuration from the file + edpsoedaconfig = new ImplCommonConfig(edpsoedaconfigfile); + logger.info("EditDistancePSO EDA config file 
read"); + } catch (ConfigurationException e) { + e.printStackTrace(); + } + Assume.assumeNotNull(edpsoedaconfig); + + logger.info("initialize EditDistancePSO and load model"); + try { + edpsoeda.initialize(edpsoedaconfig); + } catch (ConfigurationException | EDAException | ComponentException e1) { + e1.printStackTrace(); + } + edas.add(edpsoeda); + + //construct SimpleMetaEDAConfidenceFeatures + SimpleMetaEDAConfidenceFeatures meda = new SimpleMetaEDAConfidenceFeatures(edas); + //preprocess test and training data + try { + meda.initialize(metaconfig); + preprocess(meda); + logger.info("Initialization done."); + } catch (Exception e) { + e.printStackTrace(); + } + + testMetaEDA(meda); + + meda.shutdown(); + logger.info("EDA shuts down."); + } + + + /** + * Tests SimpleMetaEDAConfidenceFeatures in training mode 2 (confidence as features) with two internal EDAs for German: 1)TIE:Base+DB and 2)Edits:PSO + * + * 1) loads SimpleMetaEDAConfidenceFeatures configuration file + * 2) initializes TIE and Edits instance + * 3) constructs a SimpleMetaEDAConfidenceFeatures with the two EDABasics + * 4) preprocess test data by calling preprocess(SimpleMetaEDAConfidenceFeatures) method + * 5) trains the SimpleMetaEDAConfidenceFeatures on training data with EDABasic confidences as features + * 5) process test data with SimpleMetaEDAConfidenceFeatures (majority vote) by calling testMetaEDA(MetaEDA) method and prints results to stdout + * + * The sample configuration file is loaded from "./src/test/resources/configuration-file/MetaEDATest2_DE.xml" + * Test and training data, model file directory, overwrite mode are defined there. + * + * Each training data sample is first processed by both internal EDAs. + * Their decisions are consequently used for the SimpleMetaEDAConfidenceFeatures to find a meta decision by serving as features for training of a weka classifier. + * E.g. 
One EDABasic returns "NonEntailment" and the confidence 0.4, so SimpleMetaEDAConfidenceFeatures uses the feature "-0.4" for training. + * Another EDABasic returns "Entailment" with confidence 0.8, so SimpleMetaEDAConfidenceFeatures uses the feature "0.8" for training. + * After training, the MetaEDA model is serialized and stored in the file defined in the configuration. + * + * Of course, more than two EDABasic instances can be included. + * Note that they need to be initialized before constructing a SimpleMetaEDAConfidenceFeatures. + * + * @throws EDAException + * @throws ComponentException + */ + public void test2DE() { + + logger.info("SimpleMetaEDAConfidenceFeatures test (mode 2) started"); + + File metaconfigFile = new File("./src/test/resources/configuration-file/MetaEDATest2_DE.xml"); + + Assume.assumeTrue(metaconfigFile.exists()); + CommonConfig metaconfig = null; + try { + // read in the configuration from the file + metaconfig = new ImplCommonConfig(metaconfigFile); + } catch (ConfigurationException e) { + e.printStackTrace(); + } + Assume.assumeNotNull(metaconfig); + + ArrayList> edas = new ArrayList>(); + + //initialize TIE instance + EDABasic tieEDA = new MaxEntClassificationEDA(); + EDABasic meceda2 = tieEDA; + File mecedaconfigfile2 = new File("./src/main/resources/configuration-file/MaxEntClassificationEDA_Base+DS_DE.xml"); + CommonConfig mecedaconfig2 = null; + try { + // read in the configuration from the file + mecedaconfig2 = new ImplCommonConfig(mecedaconfigfile2); + logger.info("MaxEntClassification EDA 2 config file read"); + } catch (ConfigurationException e) { + e.printStackTrace(); + } + Assume.assumeNotNull(mecedaconfig2); + + logger.info("initialize MaxEntClassification 2 and load model"); + try { + meceda2.initialize(mecedaconfig2); + } catch (ConfigurationException | EDAException | ComponentException e1) { + e1.printStackTrace(); + } + edas.add(meceda2); + + //initialize EditDistancePSOEDA + EditDistancePSOEDA eda3 = new 
EditDistancePSOEDA(); + EDABasic edpsoeda = eda3; + File edpsoedaconfigfile = new File("./src/main/resources/configuration-file/EditDistancePSOEDA_DE.xml"); + CommonConfig edpsoedaconfig = null; + try { + // read in the configuration from the file + edpsoedaconfig = new ImplCommonConfig(edpsoedaconfigfile); + logger.info("EditDistancePSO EDA config file read"); + } catch (ConfigurationException e) { + e.printStackTrace(); + } + Assume.assumeNotNull(edpsoedaconfig); + + logger.info("initialize EditDistancePSO and load model"); + try { + edpsoeda.initialize(edpsoedaconfig); + } catch (ConfigurationException | EDAException | ComponentException e1) { + e1.printStackTrace(); + } + edas.add(edpsoeda); + + //construct meta EDA + SimpleMetaEDAConfidenceFeatures meda = new SimpleMetaEDAConfidenceFeatures(edas); + //preprocess test and training data + try { + meda.initialize(metaconfig); + preprocess(meda); + meda.startTraining(metaconfig); + } catch (Exception e) { + e.printStackTrace(); + } + testMetaEDA(meda); + + meda.shutdown(); + logger.info("EDA shuts down."); + } + + + /** + * Tests SimpleMetaEDAConfidenceFeatures processing with model file created in test2DE for German (two internal EDAs for German: 1)TIE:Base+DB and 2)Edits:PSO). + * + * 1) loads SimpleMetaEDAConfidenceFeatures configuration file + * 2) initializes TIE and Edits instance + * 3) constructs a SimpleMetaEDAConfidenceFeatures with the two EDABasics and loads trained model + * 4) preprocess test data by calling preprocess(MetaEDA) method + * 5) process test data with SimpleMetaEDAConfidenceFeatures (majority vote) by calling testMetaEDA(MetaEDA) method and prints results to stdout + * + * The sample configuration file is loaded from "./src/test/resources/configuration-file/MetaEDATest3_DE.xml" + * Test and training data, model file directory, overwrite mode are defined there. + * + * Each test data sample is first processed by both internal EDAs. 
+ * Their decisions are consequently used for the SimpleMetaEDAConfidenceFeatures to find a meta decision by serving as features for classifying with the pre-trained weka classifier. + * E.g. One EDABasic returns "NonEntailment" and the confidence 0.4, so SimpleMetaEDAConfidenceFeatures uses the feature "-0.4" for classifying. + * Another EDABasic returns "Entailment" with confidence 0.8, so SimpleMetaEDAConfidenceFeatures uses the feature "0.8" for classifying. + * + * For consistencies sake it is important to initialize and use the same EDABasics as in test2DE(). + * Note that they need to be initialized before constructing a SimpleMetaEDAConfidenceFeatures. + * @throws EDAException + * @throws ComponentException + */ + public void test3DE(){ + + logger.info("SimpleMetaEDAConfidenceFeatures test (mode 2) processing started"); + + File metaconfigFile = new File("./src/test/resources/configuration-file/MetaEDATest2_DE.xml"); + + Assume.assumeTrue(metaconfigFile.exists()); + CommonConfig metaconfig = null; + try { + // read in the configuration from the file + metaconfig = new ImplCommonConfig(metaconfigFile); + } catch (ConfigurationException e) { + e.printStackTrace(); + } + Assume.assumeNotNull(metaconfig); + + ArrayList> edas = new ArrayList>(); + + //initialize TIE instance + EDABasic eda4 = new MaxEntClassificationEDA(); + EDABasic meceda2 = eda4; + File mecedaconfigfile2 = new File("./src/main/resources/configuration-file/MaxEntClassificationEDA_Base+DS_DE.xml"); + CommonConfig mecedaconfig2 = null; + try { + // read in the configuration from the file + mecedaconfig2 = new ImplCommonConfig(mecedaconfigfile2); + logger.info("MaxEntClassification EDA config file read"); + } catch (ConfigurationException e) { + e.printStackTrace(); + } + Assume.assumeNotNull(mecedaconfig2); + + logger.info("initialize MaxEntClassification and load model"); + try { + meceda2.initialize(mecedaconfig2); + } catch (ConfigurationException | EDAException | ComponentException e1) { + 
e1.printStackTrace(); + } + edas.add(meceda2); + + //initialize EditDistancePSOEDA + EditDistancePSOEDA eda5 = new EditDistancePSOEDA(); + EDABasic edpsoeda = eda5; + File edpsoedaconfigfile = new File("./src/main/resources/configuration-file/EditDistancePSOEDA_DE.xml"); + CommonConfig edpsoedaconfig = null; + try { + // read in the configuration from the file + edpsoedaconfig = new ImplCommonConfig(edpsoedaconfigfile); + logger.info("EditDistancePSO EDA config file read"); + } catch (ConfigurationException e) { + e.printStackTrace(); + } + Assume.assumeNotNull(edpsoedaconfig); + + logger.info("initialize EditDistancePSO and load model"); + try { + edpsoeda.initialize(edpsoedaconfig); + } catch (ConfigurationException | EDAException | ComponentException e1) { + e1.printStackTrace(); + } + edas.add(edpsoeda); + + //construct meta EDA + SimpleMetaEDAConfidenceFeatures meda = new SimpleMetaEDAConfidenceFeatures(edas); + //preprocess test and training data + try { + meda.setTest(true); + meda.initialize(metaconfig); + preprocess(meda); + } catch (Exception e) { + e.printStackTrace(); + } + testMetaEDA(meda); + + meda.shutdown(); + logger.info("EDA shuts down."); + } + + /** + * Tests SimpleMetaEDAConfidenceFeatures in mode 1 (majority vote) with two internal EDAs for English: 1)TIE:Base and 2)Edits:PSO. + * + * 1) loads SimpleMetaEDAConfidenceFeatures configuration file + * 2) initializes TIE and Edits instance + * 3) constructs a SimpleMetaEDAConfidenceFeatures with the two EDABasics + * 4) preprocess test data by calling preprocess(MetaEDA) method + * 5) process test data with SimpleMetaEDAConfidenceFeatures (majority vote) by calling testMetaEDA(MetaEDA) method and prints results to stdout + * + * The sample configuration file is loaded from "./src/test/resources/configuration-file/MetaEDATest1_EN.xml" + * Test and training data, model file directory, overwrite mode are defined there. + * + * Each testing data sample is first processed by both internal EDAs. 
+ * Their decisions are consequently used for the SimpleMetaEDAConfidenceFeatures to find a meta decision, which goes with the majority. + * E.g. Both EDABasics return "NonEntailment", so SimpleMetaEDAConfidenceFeatures will return "NonEntailment" as well. + * Or the opposite case: both return "Entailment", so SimpleMetaEDAConfidenceFeatures decides "Entailment" as well. + * If one of them votes for "Entailment", and one for "NonEntailment", SimpleMetaEDAConfidenceFeatures will decide "NonEntailment", as reaction to a tie. + * + * Of course, more than two EDABasic instances can be included. + * Note that they need to be initialized before constructing a SimpleMetaEDAConfidenceFeatures. + * + * Note that some EDABasics require certain linguistic preprocessing steps. + * The user has to provide these annotation layers on training and test data by calling linguistic preprocessing tools in the preprocess(MetaEDA) method. + * For this example, data is preprocessed with TreeTagger. + * + * @throws EDAException + * @throws ComponentException + * @throws ConfigurationException + */ + public void test1EN() { + + logger.info("SimpleMetaEDAConfidenceFeatures test (mode 1) started"); + + File metaconfigFile = new File("./src/test/resources/configuration-file/MetaEDATest1_EN.xml"); + + Assume.assumeTrue(metaconfigFile.exists()); + CommonConfig metaconfig = null; + // read in the configuration from the file + try { + metaconfig = new ImplCommonConfig(metaconfigFile); + } catch (ConfigurationException e2) { + e2.printStackTrace(); + } + Assume.assumeNotNull(metaconfig); + + ArrayList> edas = new ArrayList>(); + + //initialize TIE instance + EDABasic eda1 = new MaxEntClassificationEDA(); + EDABasic meceda = eda1; + File mecedaconfigfile = new File("./src/test/resources/configuration-file/MaxEntClassificationEDA_Base_EN.xml"); + CommonConfig mecedaconfig = null; + // read in the configuration from the file + try { + mecedaconfig = new ImplCommonConfig(mecedaconfigfile); + } catch 
(ConfigurationException e2) { + e2.printStackTrace(); + } + logger.info("MaxEntClassification EDA config file read"); + Assume.assumeNotNull(mecedaconfig); + + logger.info("initialize MaxEntClassification and load model"); + try { + meceda.initialize(mecedaconfig); + } catch (ConfigurationException | EDAException | ComponentException e1) { + e1.printStackTrace(); + } + edas.add(meceda); + +// //initialize EditDistancePSOEDA + EditDistancePSOEDA eda2 = new EditDistancePSOEDA(); + EDABasic edpsoeda = eda2; + File edpsoedaconfigfile = new File("./src/main/resources/configuration-file/EditDistancePSOEDA_EN.xml"); + CommonConfig edpsoedaconfig = null; + try { + // read in the configuration from the file + edpsoedaconfig = new ImplCommonConfig(edpsoedaconfigfile); + logger.info("EditDistancePSO EDA config file read"); + } catch (ConfigurationException e) { + e.printStackTrace(); + } + Assume.assumeNotNull(edpsoedaconfig); + + logger.info("initialize EditDistancePSO and load model"); + try { + edpsoeda.initialize(edpsoedaconfig); + } catch (ConfigurationException | EDAException | ComponentException e1) { + e1.printStackTrace(); + } + edas.add(edpsoeda); + + //construct meta EDA + SimpleMetaEDAConfidenceFeatures meda = new SimpleMetaEDAConfidenceFeatures(edas); + //preprocess test and training data + try { + meda.initialize(metaconfig); + preprocess(meda); + logger.info("Initialization done."); + } catch (Exception e) { + e.printStackTrace(); + } + + testMetaEDA(meda); + + meda.shutdown(); + logger.info("EDA shuts down."); + } + + /** + * Tests SimpleMetaEDAConfidenceFeatures in training mode 2 (confidence as features) with two internal EDAs for English: 1)TIE:Base+DB and 2)Edits:PSO + * + * 1) loads SimpleMetaEDAConfidenceFeatures configuration file + * 2) initializes TIE and Edits instance + * 3) constructs a SimpleMetaEDAConfidenceFeatures with the two EDABasics + * 4) preprocess test data by calling preprocess(MetaEDA) method + * 5) trains the 
SimpleMetaEDAConfidenceFeatures on training data with EDABasic confidences as features + * 5) process test data with SimpleMetaEDAConfidenceFeatures (majority vote) by calling testMetaEDA(MetaEDA) method and prints results to stdout + * + * The sample configuration file is loaded from "./src/test/resources/configuration-file/MetaEDATest2_EN.xml" + * Test and training data, model file directory, overwrite mode are defined there. + * + * Each training data sample is first processed by both internal EDAs. + * Their decisions are consequently used for the SimpleMetaEDAConfidenceFeatures to find a meta decision by serving as features for training of a weka classifier. + * E.g. One EDABasic returns "NonEntailment" and the confidence 0.4, so SimpleMetaEDAConfidenceFeatures uses the feature "-0.4" for training. + * Another EDABasic returns "Entailment" with confidence 0.8, so SimpleMetaEDAConfidenceFeatures uses the feature "0.8" for training. + * After training, the SimpleMetaEDAConfidenceFeatures model is serialized and stored in the file defined in the configuration. + * + * Of course, more than two EDABasic instances can be included. + * Note that they need to be initialized before constructing a SimpleMetaEDAConfidenceFeatures. + * + * Note that some EDABasics require certain linguistic preprocessing steps. + * The user has to provide these annotation layers on training and test data by calling linguistic preprocessing tools in the preprocess(MetaEDA) method. + * For this example, data is preprocessed with TreeTagger. 
+ * + * @throws EDAException + * @throws ComponentException + */ + public void test2EN(){ + + logger.info("SimpleMetaEDAConfidenceFeatures test (mode 2) started"); + + File metaconfigFile = new File("./src/test/resources/configuration-file/MetaEDATest2_EN.xml"); + + Assume.assumeTrue(metaconfigFile.exists()); + CommonConfig metaconfig = null; + try { + // read in the configuration from the file + metaconfig = new ImplCommonConfig(metaconfigFile); + } catch (ConfigurationException e) { + e.printStackTrace(); + } + Assume.assumeNotNull(metaconfig); + + ArrayList> edas = new ArrayList>(); + + //initialize TIE instance + EDABasic tieEDA = new MaxEntClassificationEDA(); + EDABasic meceda2 = tieEDA; + File mecedaconfigfile2 = new File("./src/test/resources/configuration-file/MaxEntClassificationEDA_Base_EN.xml"); + CommonConfig mecedaconfig2 = null; + try { + // read in the configuration from the file + mecedaconfig2 = new ImplCommonConfig(mecedaconfigfile2); + logger.info("MaxEntClassification EDA 2 config file read"); + } catch (ConfigurationException e) { + e.printStackTrace(); + } + Assume.assumeNotNull(mecedaconfig2); + + logger.info("initialize MaxEntClassification 2 and load model"); + try { + meceda2.initialize(mecedaconfig2); + } catch (ConfigurationException | EDAException | ComponentException e1) { + e1.printStackTrace(); + } + edas.add(meceda2); + + //initialize EditDistancePSOEDA + EditDistancePSOEDA eda3 = new EditDistancePSOEDA(); + EDABasic edpsoeda = eda3; + File edpsoedaconfigfile = new File("./src/main/resources/configuration-file/EditDistancePSOEDA_EN.xml"); + CommonConfig edpsoedaconfig = null; + try { + // read in the configuration from the file + edpsoedaconfig = new ImplCommonConfig(edpsoedaconfigfile); + logger.info("EditDistancePSO EDA config file read"); + } catch (ConfigurationException e) { + e.printStackTrace(); + } + Assume.assumeNotNull(edpsoedaconfig); + + logger.info("initialize EditDistancePSO and load model"); + try { + 
edpsoeda.initialize(edpsoedaconfig); + } catch (ConfigurationException | EDAException | ComponentException e1) { + e1.printStackTrace(); + } + edas.add(edpsoeda); + + //construct meta EDA + SimpleMetaEDAConfidenceFeatures meda = new SimpleMetaEDAConfidenceFeatures(edas); + //preprocess test and training data + try { + meda.initialize(metaconfig); + preprocess(meda); + meda.startTraining(metaconfig); + } catch (Exception e) { + e.printStackTrace(); + } + testMetaEDA(meda); + + meda.shutdown(); + logger.info("EDA shuts down."); + } + + + /** + * Tests SimpleMetaEDAConfidenceFeatures processing with model file created in test2EN for English (two internal EDAs for English: 1)TIE:Base+DB and 2)Edits:PSO). + * + * 1) loads SimpleMetaEDAConfidenceFeatures configuration file + * 2) initializes TIE and Edits instance + * 3) constructs a SimpleMetaEDAConfidenceFeatures with the two EDABasics and loads trained model + * 4) preprocess test data by calling preprocess(MetaEDA) method + * 5) process test data with SimpleMetaEDAConfidenceFeatures (majority vote) by calling testMetaEDA(MetaEDA) method and prints results to stdout + * + * The sample configuration file is loaded from "./src/test/resources/configuration-file/MetaEDATest3_EN.xml" + * Test and training data, model file directory, overwrite mode are defined there. + * + * Each test data sample is first processed by both internal EDAs. + * Their decisions are consequently used for the SimpleMetaEDAConfidenceFeatures to find a meta decision by serving as features for classifying with the pre-trained weka classifier. + * E.g. One EDABasic returns "NonEntailment" and the confidence 0.4, so SimpleMetaEDAConfidenceFeatures uses the feature "-0.4" for classifying. + * Another EDABasic returns "Entailment" with confidence 0.8, so SimpleMetaEDAConfidenceFeatures uses the feature "0.8" for classifying. + * + * For consistencies sake it is important to initialize and use the same EDABasics as in test2EN(). 
+ * Note that they need to be initialized before constructing a SimpleMetaEDAConfidenceFeatures. + * + * Note that some EDABasics require certain linguistic preprocessing steps. + * The user has to provide these annotation layers on training and test data by calling linguistic preprocessing tools in the preprocess(MetaEDA) method. + * For this example, data is preprocessed with TreeTagger. + * + * @throws EDAException + * @throws ComponentException + */ + public void test3EN(){ + + logger.info("SimpleMetaEDAConfidenceFeatures test (mode 2) processing started"); + + File metaconfigFile = new File("./src/test/resources/configuration-file/MetaEDATest2_EN.xml"); + + Assume.assumeTrue(metaconfigFile.exists()); + CommonConfig metaconfig = null; + try { + // read in the configuration from the file + metaconfig = new ImplCommonConfig(metaconfigFile); + } catch (ConfigurationException e) { + e.printStackTrace(); + } + Assume.assumeNotNull(metaconfig); + + ArrayList> edas = new ArrayList>(); + + //initialize TIE instance + EDABasic eda4 = new MaxEntClassificationEDA(); + EDABasic meceda2 = eda4; + File mecedaconfigfile2 = new File("./src/test/resources/configuration-file/MaxEntClassificationEDA_Base_EN.xml"); + CommonConfig mecedaconfig2 = null; + try { + // read in the configuration from the file + mecedaconfig2 = new ImplCommonConfig(mecedaconfigfile2); + logger.info("MaxEntClassification EDA 2 config file read"); + } catch (ConfigurationException e) { + e.printStackTrace(); + } + Assume.assumeNotNull(mecedaconfig2); + + logger.info("initialize MaxEntClassification 2 and load model"); + try { + meceda2.initialize(mecedaconfig2); + } catch (ConfigurationException | EDAException | ComponentException e1) { + e1.printStackTrace(); + } + edas.add(meceda2); + + //initialize EditDistancePSOEDA + EditDistancePSOEDA eda5 = new EditDistancePSOEDA(); + EDABasic edpsoeda = eda5; + File edpsoedaconfigfile = new File("./src/main/resources/configuration-file/EditDistancePSOEDA_EN.xml"); 
+ CommonConfig edpsoedaconfig = null; + try { + // read in the configuration from the file + edpsoedaconfig = new ImplCommonConfig(edpsoedaconfigfile); + logger.info("EditDistancePSO EDA config file read"); + } catch (ConfigurationException e) { + e.printStackTrace(); + } + Assume.assumeNotNull(edpsoedaconfig); + + logger.info("initialize EditDistancePSO and load model"); + try { + edpsoeda.initialize(edpsoedaconfig); + } catch (ConfigurationException | EDAException | ComponentException e1) { + e1.printStackTrace(); + } + edas.add(edpsoeda); + + //construct meta EDA + SimpleMetaEDAConfidenceFeatures meda = new SimpleMetaEDAConfidenceFeatures(edas); + //preprocess test and training data + try { + meda.setTest(true); + meda.initialize(metaconfig); + preprocess(meda); + } catch (Exception e) { + e.printStackTrace(); + } + testMetaEDA(meda); + + meda.shutdown(); + logger.info("EDA shuts down."); + } + + /** + * set to true once pre-processing for German training and test data is done + */ + private boolean preprocessedDE = false; + + /** + * set to true once pre-processing for English training and test data is done + */ + private boolean preprocessedEN = false; + + /** + * Performs test on testing data with initialized MetaEDA and + * prints results to stdout. + * + * First, CASes are built for input data. + * Then, they are processed by MetaEDA. + * A table for all pairs with their gold labels, the internal BasicEDAs' decisions and the MetaEDA's decision is printed. + * Finally, results are printed. This includes the number of correctly predicted T-H-pairs, the number of all input pairs, and the percentage of correct predictions. 
+ * + * @param meda initialized MetaEDA + * @throws EDAException + * @throws ComponentException + */ + private void testMetaEDA(SimpleMetaEDAConfidenceFeatures meda){ + int correct = 0; + int sum = 0; + logger.info("build CASes for input sentence pairs"); + + MaltParser mp = new MaltParser(); + + for (File xmi : FileUtils.listFiles(new File(meda.getTestDir()), new String[] {"xmi"}, false)){ + JCas jcas = null; + try { + jcas = PlatformCASProber.probeXmi(xmi, null); + PlatformCASProber.probeCas(jcas, null); + try { + mp.process(jcas); + } catch (AnalysisEngineProcessException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + } catch (LAPException e) { + e.printStackTrace(); + } + Pair pair = JCasUtil.selectSingle(jcas, Pair.class); + int pairID = Integer.parseInt(pair.getPairID()); + DecisionLabel goldAnswer = null; + try { + goldAnswer = DecisionLabel.getLabelFor(pair.getGoldAnswer()); + } catch (EDAException e) { + e.printStackTrace(); + } //get gold annotation + TEDecision decision = null; + try { + decision = meda.process(jcas); + if (goldAnswer.is(DecisionLabel.NonEntailment)){ + meda.getResults().get(pairID)[0]=-1; + } + else if (goldAnswer.is(DecisionLabel.Entailment)){ + meda.getResults().get(pairID)[0]=1; + } + + } catch (EDAException | ComponentException e) { + e.printStackTrace(); + } + if (decision.getDecision().equals(goldAnswer)){ + correct += 1; + } + sum += 1; + } + float score = (float)correct/sum; + + //comment out if you do not want to get results printed + printDecisionTable(meda); + printResults(sum, correct, score); + + } + + /** + * prints test results to stdout + * @param sum number of pairs tested + * @param correct number of correctly classified pairs + * @param score correct/sum + */ + private void printResults(int sum, int correct, float score) { + //print test results + System.out.println("\nsum "+sum+" - correct "+correct+" ("+score*100+"%)\n"); + } + + /** + * prints a table with detailed overview of decisions to 
stdout + * pairID | goldLabel | BasicEDAs' decisions | MetaEDA decision + * values <0 -> NonEntailment + * values >0 -> Entailment + * @param meda + */ + private void printDecisionTable(SimpleMetaEDAConfidenceFeatures meda) { + //print detailed classification overview table for test data + if (meda.isConfidenceAsFeature()){ + System.out.println(Arrays.deepToString(meda.getClassifier().coefficients())); + } + + HashMap results = meda.getResults(); + + StringBuffer sb = new StringBuffer(); + sb.append(String.format("%30s", "PairID")+String.format("%30s", "GoldLabel")); + + for (int i=0; i> getSynsetOf(String lemma) throws Wo return ret; } + /** + * Returns the number of synsets of given lemma and POS. + * @param lemma + * @param partOfSpeech + * @return + * @throws WordNetException + * @author Ofer Bronstein + * @since June 2014 + */ + public int getNumberOfSynsets(String lemma, WordNetPartOfSpeech partOfSpeech) throws WordNetException + { + IIndexWord idxWord = jwiRealDictionary.getIndexWord (lemma, JwiUtils.getJwiPartOfSpeec(partOfSpeech)); + if (idxWord==null) { + return 0; + } + return idxWord.getWordIDs().size(); + } + /* * (non-Javadoc) * @see ac.biu.nlp.nlp.instruments.dictionary.wordnet.Dictionary#getSynsetsOf(java.lang.String, ac.biu.nlp.nlp.instruments.dictionary.wordnet.WordNetPartOfSpeech) diff --git a/core/src/main/resources/configuration-file/biutee.xml b/core/src/main/resources/configuration-file/biutee.xml index e8c58098..2d506833 100644 --- a/core/src/main/resources/configuration-file/biutee.xml +++ b/core/src/main/resources/configuration-file/biutee.xml @@ -253,8 +253,8 @@ true -
- distsim-bap +
+ distsim-lin-proximity 20 @@ -264,9 +264,9 @@ - $REDISDATA/reuters/bap/similarity-l2r.rdb + $REDISDATA/reuters/lin/proximity/similarity-l2r.top100.rdb - $REDISDATA/reuters/bap/similarity-r2l.rdb + $REDISDATA/reuters/lin/proximity/similarity-r2l.top100.rdb @@ -277,10 +277,14 @@ te-srv2 7001 --> + + + true +
-
- distsim-lin-proximity +
+ distsim-lin-dependency 20 @@ -288,11 +292,11 @@ - + - $REDISDATA/reuters/lin/proximity/similarity-l2r.rdb + $REDISDATA/reuters/lin/dependency/similarity-l2r.top100.rdb - $REDISDATA/reuters/lin/proximity/similarity-r2l.rdb + $REDISDATA/reuters/lin/dependency/similarity-r2l.top100.rdb @@ -303,10 +307,14 @@ te-srv2 7003 --> + + + true +
-
- distsim-lin-dependency +
+ distsim-bap 20 @@ -314,11 +322,11 @@ - + - $REDISDATA/reuters/lin/dependency/similarity-l2r.rdb + $REDISDATA/reuters/bap/similarity-l2r.top100.rdb - $REDISDATA/reuters/lin/dependency/similarity-r2l.rdb + $REDISDATA/reuters/bap/similarity-r2l.top100.rdb @@ -329,8 +337,12 @@ te-srv2 7005 --> + + + true +
- +
distsim-dirt 20 @@ -342,7 +354,7 @@ - $REDISDATA/reuters/dirt/similarity-l2r.rdb + $REDISDATA/reuters/dirt/similarity-l2r.top100.rdb @@ -351,6 +363,10 @@ te-srv2 7006 --> + + + true +
@@ -364,7 +380,7 @@ - $REDISDATA/reuters/reverb/similarity-l2r.rdb + $REDISDATA/reverb/similarity-l2r.rdb @@ -373,6 +389,10 @@ te-srv2 7007 --> + + + false +
@@ -399,6 +419,10 @@ te-srv2 7010 --> + + + true +
@@ -428,7 +452,13 @@ 10 eu.excitementproject.eop.lexicalminer.definition.classifier.syntacticpatterns.offlineClassifiers.syntacticpatternsLocationsSquare SyntacticOfflinePosRelationLocationSquareClassifier + stopwords-Eyal.txt + + Redirect,Parenthesis,LexicalIDM,SyntacticIDM + + false +
diff --git a/core/src/main/resources/configuration-file/lexlinkers/README.txt b/core/src/main/resources/configuration-file/lexlinkers/README.txt new file mode 100644 index 00000000..1e3610e1 --- /dev/null +++ b/core/src/main/resources/configuration-file/lexlinkers/README.txt @@ -0,0 +1,8 @@ +the files here are essential parts of +core.component.alignment.lexicallink.wrapped classes + +Until we get better LexicalResource implementations .. and (or) more flexible configuration where we can compose on-the-fly. + +so do not remove them unless you know what you do. + +Tae-Gil Noh diff --git a/core/src/main/resources/configuration-file/lexlinkers/WordNetENLinker.xml b/core/src/main/resources/configuration-file/lexlinkers/WordNetENLinker.xml new file mode 100644 index 00000000..cf5893e5 --- /dev/null +++ b/core/src/main/resources/configuration-file/lexlinkers/WordNetENLinker.xml @@ -0,0 +1,35 @@ + + + +
+ 1 +
+ +
+ + + eu.excitementproject.eop.core.component.lexicalknowledge.wordnet.WordnetLexicalResource +
+ + + + + + + + +
+ true + 3.0 + + ../core/src/main/resources/ontologies/EnglishWordNet-dict + true + true + SYNONYM,DERIVATIONALLY_RELATED,HYPERNYM,INSTANCE_HYPERNYM,MEMBER_HOLONYM,PART_HOLONYM,ENTAILMENT,SUBSTANCE_MERONYM + 2 +
+ +
diff --git a/core/src/main/resources/external-data/edit/stopwords_DE.txt b/core/src/main/resources/external-data/edit/stopwords_DE.txt index 689524f1..da2bc020 100644 --- a/core/src/main/resources/external-data/edit/stopwords_DE.txt +++ b/core/src/main/resources/external-data/edit/stopwords_DE.txt @@ -1,2 +1 @@ -essere -avere \ No newline at end of file +sein \ No newline at end of file diff --git a/core/src/main/resources/external-data/edit/stopwords_IT.txt b/core/src/main/resources/external-data/edit/stopwords_IT.txt index da2bc020..689524f1 100644 --- a/core/src/main/resources/external-data/edit/stopwords_IT.txt +++ b/core/src/main/resources/external-data/edit/stopwords_IT.txt @@ -1 +1,2 @@ -sein \ No newline at end of file +essere +avere \ No newline at end of file diff --git a/core/src/test/java/eu/excitementproject/eop/core/EditDistanceEDAwRedisTest.java b/core/src/test/java/eu/excitementproject/eop/core/EditDistanceEDAwRedisTest.java new file mode 100644 index 00000000..67f5d33a --- /dev/null +++ b/core/src/test/java/eu/excitementproject/eop/core/EditDistanceEDAwRedisTest.java @@ -0,0 +1,475 @@ +package eu.excitementproject.eop.core; + +import org.apache.uima.jcas.JCas; + +//import eu.excitementproject.eop.core.component.distance.CasCreation; +//import java.util.List; +import java.util.Iterator; +import java.util.ArrayList; +import java.util.List; +import java.io.*; + +import org.apache.uima.cas.FSIterator; +import org.apache.uima.jcas.cas.TOP; + +import eu.excitement.type.entailment.Pair; + +import eu.excitementproject.eop.common.configuration.CommonConfig; +import eu.excitementproject.eop.common.utilities.configuration.ImplCommonConfig; +import eu.excitementproject.eop.lap.PlatformCASProber; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; 
+import javax.xml.transform.stream.StreamResult; + +import org.junit.*; +import org.w3c.dom.Attr; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import java.util.logging.Logger; + + +/** This class tests Edit Distance EDA training and testing it + * on a small portion of the RTE-3 data set for English, German and Italian language. + */ +public class EditDistanceEDAwRedisTest { + + static Logger logger = Logger.getLogger(EditDistanceEDAwRedisTest.class + .getName()); + + @Ignore + @Test + public void test() { + + logger.info("testing EditDistanceEDAwRedis ..."); + testItalian(); +// testEnglish(); +// testGerman(); + + } + + /** + * test on the Italian data set + * + * @return + */ + public void testItalian() { + + ArrayList list = new ArrayList(); + + EditDistanceEDAwRedis editDistanceEDA = + new EditDistanceEDAwRedis(); + + try { + + File configFile = new File("./src/main/resources/configuration-file/EditDistanceEDAwRedis_IT.xml"); +// File testDir = new File("./src/main/resources/data-set/ITA/test/"); + File testDir = new File("/tmp/eop-eda-exp/ALMA/test_little/"); + + CommonConfig config = new ImplCommonConfig(configFile); + + //training + long startTime = System.currentTimeMillis(); + + logger.info("Training now"); + + editDistanceEDA.startTraining(config); + long endTime = System.currentTimeMillis(); + logger.info("Time:" + (endTime - startTime)/1000); + editDistanceEDA.shutdown(); + + logger.info("Done training. 
Initializing EDA for testing"); + + //testing + editDistanceEDA = + new EditDistanceEDAwRedis(); + editDistanceEDA.initialize(config); + + startTime = System.currentTimeMillis(); + + logger.info("Processing file from directory " + testDir); + + for (File xmi : (testDir.listFiles())) { + + logger.info("Processing file " + xmi); + + if (!xmi.getName().endsWith(".xmi")) { + continue; + } + JCas cas = PlatformCASProber.probeXmi(xmi, null); + EditDistanceTEDecision teDecision1 = editDistanceEDA.process(cas); + list.add(getPairID(cas) + "\t" + getGoldLabel(cas) + "\t" + teDecision1.getDecision().toString() + "\t" + teDecision1.getConfidence()); + } + endTime = System.currentTimeMillis(); + logger.info("Time:" + (endTime - startTime)/1000); + + File annotatedFileName = new File("/tmp/redis/EditDistanceEDA_IT_Result.txt"); +// String evaluationFileName = "/tmp/redis/EditDistanceEDA_IT_Eval.xml"; + + save(annotatedFileName, list, false); + list.clear(); + //score(annotatedFileName, evaluationFileName); + editDistanceEDA.shutdown(); + + } catch(Exception e) { + + e.printStackTrace(); + + } + + } + + + /** + * test on the English data set + * + * @return + */ + public void testEnglish() { + + ArrayList list = new ArrayList(); + + EditDistanceEDA editDistanceEDA = + new EditDistanceEDA(); + + try { + + //Without lexical resources + //File configFile = new File("./src/main/resources/configuration-file/EditDistanceEDA_EN.xml"); + //String trainDir = "./src/test/resources/data-set/ENG/dev/"; + //File testDir = new File("./src/test/resources/data-set/ENG/test/"); + //File testDir = new File("/tmp/ENG/test/"); + + File configFile = new File("./src/test/resources/configuration-file/EditDistanceEDA_EN.xml"); + File testDir = new File("./src/test/resources/data-set/ENG/test/"); + + CommonConfig config = new ImplCommonConfig(configFile); + + //training + long startTime = System.currentTimeMillis(); + //editDistanceEDA.setTrainDIR(trainDir); + //editDistanceEDA.setWriteModel(false); + 
editDistanceEDA.startTraining(config); + long endTime = System.currentTimeMillis(); + logger.info("Time:" + (endTime - startTime)/1000); + editDistanceEDA.shutdown(); + + //testing + editDistanceEDA = + new EditDistanceEDA(); + editDistanceEDA.initialize(config); + + startTime = System.currentTimeMillis(); + for (File xmi : (testDir.listFiles())) { + if (!xmi.getName().endsWith(".xmi")) { + continue; + } + JCas cas = PlatformCASProber.probeXmi(xmi, null); + EditDistanceTEDecision teDecision1 = editDistanceEDA.process(cas); + list.add(getPairID(cas) + "\t" + getGoldLabel(cas) + "\t" + teDecision1.getDecision().toString() + "\t" + teDecision1.getConfidence()); + } + endTime = System.currentTimeMillis(); + logger.info("Time:" + (endTime - startTime)/1000); + + //File annotatedFileName = new File("/tmp/EditDistanceEDA_EN_Result.txt"); + //String evaluationFileName = "/tmp/EditDistanceEDA_EN_Eval.xml"; + + //save(annotatedFileName, list, false); + list.clear(); + //score(annotatedFileName, evaluationFileName); + editDistanceEDA.shutdown(); + + } catch(Exception e) { + + e.printStackTrace(); + + } + + } + + + /** + * test on the German data set + * + * @return + */ + public void testGerman() { + + ArrayList list = new ArrayList(); + + EditDistanceEDA editDistanceEDA = + new EditDistanceEDA(); + + try { + + //Without lexical resources + //File configFile = new File("./src/main/resources/configuration-file/EditDistanceEDA_DE.xml"); + //String trainDir = "./src/test/resources/data-set/GER/dev/"; + //File testDir = new File("./src/test/resources/data-set/GER/test/"); + //File testDir = new File("/tmp/GER/test/"); + + File configFile = new File("./src/test/resources/configuration-file/EditDistanceEDA_DE.xml"); + File testDir = new File("./src/test/resources/data-set/GER/test/"); + + CommonConfig config = new ImplCommonConfig(configFile); + + //training + long startTime = System.currentTimeMillis(); + //editDistanceEDA.setTrainDIR(trainDir); + 
//editDistanceEDA.setWriteModel(false); + editDistanceEDA.startTraining(config); + long endTime = System.currentTimeMillis(); + logger.info("Time:" + (endTime - startTime)/1000); + editDistanceEDA.shutdown(); + + //testing + editDistanceEDA = + new EditDistanceEDA(); + editDistanceEDA.initialize(config); + + startTime = System.currentTimeMillis(); + for (File xmi : (testDir.listFiles())) { + if (!xmi.getName().endsWith(".xmi")) { + continue; + } + JCas cas = PlatformCASProber.probeXmi(xmi, null); + EditDistanceTEDecision teDecision1 = editDistanceEDA.process(cas); + list.add(getPairID(cas) + "\t" + getGoldLabel(cas) + "\t" + teDecision1.getDecision().toString() + "\t" + teDecision1.getConfidence()); + } + endTime = System.currentTimeMillis(); + logger.info("Time:" + (endTime - startTime)/1000); + + //File annotatedFileName = new File("/tmp/EditDistanceEDA_GER_Result.txt"); + //String evaluationFileName = "/tmp/EditDistanceEDA_GER_Eval.xml"; + + //save(annotatedFileName, list, false); + list.clear(); + //score(annotatedFileName, evaluationFileName); + + editDistanceEDA.shutdown(); + + } catch(Exception e) { + + e.printStackTrace(); + + } + + } + + + /** + * @param aCas + * the JCas object + * @return return the pairID of the T-H pair + */ + protected String getPairID(JCas aCas) { + FSIterator pairIter = aCas.getJFSIndexRepository() + .getAllIndexedFS(Pair.type); + Pair p = (Pair) pairIter.next(); + return p.getPairID(); + } + + + /** + * @param aCas + * the JCas object + * @return if the T-H pair contains the gold answer, return it; otherwise, + * return null + */ + protected String getGoldLabel(JCas aCas) { + FSIterator pairIter = aCas.getJFSIndexRepository() + .getAllIndexedFS(Pair.type); + Pair p = (Pair) pairIter.next(); + if (null == p.getGoldAnswer() || p.getGoldAnswer().equals("") + || p.getGoldAnswer().equals("ABSTAIN")) { + return null; + } else { + return p.getGoldAnswer(); + } + } + + + /** + * save the results + */ + public void save(File file, List 
list, boolean append) throws Exception { + + BufferedWriter writer = null; + + try { + + writer = new BufferedWriter(new FileWriter(file)); + PrintWriter printout = new PrintWriter(writer, append); + + Iterator iterator = list.iterator(); + while(iterator.hasNext()) { + printout.println(iterator.next()); + } + printout.close(); + + } catch (Exception e) { + throw new Exception(e.getMessage()); + } finally { + if (writer != null) + writer.close(); + } + + } + + /** + * calculate the accuracy + */ + public static void score(File resultFile, String outputFile) { + BufferedReader input; + float pos_corrt = 0f; + float pos_wrong = 0f; + float neg_corrt = 0f; + float neg_wrong = 0f; + try { + input = new BufferedReader(new InputStreamReader(new FileInputStream(resultFile), "UTF-8")); + String line = ""; + while ((line = input.readLine()) != null) { + if (line.trim().length() == 0) { + logger.warning("Empty line. Ignore..."); + continue; + } + String[] items = line.split("\t"); + if (items.length != 4) { + logger.warning("Wrong format! Ignore the line..."); + continue; + } + if (items[1].equalsIgnoreCase("Entailment")) { + if (items[2].equalsIgnoreCase("Entailment")) { + pos_corrt += 1f; + } else if (items[2].equalsIgnoreCase("NonEntailment")) { + pos_wrong += 1f; + } else { + logger.warning("Wrong format! Ignore the line..."); + continue; + } + } else if (items[1].equalsIgnoreCase("NonEntailment")) { + if (items[2].equalsIgnoreCase("NonEntailment")) { + neg_corrt += 1f; + } else if (items[2].equalsIgnoreCase("Entailment")) { + neg_wrong += 1f; + } else { + logger.warning("Wrong format! Ignore the line..."); + continue; + } + } else { + logger.warning("Wrong format! 
Ignore the line..."); + continue; + } + } + input.close(); +// logger.info(String.valueOf(pos_corrt)); +// logger.info(String.valueOf(pos_wrong)); +// logger.info(String.valueOf(neg_corrt)); +// logger.info(String.valueOf(neg_wrong)); + + float EntailmentGold = pos_corrt + pos_wrong; + float NonEntailmentGold = neg_corrt + neg_wrong; + float Sum = EntailmentGold + NonEntailmentGold; + float EntailmentPrecision = pos_corrt / (pos_corrt + neg_wrong); + float EntailmentRecall = pos_corrt / EntailmentGold; + float EntailmentFMeasure = 2 * EntailmentPrecision * EntailmentRecall / (EntailmentPrecision + EntailmentRecall); + float NonEntailmentPrecision = neg_corrt / (neg_corrt + pos_wrong); + float NonEntailmentRecall = neg_corrt / NonEntailmentGold; + float NonEntailmentFMeasure = 2 * NonEntailmentPrecision * NonEntailmentRecall / (NonEntailmentPrecision + NonEntailmentRecall); + float Accuracy = (pos_corrt + neg_corrt) / Sum; + + // output the result into an XML file + DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance(); + DocumentBuilder docBuilder = docFactory.newDocumentBuilder(); + Document doc = docBuilder.newDocument(); + Element root = doc.createElement("Result"); + Attr attr_EDA = doc.createAttribute("EDA_Configuration"); + attr_EDA.setValue(resultFile.getName()); + root.setAttributeNode(attr_EDA); + doc.appendChild(root); + + Element pairs = doc.createElement("Total_Pairs"); + pairs.appendChild(doc.createTextNode(String.valueOf((int)Sum))); + root.appendChild(pairs); + + Element acc = doc.createElement("Accuracy"); + acc.appendChild(doc.createTextNode(String.valueOf(Accuracy))); + root.appendChild(acc); + + // positive cases + Element pos = doc.createElement("Positive_Pairs"); + Attr attr_pos = doc.createAttribute("Number"); + attr_pos.setValue(String.valueOf((int)EntailmentGold)); + pos.setAttributeNode(attr_pos); + root.appendChild(pos); + + Element pos_pre = doc.createElement("Precision"); + 
pos_pre.appendChild(doc.createTextNode(String.valueOf(EntailmentPrecision))); + pos.appendChild(pos_pre); + + Element pos_rec = doc.createElement("Recall"); + pos_rec.appendChild(doc.createTextNode(String.valueOf(EntailmentRecall))); + pos.appendChild(pos_rec); + + Element pos_f = doc.createElement("F_Measure"); + pos_f.appendChild(doc.createTextNode(String.valueOf(EntailmentFMeasure))); + pos.appendChild(pos_f); + + Element pos_cor = doc.createElement("Classified_As_Positive"); + pos_cor.appendChild(doc.createTextNode(String.valueOf((int)pos_corrt))); + pos.appendChild(pos_cor); + + Element pos_wro = doc.createElement("Classified_As_Negative"); + pos_wro.appendChild(doc.createTextNode(String.valueOf((int)pos_wrong))); + pos.appendChild(pos_wro); + + // negative cases + Element neg = doc.createElement("Negative_Pairs"); + Attr attr_neg = doc.createAttribute("Number"); + attr_neg.setValue(String.valueOf((int)NonEntailmentGold)); + neg.setAttributeNode(attr_neg); + root.appendChild(neg); + + Element neg_pre = doc.createElement("Precision"); + neg_pre.appendChild(doc.createTextNode(String.valueOf(NonEntailmentPrecision))); + neg.appendChild(neg_pre); + + Element neg_rec = doc.createElement("Recall"); + neg_rec.appendChild(doc.createTextNode(String.valueOf(NonEntailmentRecall))); + neg.appendChild(neg_rec); + + Element neg_f = doc.createElement("F_Measure"); + neg_f.appendChild(doc.createTextNode(String.valueOf(NonEntailmentFMeasure))); + neg.appendChild(neg_f); + + Element neg_wro = doc.createElement("Classified_As_Positive"); + neg_wro.appendChild(doc.createTextNode(String.valueOf((int)neg_wrong))); + neg.appendChild(neg_wro); + + Element neg_cor = doc.createElement("Classified_As_Negative"); + neg_cor.appendChild(doc.createTextNode(String.valueOf((int)neg_corrt))); + neg.appendChild(neg_cor); + + // write the content into xml file + TransformerFactory transformerFactory = TransformerFactory.newInstance(); + Transformer transformer = 
transformerFactory.newTransformer(); + DOMSource source = new DOMSource(doc); + StreamResult result = new StreamResult(new File(outputFile)); + + transformer.setOutputProperty(OutputKeys.INDENT, "yes"); + transformer.transform(source, result); + + logger.info("File saved!"); + + } catch (Exception e) { + logger.warning(e.getMessage()); + } + + } + +} + diff --git a/core/src/test/java/eu/excitementproject/eop/core/component/alignment/lexicallink/LexicalAlignerTest.java b/core/src/test/java/eu/excitementproject/eop/core/component/alignment/lexicallink/LexicalAlignerTest.java new file mode 100644 index 00000000..673c7df7 --- /dev/null +++ b/core/src/test/java/eu/excitementproject/eop/core/component/alignment/lexicallink/LexicalAlignerTest.java @@ -0,0 +1,215 @@ +package eu.excitementproject.eop.core.component.alignment.lexicallink; + +import static org.junit.Assert.fail; + +import java.io.File; +import java.io.IOException; +import java.net.URL; +import java.util.logging.Logger; + +import org.apache.uima.jcas.JCas; +import org.junit.BeforeClass; +import org.junit.Test; +import org.uimafit.util.JCasUtil; + +import eu.excitement.type.alignment.Link; +import eu.excitementproject.eop.common.utilities.configuration.ImplCommonConfig; +import eu.excitementproject.eop.lap.LAPAccess; +import eu.excitementproject.eop.lap.LAPException; +import eu.excitementproject.eop.lap.biu.test.BIUFullLAPConfigured; +import eu.excitementproject.eop.lap.biu.test.BiuTestUtils; +import eu.excitementproject.eop.lap.implbase.LAP_ImplBase; + +/** + * Test class to {@link eu.excitementproject.eop.core.component.alignment.lexicallink.LexicalAligner}. 
+ * @author Vered Shwartz + * + */ +public class LexicalAlignerTest { + + // Private Members + private LexicalAligner aligner; + private LAPAccess lap; + private String t1 = "The assassin was convicted and sentenced to death penalty"; + private String h1 = "The killer has been accused of murder and doomed to capital punishment"; + private String t2 = "Kennedy was killed in Dallas"; + private String h2 = "Kennedy was wounded and died in Texas"; + + static Logger logger = Logger.getLogger(LexicalAligner.class.getName()); + + /** + * Initialize the lexical aligner and prepare the tests + */ + public LexicalAlignerTest() { + + try { + + // Create and initialize the aligner + logger.info("Initialize the Lexical Aligner"); + URL configFileURL = getClass().getResource("/configuration-file/LexicalAligner_EN.xml"); + File configFile = new File(configFileURL.getFile()); + ImplCommonConfig commonConfig = new ImplCommonConfig(configFile); + aligner = new LexicalAligner(commonConfig); + + // Load the tokenizer and lemmatizer + try { + lap = new BIUFullLAPConfigured(); + } catch (LAPException e) { + logger.info("Could not load the tokenizer and lemmatizer. 
" + + e.getMessage()); + } + } catch (Exception e) { + + logger.info("Failed initializing the LexicalAligner tests: " + + e.getMessage()); + } + } + + @BeforeClass + public static void beforeClass() throws IOException { + + // Run test only under BIU environment + BiuTestUtils.assumeBiuEnvironment(); + } + + @Test + public void test1() { + + try { + + // Annotate the first pair with tokens and lemmas + logger.info("Tokenize and lemmatize the sentence pair #1"); + JCas pair1 = lap.generateSingleTHPairCAS(t1, h1); + + // Call the aligner to align T and H of pair 1 + logger.info("Aligning pair #1"); + aligner.annotate(pair1); + logger.info("Finished aligning pair #1"); + + // Print the alignment of pair 1 + JCas hypoView = pair1.getView(LAP_ImplBase.HYPOTHESISVIEW); + + logger.info("Pair 1:"); + logger.info("T: " + t1); + logger.info("H: " + h1); + + boolean assassinKiller = false, deathPenaltyCapitalPunishment = false; + + for (Link link : JCasUtil.select(hypoView, Link.class)) { + + logger.info(String.format("Text phrase: %s, " + + "hypothesis phrase: %s, " + + "id: %s, confidence: %f, direction: %s", + link.getTSideTarget().getCoveredText(), + link.getHSideTarget().getCoveredText(), + link.getID(), link.getStrength(), + link.getDirection().toString())); + + assassinKiller = assassinKiller || ((link.getTSideTarget().getBegin() == 4) && + (link.getTSideTarget().getEnd() == 12) && + (link.getHSideTarget().getBegin() == 4) && + (link.getHSideTarget().getEnd() == 10)); + + deathPenaltyCapitalPunishment = deathPenaltyCapitalPunishment || + ((link.getTSideTarget().getBegin() == 44) && + (link.getTSideTarget().getEnd() == 57) && + (link.getHSideTarget().getBegin() == 52) && + (link.getHSideTarget().getEnd() == 70)); + } + + // Make sure the alignments contain the alignment of + // "assassin" to "killer" + if (!assassinKiller) { + fail("There is no alignment link between 'assassin' and 'killer'." 
+ + " This alignment link appears in: WordNet, BAP, Lin Dependency," + + " Lin Proximity and Wikipedia. " + + " Make sure that at least some of these resources were loaded correctly."); + } + + // Make sure the alignments contain the alignment of + // "death penalty" to "capital punishment" + if (!deathPenaltyCapitalPunishment) { + fail("There is no alignment link between 'death penalty' and 'capital punishment'." + + " This alignment link appears in: WordNet and Wikipedia. " + + " Make sure that at least one of these resources was loaded correctly."); + } + + } catch (Exception e) { + logger.info("Could not process first pair. " + e.getMessage()); + } + } + + @Test + public void test2() { + + try { + + // Annotate the first pair with tokens and lemmas + logger.info("Tokenize and lemmatize the sentence pair #2"); + JCas pair2 = lap.generateSingleTHPairCAS(t2, h2); + + // Call the aligner to align T and H of pair 2 + logger.info("Aligning pair #2"); + aligner.annotate(pair2); + logger.info("Finished aligning pair #2"); + + // Print the alignment of pair 1 + JCas hypoView = pair2.getView(LAP_ImplBase.HYPOTHESISVIEW); + + logger.info("Pair 2:"); + logger.info("T: " + t2); + logger.info("H: " + h2); + + boolean killedWounded = false, dallasTexas = false; + + for (Link link : JCasUtil.select(hypoView, Link.class)) { + + logger.info(String.format("Text phrase: %s, " + + "hypothesis phrase: %s, " + + "id: %s, confidence: %f, direction: %s", + link.getTSideTarget().getCoveredText(), + link.getHSideTarget().getCoveredText(), + link.getID(), link.getStrength(), + link.getDirection().toString())); + + killedWounded = killedWounded || ((link.getTSideTarget().getBegin() == 12) && + (link.getTSideTarget().getEnd() == 18) && + (link.getHSideTarget().getBegin() == 12) && + (link.getHSideTarget().getEnd() == 19)); + + dallasTexas = dallasTexas || ((link.getTSideTarget().getBegin() == 22) && + (link.getTSideTarget().getEnd() == 28) && + (link.getHSideTarget().getBegin() == 32) && + 
(link.getHSideTarget().getEnd() == 37)); + } + + // Make sure the alignments contain the alignment of + // "killed" to "wounded" + if (!killedWounded) { + fail("There is no alignment link between 'killed' and 'wounded'." + + " This alignment link appears in VerbOcean." + + " Make sure that this resource was loaded correctly."); + } + + // Make sure the alignments contain the alignment of + // "Dallas" to "Texas" + if (!dallasTexas) { + fail("There is no alignment link between 'Dallas' and 'Texas'." + + " This alignment link appears in: WordNet and GEO. " + + " Make sure that at least one of these resources was loaded correctly."); + } + + } catch (Exception e) { + logger.info("Could not process first pair. " + e.getMessage()); + } + } + + @Override + protected void finalize() throws Throwable { + + // Dispose the aligner + aligner.cleanUp(); + + super.finalize(); + } +} diff --git a/core/src/test/java/eu/excitementproject/eop/core/component/alignment/lexicallink/wrapped/VerbOceanENLinkTest.java b/core/src/test/java/eu/excitementproject/eop/core/component/alignment/lexicallink/wrapped/VerbOceanENLinkTest.java new file mode 100644 index 00000000..68e4d8df --- /dev/null +++ b/core/src/test/java/eu/excitementproject/eop/core/component/alignment/lexicallink/wrapped/VerbOceanENLinkTest.java @@ -0,0 +1,69 @@ +package eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped; + +import static org.junit.Assert.*; + +import org.apache.commons.lang.exception.ExceptionUtils; +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.uima.jcas.JCas; +import org.junit.Assume; +import org.junit.Test; + +import eu.excitement.type.alignment.LinkUtils; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.lap.dkpro.TreeTaggerEN; + +public class VerbOceanENLinkTest { + + @Test + public void test() { + BasicConfigurator.resetConfiguration(); + 
BasicConfigurator.configure(); + Logger.getRootLogger().setLevel(Level.INFO); + Logger testlogger = Logger.getLogger(this.getClass().toString()); + + // prepare a lemmatizer + TreeTaggerEN lemmatizer = null; + try + { + lemmatizer = new TreeTaggerEN(); + lemmatizer.generateSingleTHPairCAS("this is a test.", "TreeTagger in sight?"); + } + catch (Exception e) + { + // check if this is due to missing TreeTagger binary and model. + // In such a case, we just skip this test. + // (see /lap/src/scripts/treetagger/README.txt to how to install TreeTagger) + if (ExceptionUtils.getRootCause(e) instanceof java.io.IOException) + { + testlogger.info("Skipping the test: TreeTagger binary and/or models missing. \n To run this testcase, TreeTagger installation is needed. (see /lap/src/scripts/treetagger/README.txt)"); + Assume.assumeTrue(false); // we won't test this test case any longer. + } + } + + + try { + + // prepare the alinger + //AlignmentComponent wnLinker = new WordNetENLinker("src/main/resources/ontologies/EnglishWordNet-dict"); + //AlignmentComponent voLinker = new VerbOceanENLinker("src/main/resources/VerbOcean/verbocean.unrefined.2004-05-20.txt"); + AlignmentComponent voLinker = new VerbOceanENLinker(); + + + String t1 = "Kennedy was killed in Dallas"; + String h1 = "Kennedy was wounded and died in Texas"; + + JCas aJCas = lemmatizer.generateSingleTHPairCAS(t1, h1); + + voLinker.annotate(aJCas); + LinkUtils.dumpTokenLevelLinks(aJCas, System.out); + + } + catch (Exception e) + { + fail(e.getMessage()); + } + } + +} diff --git a/core/src/test/java/eu/excitementproject/eop/core/component/alignment/lexicallink/wrapped/WordNetENLinkerTest.java b/core/src/test/java/eu/excitementproject/eop/core/component/alignment/lexicallink/wrapped/WordNetENLinkerTest.java new file mode 100644 index 00000000..917ef4de --- /dev/null +++ b/core/src/test/java/eu/excitementproject/eop/core/component/alignment/lexicallink/wrapped/WordNetENLinkerTest.java @@ -0,0 +1,68 @@ +package 
eu.excitementproject.eop.core.component.alignment.lexicallink.wrapped; + +import static org.junit.Assert.*; + +import org.apache.commons.lang.exception.ExceptionUtils; +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.uima.jcas.JCas; +import org.junit.Assume; +import org.junit.Test; + +import eu.excitement.type.alignment.LinkUtils; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.lap.dkpro.TreeTaggerEN; + +public class WordNetENLinkerTest { + + @Test + public void test() { + + BasicConfigurator.resetConfiguration(); + BasicConfigurator.configure(); + Logger.getRootLogger().setLevel(Level.INFO); + Logger testlogger = Logger.getLogger(this.getClass().toString()); + + + // prepare a lemmatizer + TreeTaggerEN lemmatizer = null; + try + { + lemmatizer = new TreeTaggerEN(); + lemmatizer.generateSingleTHPairCAS("this is a test.", "TreeTagger in sight?"); + } + catch (Exception e) + { + // check if this is due to missing TreeTagger binary and model. + // In such a case, we just skip this test. + // (see /lap/src/scripts/treetagger/README.txt to how to install TreeTagger) + if (ExceptionUtils.getRootCause(e) instanceof java.io.IOException) + { + testlogger.info("Skipping the test: TreeTagger binary and/or models missing. \n To run this testcase, TreeTagger installation is needed. (see /lap/src/scripts/treetagger/README.txt)"); + Assume.assumeTrue(false); // we won't test this test case any longer. 
+ } + } + + + try { + // prepare the alinger + AlignmentComponent wnLinker = new WordNetENLinker("src/main/resources/ontologies/EnglishWordNet-dict"); + + String t1 = "The assassin was convicted and sentenced to death penalty"; + String h1 = "The killer has been accused of murder and doomed to capital punishment"; + JCas aJCas = lemmatizer.generateSingleTHPairCAS(t1, h1); + + wnLinker.annotate(aJCas); + LinkUtils.dumpTokenLevelLinks(aJCas, System.out); + + } + catch (Exception e) + { + fail(e.getMessage()); + } + } + + + +} diff --git a/core/src/test/java/eu/excitementproject/eop/core/component/alignment/nemex/NemexAlignerTestEN.java b/core/src/test/java/eu/excitementproject/eop/core/component/alignment/nemex/NemexAlignerTestEN.java new file mode 100644 index 00000000..8432583a --- /dev/null +++ b/core/src/test/java/eu/excitementproject/eop/core/component/alignment/nemex/NemexAlignerTestEN.java @@ -0,0 +1,112 @@ +package eu.excitementproject.eop.core.component.alignment.nemex; + +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.uima.jcas.JCas; +import org.junit.Test; +import org.uimafit.util.JCasUtil; + +import eu.excitement.type.alignment.Link; +import eu.excitementproject.eop.common.component.alignment.PairAnnotatorComponentException; +import eu.excitementproject.eop.lap.dkpro.OpenNLPTaggerEN; +import eu.excitementproject.eop.lap.implbase.LAP_ImplBase; + +public class NemexAlignerTestEN { + + static Logger logger; + + private NemexAligner aligner; + + @Test + public void test() { + try { + + logger = Logger.getLogger(NemexAligner.class.getName()); + + // prepare JCas + + OpenNLPTaggerEN tokenizer = null; + tokenizer = new OpenNLPTaggerEN(); + + JCas aJCas1 = tokenizer.generateSingleTHPairCAS("I saw a car.", + "I saw an automobile."); + + Logger.getRootLogger().setLevel(Level.INFO); // main log setting: + // set as DEBUG to + // see what's going + // & debug. 
+ + logger.info("Starting alignment for test JCas pair 1"); + alignAndPrint(aJCas1); + logger.info("Finished alignment of test JCas pair 1"); + + /*JCas aJCas2 = tokenizer + .generateSingleTHPairCAS( + "Judge Drew served as Justice until Kennon returned to claim his seat in 1945.", + "Kennon served as Justice."); + + logger.info("Starting alignment for test JCas pair 2"); + alignAndPrint(aJCas2); + logger.info("Finished alignment of test JCas pair 2"); + + + JCas aJCas3 = tokenizer + .generateSingleTHPairCAS( + "Ms. Minton left Australia in 1961 to pursue her studies in London.", + "Ms. Minton was born in Australia."); + + logger.info("Starting alignment for test JCas pair 3"); + alignAndPrint(aJCas3); + logger.info("Finished alignment of test JCas pair 3"); + + + JCas aJCas4 = tokenizer + .generateSingleTHPairCAS( + "Robinson's garden style can be seen today at Gravetye Manor, West Sussex, England, though it is more manicured than it was in Robinson's time.", + "Gravetye Manor is located in West Sussex."); + + logger.info("Starting alignment for test JCas pair 4"); + alignAndPrint(aJCas4); + logger.info("Finished alignment of test JCas pair 4");*/ + + } catch (Exception e) { + logger.info("Could not align the JCas test pair"); + } + } + + private void alignAndPrint(JCas aJCas) + throws PairAnnotatorComponentException { + try { + + logger.info("Initialize the Nemex Aligner"); + + aligner = new NemexAligner( + "src/test/resources/gazetteer/nemexAligner.txt", "#", true, + 3, false, "DICE_SIMILARITY_MEASURE", 0.8); + logger.info("Initialization finished"); + + // align test JCas pair + + aligner.annotate(aJCas); + + // Print the alignment of JCas pair + + JCas hypoView = aJCas.getView(LAP_ImplBase.HYPOTHESISVIEW); + + for (Link link : JCasUtil.select(hypoView, Link.class)) { + + logger.info(String.format("Text phrase: %s, " + + "hypothesis phrase: %s, " + + "id: %s, confidence: %f, direction: %s", link + .getTSideTarget().getCoveredText(), link + 
.getHSideTarget().getCoveredText(), link.getID(), link + .getStrength(), link.getDirection().toString())); + + } + } catch (Exception e) { + logger.info("Alignment failed"); + e.printStackTrace(); + + } + } +} diff --git a/core/src/test/java/eu/excitementproject/eop/core/component/alignment/phraselink/IdenticalLemmaPhraseLinkerTest.java b/core/src/test/java/eu/excitementproject/eop/core/component/alignment/phraselink/IdenticalLemmaPhraseLinkerTest.java new file mode 100644 index 00000000..7134f546 --- /dev/null +++ b/core/src/test/java/eu/excitementproject/eop/core/component/alignment/phraselink/IdenticalLemmaPhraseLinkerTest.java @@ -0,0 +1,221 @@ +package eu.excitementproject.eop.core.component.alignment.phraselink; + +import static org.junit.Assert.*; + +import java.util.Collection; + +import junit.framework.Assert; + +import org.apache.commons.lang.exception.ExceptionUtils; +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.uima.cas.CASException; +import org.apache.uima.jcas.JCas; +import org.junit.Assume; +import org.junit.Test; +import org.uimafit.util.JCasUtil; + +import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; +import eu.excitement.type.alignment.LinkUtils; +import eu.excitementproject.eop.lap.LAPAccess; +import eu.excitementproject.eop.lap.dkpro.TreeTaggerEN; +import eu.excitementproject.eop.lap.implbase.LAP_ImplBase; + +public class IdenticalLemmaPhraseLinkerTest { + + @Test + public void test() { + + BasicConfigurator.resetConfiguration(); + BasicConfigurator.configure(); + Logger.getRootLogger().setLevel(Level.INFO); + Logger testlogger = Logger.getLogger(this.getClass().toString()); + + // prepare a lemmatizer + TreeTaggerEN lemmatizer = null; + try + { + lemmatizer = new TreeTaggerEN(); + lemmatizer.generateSingleTHPairCAS("this is a test.", "TreeTagger in sight?"); + } + catch (Exception e) + { + // check if this is due to missing TreeTagger binary and 
model. + // In such a case, we just skip this test. + // (see /lap/src/scripts/treetagger/README.txt to how to install TreeTagger) + if (ExceptionUtils.getRootCause(e) instanceof java.io.IOException) + { + testlogger.info("Skipping the test: TreeTagger binary and/or models missing. \n To run this testcase, TreeTagger installation is needed. (see /lap/src/scripts/treetagger/README.txt)"); + Assume.assumeTrue(false); // we won't test this test case any longer. + } + + fail(e.getMessage()); + } + + testMaxMatchOnPositions(lemmatizer); + + IdenticalLemmaPhraseLinker testInstance = null; + try { + testInstance = new IdenticalLemmaPhraseLinker(); + JCas aJCas = lemmatizer.generateSingleTHPairCAS("This is China's new cat, and a new cat is a good thing.", "This is a new cat, and a thing. Gil?"); + testInstance.annotate(aJCas); + } + catch (Exception e) + { + fail(e.getMessage()); + } + + LAPAccess tokenizer = lemmatizer; + JCas aJCas = null; + // Some RTE pairs, as test. + try { + // RTE3 test pair 17 (some links) + aJCas = tokenizer.generateSingleTHPairCAS( + "David Golinkin is single-handedly responsible for uncovering and re-publishing dozens of responsa of the Committee on Jewish Law and Standards of the Rabbinical Assembly, making them available to the general public in a three-volume set.", + "David Golinkin is the author of dozen of responsa of the Committee on Jewish Law and Standards of the Rabbinical Assembly."); + testInstance.annotate(aJCas); + LinkUtils.dumpTokenLevelLinks(aJCas, System.out); + + // RTE3 test pair 18 (0 links...) + aJCas = tokenizer.generateSingleTHPairCAS( + "Ryo Okumoto (born in Osaka, Japan) is a keyboardist, best known for his work with progressive rock group Spock's Beard.", + "The rock group Spock's Beard comes from Japan." 
+ ); + testInstance.annotate(aJCas); + LinkUtils.dumpTokenLevelLinks(aJCas, System.out); + + // RTE3 test pair 35 + aJCas = tokenizer.generateSingleTHPairCAS( + "A Revenue Cutter, the ship was named for Harriet Lane, niece of President James Buchanan, who served as Buchanan's White House hostess.", + "Harriet Lane was a relative of President James Buchanan." + ); + testInstance.annotate(aJCas); + LinkUtils.dumpTokenLevelLinks(aJCas, System.out); + + // RTE3 test pair 2 + aJCas = tokenizer.generateSingleTHPairCAS( + "Claude Chabrol (born June 24, 1930) is a French movie director and has become well-known in the 40 years since his first film, Le Beau Serge , for his chilling tales of murder, including Le Boucher.", + "Le Boucher was made by a French movie director." + ); + testInstance.annotate(aJCas); + LinkUtils.dumpTokenLevelLinks(aJCas, System.out); + + + } + catch (Exception e) + { + fail(e.getMessage()); + } + + // some German ones ... + try { + // RTE3 test pair 17 + aJCas = tokenizer.generateSingleTHPairCAS( + "David Golinkin ist ganz allein für die Entdeckung und Neuveröffentlichung Dutzender von Erwiderungen des Ausschusses für jüdische Gesetze und Normen der Rabbinerversammlung verantwortlich, so dass sie nun der breiten Öffentlichkeit in einer dreibändigen Reihe zugänglich sind.", + "David Golinkin ist der Autor Dutzender von Erwiderungen des Ausschusses für jüdische Gesetze und Normen der Rabbinerversammlung." + ); + testInstance.annotate(aJCas); + LinkUtils.dumpTokenLevelLinks(aJCas, System.out); + + // RTE3 test pair 18 + aJCas = tokenizer.generateSingleTHPairCAS( + "Ryo Okumoto (geboren in Osaka, Japan) ist ein Keyboarder, der für seine Arbeit mit der progressiven Rockgruppe Spocks Beard bekannt ist.", + "Die Rockgruppe Spocks Beard kommt aus Japan." 
+ ); + testInstance.annotate(aJCas); + LinkUtils.dumpTokenLevelLinks(aJCas, System.out); + + // RTE3 test pair 35 + aJCas = tokenizer.generateSingleTHPairCAS( + "Das Schiff, ein Zollkutter, wurde nach Harriet Lane benannt, der Nichte des Präsidenten James Buchanan, die im Weißen Haus als Buchanans Hausherrin diente.", + "Harriet Lane war eine Verwandte des Präsidenten James Buchanan." + ); + testInstance.annotate(aJCas); + LinkUtils.dumpTokenLevelLinks(aJCas, System.out); + + // RTE3 test pair 2 + aJCas = tokenizer.generateSingleTHPairCAS( + "Claude Chabrol (geboren am 24. Juni 1930) ist ein französischer Regisseur und wurde in den 40er Jahren nach seinem ersten Film, 'Le Beau Serge', berühmt für seine schaurigen Mordgeschichten, wie 'Le Boucher'.", + "Le Boucher wurde von einem französischen Regisseur geleitet." + ); + testInstance.annotate(aJCas); + LinkUtils.dumpTokenLevelLinks(aJCas, System.out); + + + } + catch (Exception e) + { + fail(e.getMessage()); + } + + // Some problematic one ... + try { + aJCas = tokenizer.generateSingleTHPairCAS("Claude Chabrol divorced Agnes, his first wife, to marry the actress Stéphane Audran. His third wife is Aurore Paquiss.", "Aurore Paquiss married Chabrol."); + testInstance.annotate(aJCas); + LinkUtils.dumpTokenLevelLinks(aJCas, System.out); + + + } + catch (Exception e) + { + fail(e.getMessage()); + } + + + + + + } + + + public void testMaxMatchOnPositions(TreeTaggerEN lemmatizer) + { + // okay. get it and test it. 
+ JCas aJCas = null; + try { + aJCas = lemmatizer.generateSingleTHPairCAS("This is China's new cat, and a new cat is a good thing.", "This is a new cat, and a thing."); + } + catch (Exception e) + { + fail (e.getMessage()); + } + + JCas textView = null; + JCas hypoView = null; + try { + textView = aJCas.getView(LAP_ImplBase.TEXTVIEW); + hypoView = aJCas.getView(LAP_ImplBase.HYPOTHESISVIEW); + } + catch (CASException e) + { + fail(e.getMessage()); + } + + Collection t; + t = JCasUtil.select(textView, Token.class); + Token[] tTokens = t.toArray(new Token[t.size()]); + t = JCasUtil.select(hypoView, Token.class); + Token[] hTokens = t.toArray(new Token[t.size()]); + + // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 0 1 2 3 4 5 6 7 8 9 + //("This is China's new cat, and a new cat is a good thing.", "This is a new cat, and a thing."); + + try { + int t1 = IdenticalLemmaPhraseLinker.maxMatchOnPositions(0, 0, tTokens, hTokens); + Assert.assertEquals(2, t1); + int t2 = IdenticalLemmaPhraseLinker.maxMatchOnPositions(4, 3, tTokens, hTokens); + Assert.assertEquals(5, t2); + int t3 = IdenticalLemmaPhraseLinker.maxMatchOnPositions(14, 8, tTokens, hTokens); + Assert.assertEquals(2, t3); + int t4 = IdenticalLemmaPhraseLinker.maxMatchOnPositions(8, 2, tTokens, hTokens); + Assert.assertEquals(3, t4); + int t5 = IdenticalLemmaPhraseLinker.maxMatchOnPositions(10, 6, tTokens, hTokens); + Assert.assertEquals(0, t5); + } + catch (Exception e) + { + fail(e.getMessage()); + } + } +} diff --git a/core/src/test/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseLinkerDETest.java b/core/src/test/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseLinkerDETest.java new file mode 100644 index 00000000..bc0adba6 --- /dev/null +++ b/core/src/test/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseLinkerDETest.java @@ -0,0 +1,93 @@ +package eu.excitementproject.eop.core.component.alignment.phraselink; + +import static 
org.junit.Assert.*; + +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.uima.jcas.JCas; +import org.junit.Ignore; +import org.junit.Test; + +import eu.excitement.type.alignment.LinkUtils; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.lap.LAPAccess; +import eu.excitementproject.eop.lap.dkpro.OpenNLPTaggerDE; + +public class MeteorPhraseLinkerDETest { + + @Ignore // ignore as default. (basic capability tested by super class --- just for further test.) + @Test + public void test() { + BasicConfigurator.resetConfiguration(); + BasicConfigurator.configure(); + Logger.getRootLogger().setLevel(Level.DEBUG); // to hide openNLP logs + Logger testlogger = Logger.getLogger(this.getClass().toString()); + + // prepare a JCas + JCas aJCas = null; + LAPAccess tokenizer = null; + try + { + tokenizer = new OpenNLPTaggerDE(); + } + catch (Exception e) + { + fail(e.getMessage()); + } + + testlogger.info("This test class may take upto 30 seconds ... "); + + // main class test. + // load test + AlignmentComponent phraseLinker = null; + try { + phraseLinker = new MeteorPhraseLinkerDE(); + } + catch (Exception e) + { + fail(e.getMessage()); + } + + try { + // RTE3 test pair 17 + aJCas = tokenizer.generateSingleTHPairCAS( + "David Golinkin ist ganz allein für die Entdeckung und Neuveröffentlichung Dutzender von Erwiderungen des Ausschusses für jüdische Gesetze und Normen der Rabbinerversammlung verantwortlich, so dass sie nun der breiten Öffentlichkeit in einer dreibändigen Reihe zugänglich sind.", + "David Golinkin ist der Autor Dutzender von Erwiderungen des Ausschusses für jüdische Gesetze und Normen der Rabbinerversammlung." 
+ ); + phraseLinker.annotate(aJCas); + LinkUtils.dumpTokenLevelLinks(aJCas, System.out); + + // RTE3 test pair 18 + aJCas = tokenizer.generateSingleTHPairCAS( + "Ryo Okumoto (geboren in Osaka, Japan) ist ein Keyboarder, der für seine Arbeit mit der progressiven Rockgruppe Spocks Beard bekannt ist.", + "Die Rockgruppe Spocks Beard kommt aus Japan." + ); + phraseLinker.annotate(aJCas); + LinkUtils.dumpTokenLevelLinks(aJCas, System.out); + + // RTE3 test pair 35 + aJCas = tokenizer.generateSingleTHPairCAS( + "Das Schiff, ein Zollkutter, wurde nach Harriet Lane benannt, der Nichte des Präsidenten James Buchanan, die im Weißen Haus als Buchanans Hausherrin diente.", + "Harriet Lane war eine Verwandte des Präsidenten James Buchanan." + ); + phraseLinker.annotate(aJCas); + LinkUtils.dumpTokenLevelLinks(aJCas, System.out); + + // RTE3 test pair 2 + aJCas = tokenizer.generateSingleTHPairCAS( + "Claude Chabrol (geboren am 24. Juni 1930) ist ein französischer Regisseur und wurde in den 40er Jahren nach seinem ersten Film, 'Le Beau Serge', berühmt für seine schaurigen Mordgeschichten, wie 'Le Boucher'.", + "Le Boucher wurde von einem französischen Regisseur geleitet." 
+ ); + phraseLinker.annotate(aJCas); + LinkUtils.dumpTokenLevelLinks(aJCas, System.out); + + + } + catch (Exception e) + { + fail(e.getMessage()); + } + } + +} diff --git a/core/src/test/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseLinkerENTest.java b/core/src/test/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseLinkerENTest.java new file mode 100644 index 00000000..58d0ed53 --- /dev/null +++ b/core/src/test/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseLinkerENTest.java @@ -0,0 +1,93 @@ +package eu.excitementproject.eop.core.component.alignment.phraselink; + +import static org.junit.Assert.*; + +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.uima.jcas.JCas; +import org.junit.Ignore; +import org.junit.Test; + +import eu.excitement.type.alignment.LinkUtils; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.lap.dkpro.OpenNLPTaggerEN; + +public class MeteorPhraseLinkerENTest { + + @Ignore // ignore as default. (basic capability tested by super class --- just for further test.) + @Test + public void test() { + BasicConfigurator.resetConfiguration(); + BasicConfigurator.configure(); + Logger.getRootLogger().setLevel(Level.DEBUG); + Logger testlogger = Logger.getLogger(this.getClass().toString()); + + testlogger.info("This test class may take upto 30 seconds ... "); + + // prepare a JCas + JCas aJCas = null; + OpenNLPTaggerEN tokenizer = null; + try + { + tokenizer = new OpenNLPTaggerEN(); + aJCas = tokenizer.generateSingleTHPairCAS("This is a cat.", "This is China's new cat."); + } + catch (Exception e) + { + fail(e.getMessage()); + } + + // main class test. 
+ // load test + AlignmentComponent phraseLinker = null; + try { + phraseLinker = new MeteorPhraseLinkerEN(); + } + catch (Exception e) + { + fail(e.getMessage()); + } + + try { + // RTE3 test pair 17 (some links) + aJCas = tokenizer.generateSingleTHPairCAS( + "David Golinkin is single-handedly responsible for uncovering and re-publishing dozens of responsa of the Committee on Jewish Law and Standards of the Rabbinical Assembly, making them available to the general public in a three-volume set.", + "David Golinkin is the author of dozen of responsa of the Committee on Jewish Law and Standards of the Rabbinical Assembly."); + phraseLinker.annotate(aJCas); + LinkUtils.dumpTokenLevelLinks(aJCas, System.out); + + // RTE3 test pair 18 (0 links...) + aJCas = tokenizer.generateSingleTHPairCAS( + "Ryo Okumoto (born in Osaka, Japan) is a keyboardist, best known for his work with progressive rock group Spock's Beard.", + "The rock group Spock's Beard comes from Japan." + ); + phraseLinker.annotate(aJCas); + LinkUtils.dumpTokenLevelLinks(aJCas, System.out); + + // RTE3 test pair 35 + aJCas = tokenizer.generateSingleTHPairCAS( + "A Revenue Cutter, the ship was named for Harriet Lane, niece of President James Buchanan, who served as Buchanan's White House hostess.", + "Harriet Lane was a relative of President James Buchanan." + ); + phraseLinker.annotate(aJCas); + LinkUtils.dumpTokenLevelLinks(aJCas, System.out); + + // RTE3 test pair 2 + aJCas = tokenizer.generateSingleTHPairCAS( + "Claude Chabrol (born June 24, 1930) is a French movie director and has become well-known in the 40 years since his first film, Le Beau Serge , for his chilling tales of murder, including Le Boucher.", + "Le Boucher was made by a French movie director." 
+ ); + phraseLinker.annotate(aJCas); + LinkUtils.dumpTokenLevelLinks(aJCas, System.out); + + + } + catch (Exception e) + { + fail(e.getMessage()); + } + + } + +} diff --git a/core/src/test/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseLinkerITTest.java b/core/src/test/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseLinkerITTest.java new file mode 100644 index 00000000..7a34a087 --- /dev/null +++ b/core/src/test/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseLinkerITTest.java @@ -0,0 +1,95 @@ +package eu.excitementproject.eop.core.component.alignment.phraselink; + +import static org.junit.Assert.*; + +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.uima.jcas.JCas; +import org.junit.Ignore; +import org.junit.Test; + +import eu.excitement.type.alignment.LinkUtils; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.lap.LAPAccess; +import eu.excitementproject.eop.lap.dkpro.OpenNLPTaggerIT; + +public class MeteorPhraseLinkerITTest { + + @Ignore // ignore as default. (basic capability tested by super class --- just for further test.) + @Test + public void test() { + BasicConfigurator.resetConfiguration(); + BasicConfigurator.configure(); + Logger.getRootLogger().setLevel(Level.INFO); + Logger testlogger = Logger.getLogger(this.getClass().toString()); + + testlogger.info("This test class may take upto 30 seconds ... 
"); + + // prepare a JCas + JCas aJCas = null; + LAPAccess tokenizer = null; + try + { + tokenizer = new OpenNLPTaggerIT(); + } + catch (Exception e) + { + fail(e.getMessage()); + } + + // load test + AlignmentComponent phraseLinker = null; + try { + phraseLinker = new MeteorPhraseLinkerIT(); + } + catch (Exception e) + { + fail(e.getMessage()); + } + + + try { + // RTE3 test pair 17 + aJCas = tokenizer.generateSingleTHPairCAS( + "David Golinkin è da solo responsabile per la scoperta e la ripubblicazione di dozzine di responsa del Comitato sulla Legge e gli Standard Ebraici dell'Assemblea Rabbinica, rendendole accessibili al pubblico comune in una raccolta di tre volumi.", + "David Golinkin è l'autore di dozzine di responsa del Comitato sulla Legge e gli Standard Ebraici dell'Assemblea Rabbinica." + ); + phraseLinker.annotate(aJCas); + LinkUtils.dumpTokenLevelLinks(aJCas, System.out); + + // RTE3 test pair 18 + aJCas = tokenizer.generateSingleTHPairCAS( + "Ryo Okumoto (nato a Osaka, Japan) è un tastierista, meglio conosciuto per il suo lavoro con il gruppo progressive rock Spock's Beard.", + "Il gruppo rock Spock's Beard viene dal Giappone." + ); + phraseLinker.annotate(aJCas); + LinkUtils.dumpTokenLevelLinks(aJCas, System.out); + + // RTE3 test pair 35 + aJCas = tokenizer.generateSingleTHPairCAS( + "Una Revenue Cutter, la nave venne chiamata così per Harriet Lane, nipote del Presidente James Buchanan, che prestò servizio come hostess di Buchanan alla Casa Bianca.", + "Harriet Lane era una parente del presidente President James Buchanan." + ); + phraseLinker.annotate(aJCas); + LinkUtils.dumpTokenLevelLinks(aJCas, System.out); + + // RTE3 test pair 2 + aJCas = tokenizer.generateSingleTHPairCAS( + "Claude Chabrol (nato il 24 giugno 1930) è un regista francese diventato famoso negli ultimi 40 anni sin dal suo primo film, Le Beau Serge, per le sue agghiaccianti storie di omicidi, tra cui Le Boucher.", + "Le Boucher è stato girato da un regista francese." 
+ ); + phraseLinker.annotate(aJCas); + LinkUtils.dumpTokenLevelLinks(aJCas, System.out); + + } + catch (Exception e) + { + fail(e.getMessage()); + } + + + + } + +} diff --git a/core/src/test/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseResourceAlignerTest.java b/core/src/test/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseResourceAlignerTest.java new file mode 100644 index 00000000..ee69b882 --- /dev/null +++ b/core/src/test/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseResourceAlignerTest.java @@ -0,0 +1,118 @@ +package eu.excitementproject.eop.core.component.alignment.phraselink; + +import static org.junit.Assert.*; + +import java.util.List; + +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.uima.jcas.JCas; +import org.junit.Ignore; +import org.junit.Test; + +import eu.excitement.type.alignment.LinkUtils; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.lap.LAPAccess; +import eu.excitementproject.eop.lap.dkpro.OpenNLPTaggerEN; + +@SuppressWarnings("unused") +public class MeteorPhraseResourceAlignerTest { + + @Test + public void test() { + BasicConfigurator.resetConfiguration(); + BasicConfigurator.configure(); + Logger.getRootLogger().setLevel(Level.WARN); // to hide openNLP logs + Logger testlogger = Logger.getLogger(this.getClass().toString()); + + // prepare a JCas + JCas aJCas = null; + OpenNLPTaggerEN tokenizer = null; + try + { + tokenizer = new OpenNLPTaggerEN(); + aJCas = tokenizer.generateSingleTHPairCAS("This is a cat.", "This is China's new cat."); + } + catch (Exception e) + { + fail(e.getMessage()); + } + + Logger.getRootLogger().setLevel(Level.INFO); // main log setting: set as DEBUG to see what's going & debug. + testlogger.info("This test class may take upto 30 seconds ... 
"); + + // phrase candidate extract test + try + { List candidates = null; + // from TEXTVIEW + candidates = MeteorPhraseResourceAligner.getPhraseCandidatesFromSOFA(aJCas.getView(OpenNLPTaggerEN.TEXTVIEW), 6); + testlogger.debug(candidates.size() + " candidates found. They are; "); + // should be 15 candidates + assertEquals(candidates.size(), 15); + for(String s : candidates) + { + testlogger.debug(s); + } + // from HYPOTHESISVIEw + candidates = MeteorPhraseResourceAligner.getPhraseCandidatesFromSOFA(aJCas.getView(OpenNLPTaggerEN.HYPOTHESISVIEW), 6); + testlogger.debug(candidates.size() + " candidates found. They are; "); + // should be 27 candidates + assertEquals(candidates.size(), 27); + for(String s : candidates) + { + testlogger.debug(s); + } + + // once more on HYPOTHESISVIEW, but with less uptoN. + candidates = MeteorPhraseResourceAligner.getPhraseCandidatesFromSOFA(aJCas.getView(OpenNLPTaggerEN.HYPOTHESISVIEW), 4); + testlogger.debug(candidates.size() + " candidates found. They are; "); + // should be 22 candidates + assertEquals(candidates.size(), 22); + for(String s : candidates) + { + testlogger.debug(s); + } + + // empty case. + aJCas = tokenizer.generateSingleTHPairCAS("", "This is China's new cat."); + candidates = MeteorPhraseResourceAligner.getPhraseCandidatesFromSOFA(aJCas.getView(OpenNLPTaggerEN.TEXTVIEW), 4); + // no candidates, but shouldn't make any exception. + assertEquals(candidates.size(), 0); + } + catch (Exception e) + { + fail(e.getMessage()); + } + + // main class test. + // load test + MeteorPhraseResourceAligner phraseLinker = null; + try { + phraseLinker = new MeteorPhraseResourceAligner("/meteor-1.5/data/paraphrase-en", 7); + } + catch (Exception e) + { + fail(e.getMessage()); + } + + // alignment test with one CAS +// * 1 2 3 4 +// * 012345678901234567890123456789012345678901234567890 +// * TEXTVIEW SOFA He went there in person to dwell on the importance, and to dwell on the importance. 
+// * HYPOVIEW SOFA He went there to explain the significance and significance. +// (more than one match, for test) + try { + aJCas = tokenizer.generateSingleTHPairCAS("He went there in person to dwell on the importance, and to dwell on the importance.", "He went there to explain the significance and significance."); + phraseLinker.annotate(aJCas); + LinkUtils.dumpTokenLevelLinks(aJCas, System.out); // this will dump 24 (token level) links + } + catch (Exception e) + { + fail(e.getMessage()); + } + + // .. and on another CAS? + } + +} diff --git a/core/src/test/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseTableTest.java b/core/src/test/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseTableTest.java new file mode 100644 index 00000000..c41c2921 --- /dev/null +++ b/core/src/test/java/eu/excitementproject/eop/core/component/alignment/phraselink/MeteorPhraseTableTest.java @@ -0,0 +1,69 @@ +package eu.excitementproject.eop.core.component.alignment.phraselink; + +import static org.junit.Assert.*; + +import java.util.List; + +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.junit.Ignore; +import org.junit.Test; + +import eu.excitementproject.eop.core.component.alignment.phraselink.MeteorPhraseTable.ScoredString; + +public class MeteorPhraseTableTest { + + @Ignore // blocked, due to the fact that this test is also done by MeteorPhraseResource class test that uses this class. + @Test + public void test() { + + // Set Log4J for the test + BasicConfigurator.resetConfiguration(); + BasicConfigurator.configure(); + Logger.getRootLogger().setLevel(Level.DEBUG); + Logger testlogger = Logger.getLogger(this.getClass().toString()); + + testlogger.info("This test may take upto 30 seconds..."); + // load test. + // (2.8Ghz Pentium dual takes 26 seconds on loading English paraphrase.) 
+ MeteorPhraseTable englishTable = null; + final long loadStart = System.currentTimeMillis(); + try + { + englishTable = new MeteorPhraseTable("/meteor-1.5/data/paraphrase-en"); + + } + catch (Exception e) + { + // no exception should happen + fail(e.getMessage()); + } + final long loadEnd = System.currentTimeMillis(); + final long duration = ( loadEnd - loadStart ) / 1000; + testlogger.debug("loading took " + duration + " seconds"); + + // read test for known value, english. + String lhs = "all those who have"; + List rhsAndProbList = englishTable.lookupParaphrasesFor(lhs); + assertEquals(rhsAndProbList.size(), 4); + for (ScoredString rhsProbTuple : rhsAndProbList) + { + String rhs = rhsProbTuple.getString(); + Double prob = rhsProbTuple.getScore(); + testlogger.debug(lhs + " -> " + rhs + " : " + prob.toString()); + } + + // null result. + String lhs2 = "bikini atoll"; // not in the resource + rhsAndProbList = englishTable.lookupParaphrasesFor(lhs2); + assertEquals(rhsAndProbList.size(), 0); + for (ScoredString rhsProbTuple : rhsAndProbList) + { + String rhs = rhsProbTuple.getString(); + Double prob = rhsProbTuple.getScore(); + testlogger.debug(lhs + " -> " + rhs + " : " + prob.toString()); + } + } + +} diff --git a/core/src/test/resources/configuration-file/LexicalAligner_EN.xml b/core/src/test/resources/configuration-file/LexicalAligner_EN.xml new file mode 100644 index 00000000..d33ce309 --- /dev/null +++ b/core/src/test/resources/configuration-file/LexicalAligner_EN.xml @@ -0,0 +1,123 @@ + + + +
+ 5 +
+ +
+ + + eu.excitementproject.eop.core.component.lexicalknowledge.wordnet.WordnetLexicalResource + + + eu.excitementproject.eop.distsim.resource.SimilarityStorageBasedLexicalResource + + + eu.excitementproject.eop.distsim.resource.SimilarityStorageBasedLexicalResource + + + eu.excitementproject.eop.distsim.resource.SimilarityStorageBasedLexicalResource + + + eu.excitementproject.eop.core.component.lexicalknowledge.verb_ocean.VerbOceanLexicalResource + + + eu.excitementproject.eop.core.component.lexicalknowledge.geo.RedisBasedGeoLexicalResource + + + eu.excitementproject.eop.core.component.lexicalknowledge.catvar.CatvarLexicalResource + + + eu.excitementproject.eop.lexicalminer.redis.RedisBasedWikipediaLexicalResource +
+ + + + + + + + +
+ false + 3.0 + ../data/WordNet/3.0/dict.wn.orig + true + true + SYNONYM,DERIVATIONALLY_RELATED,HYPERNYM,INSTANCE_HYPERNYM,MEMBER_HOLONYM,PART_HOLONYM,ENTAILMENT,SUBSTANCE_MERONYM + 2 +
+ + +
+ false + 1.0 + distsim-bap + 20 + ../data/RedisData/reuters/bap/similarity-l2r.rdb + ../data/RedisData/reuters/bap/similarity-r2l.rdb +
+ + +
+ false + 1.0 + distsim-lin-proximity + eu.excitementproject.eop.distsim.items.LemmaPosBasedElement + 20 + ../data/RedisData/reuters/lin/proximity/similarity-l2r.rdb + ../data/RedisData/reuters/lin/proximity/similarity-r2l.rdb +
+ + +
+ false + 1.0 + distsim-lin-dependency + eu.excitementproject.eop.distsim.items.LemmaPosBasedElement + 20 + ../data/RedisData/reuters/lin/dependency/similarity-l2r.rdb + ../data/RedisData/reuters/lin/dependency/similarity-r2l.rdb +
+ + +
+ false + 1.0 + geo + 20 + ../data/RedisData/geo/geo-l2r.rdb + ../data/RedisData/geo/geo-r2l.rdb +
+ + +
+ true + unrefined.2004-05-20 + 1 + ../data/VerbOcean/verbocean.unrefined.2004-05-20.txt + STRONGER_THAN +
+ + +
+ true + 2.1 + ../data/CatVar/catvar21 +
+ + +
+ false + 1.0 + wiki + ../data/RedisData/wiki/wiki-l2r.rdb + ../data/RedisData/wiki/wiki-r2l.rdb + 0.001 + 10 + eu.excitementproject.eop.lexicalminer.definition.classifier.syntacticpatterns.offlineClassifiers.syntacticpatternsLocationsSquare + SyntacticOfflinePosRelationLocationSquareClassifier +
+
\ No newline at end of file diff --git a/core/src/test/resources/configuration-file/MaxEntClassificationEDA_Base_EN.xml b/core/src/test/resources/configuration-file/MaxEntClassificationEDA_Base_EN.xml new file mode 100644 index 00000000..1d7e1a31 --- /dev/null +++ b/core/src/test/resources/configuration-file/MaxEntClassificationEDA_Base_EN.xml @@ -0,0 +1,79 @@ + + + +]> + + + + + + +
+ + eu.excitementproject.eop.core.MaxEntClassificationEDA + + EN + + eu.excitementproject.eop.lap.dkpro.MaltParserEN +
+ + +
+
+ + +
+
+ + + +
+ + ./src/main/resources/model/MaxEntClassificationEDAModel_Base_EN + + ./target/EN/dev/ + + ./target/EN/test/ + + 10000,1 + + + BagOfWordsScoring,BagOfLemmasScoring +
+ +
diff --git a/core/src/test/resources/configuration-file/MetaEDATest1_DE.xml b/core/src/test/resources/configuration-file/MetaEDATest1_DE.xml new file mode 100644 index 00000000..7db13931 --- /dev/null +++ b/core/src/test/resources/configuration-file/MetaEDATest1_DE.xml @@ -0,0 +1,43 @@ + + + + + + + + + +
+ + eu.excitementproject.eop.core.metaeda.SimpleMetaEDAConfidenceFeatures + + DE +
+ + +
+ + false + + false + + ./target/MEDAModelTest1_DE.model + + ./target/DE/dev/ + + ./target/DE/test/ +
+ +
diff --git a/core/src/test/resources/configuration-file/MetaEDATest1_EN.xml b/core/src/test/resources/configuration-file/MetaEDATest1_EN.xml new file mode 100644 index 00000000..618d43e0 --- /dev/null +++ b/core/src/test/resources/configuration-file/MetaEDATest1_EN.xml @@ -0,0 +1,43 @@ + + + + + + + + + +
+ + eu.excitementproject.eop.core.metaeda.SimpleMetaEDAConfidenceFeatures + + EN +
+ + +
+ + false + + false + + ./target/MEDAModelTest1_EN.model + + ./target/EN/dev/ + + ./target/EN/test/ +
+ +
diff --git a/core/src/test/resources/configuration-file/MetaEDATest2_DE.xml b/core/src/test/resources/configuration-file/MetaEDATest2_DE.xml new file mode 100644 index 00000000..a395284e --- /dev/null +++ b/core/src/test/resources/configuration-file/MetaEDATest2_DE.xml @@ -0,0 +1,43 @@ + + + + + + + + + +
+ + eu.excitementproject.eop.core.metaeda.SimpleMetaEDAConfidenceFeatures + + DE +
+ + +
+ + true + + false + + ./target/MEDAModelTest2_DE.model + + ./target/DE/dev/ + + ./target/DE/test/ +
+ +
diff --git a/core/src/test/resources/configuration-file/MetaEDATest2_EN.xml b/core/src/test/resources/configuration-file/MetaEDATest2_EN.xml new file mode 100644 index 00000000..5dd3c9d2 --- /dev/null +++ b/core/src/test/resources/configuration-file/MetaEDATest2_EN.xml @@ -0,0 +1,43 @@ + + + + + + + + + +
+ + eu.excitementproject.eop.core.metaeda.SimpleMetaEDAConfidenceFeatures + + EN +
+ + +
+ + true + + false + + ./target/MEDAModelTest2_EN.model + + ./target/EN/dev/ + + ./target/EN/test/ +
+ +
diff --git a/core/src/test/resources/gazetteer/nemexAligner.txt b/core/src/test/resources/gazetteer/nemexAligner.txt new file mode 100644 index 00000000..a7b073c9 --- /dev/null +++ b/core/src/test/resources/gazetteer/nemexAligner.txt @@ -0,0 +1 @@ +0 utf-8 EN 0 0 diff --git a/core/src/test/resources/model/Gate-3.1/plugins/Tools/resources/morph/default.rul b/core/src/test/resources/model/Gate-3.1/plugins/Tools/resources/morph/default.rul new file mode 100644 index 00000000..6ee5cb28 --- /dev/null +++ b/core/src/test/resources/model/Gate-3.1/plugins/Tools/resources/morph/default.rul @@ -0,0 +1,1452 @@ +defineVars +#A ==> [-a-z0-9] +A ==> [abcdefghijklmnopqrstuvwxyz0123456789-] +V ==> [aeiou] +VI ==> [aeiouy] +C ==> [bcdfghjklmnpqrstvwxyz] +CX ==> [bcdfghjklmnpqrstvwxz] +CX2 ==> "bb" OR "cc" OR "dd" OR "ff" OR "gg" OR "hh" OR "jj" OR "kk" OR "ll" OR "mm" OR "nn" OR "pp" OR "qq" OR "rr" OR "ss" OR "tt" OR "vv" OR "ww" OR "xx" OR "zz" +CX2S ==> "ff" OR "ss" OR "zz" +S ==> "s" OR "x" OR "ch" OR "sh" +PRE ==> "be" OR "ex" OR "in" OR "mis" OR "pre" OR "pro" OR "re" +EDING ==> "ed" OR "ing" +ESEDING ==> "es" OR "ed" OR "ing" + + +defineRules + +#misc. 
irregular exceptions +<*>"aches" ==> irreg_stem("ache","s") +"ach"{EDING} ==> semi_reg_stem(0,"e") +"being" ==> irreg_stem("be","ing") +"accustom"{EDING} ==> semi_reg_stem(0,"") +"blossom"{EDING} ==> semi_reg_stem(0,"") +"boycott"{EDING} ==> semi_reg_stem(0,"") +"catalog"{EDING} ==> semi_reg_stem(0,"") +{PRE}*"creat"{EDING} ==> semi_reg_stem(0,"e") +"finess"{ESEDING} ==> semi_reg_stem(0,"e") +"interfer"{EDING} ==> semi_reg_stem(0,"e") +{PRE}*"rout"{EDING} ==> semi_reg_stem(0,"e") +"tast"{ESEDING} ==> semi_reg_stem(0,"e") +"torpedo"{EDING} ==> semi_reg_stem(0,"") +"wast"{ESEDING} ==> semi_reg_stem(0,"e") + + +// WordNet irregular verb exceptions + +abode ==> irreg_stem("abide","ed") +abought ==> irreg_stem("aby","ed") +abye ==> irreg_stem("aby","") +abyes ==> irreg_stem("aby","s") +acquitt{EDING} ==> semi_reg_stem(1,"") +addrest ==> irreg_stem("address","ed") +ageing ==> irreg_stem("age","ing") +agreed ==> irreg_stem("agree","ed") +am ==> irreg_stem("be","") +anted ==> irreg_stem("ante","ed") +ante{ESEDING} ==> semi_reg_stem(0,"") +antes ==> irreg_stem("ante","s") +arc{EDING} ==> semi_reg_stem(0,"") +arck{EDING} ==> semi_reg_stem(1,"") +are ==> irreg_stem("be","") +arisen ==> irreg_stem("arise","en") +arose ==> irreg_stem("arise","ed") +ate ==> irreg_stem("eat","ed") +awoke ==> irreg_stem("awake","ed") +awoken ==> irreg_stem("awake","en") +baby-sat ==> irreg_stem("baby-sit","ed") +back-pedal{EDING} ==> semi_reg_stem(0,"") +backbit ==> irreg_stem("backbite","ed") +backbiting ==> irreg_stem("backbite","ing") +backbitten ==> irreg_stem("backbite","en") +backslid ==> irreg_stem("backslide","ed") +backslidden ==> irreg_stem("backslide","en") +bad ==> irreg_stem("bid","ed") +bade ==> irreg_stem("bid","ed") +bandieds ==> irreg_stem("bandy","s") +banquet{EDING} ==> semi_reg_stem(0,"") +barrel{EDING} ==> semi_reg_stem(0,"") +bastinadoed ==> irreg_stem("bastinado","ed") +beaten ==> irreg_stem("beat","en") +became ==> irreg_stem("become","ed") +bedevil{EDING} ==> 
semi_reg_stem(0,"") +been ==> irreg_stem("be","en") +befallen ==> irreg_stem("befall","en") +befalling ==> irreg_stem("befall","ing") +befell ==> irreg_stem("befall","ed") +began ==> irreg_stem("begin","ed") +begat ==> irreg_stem("beget","ed") +begirt ==> irreg_stem("begird","ed") +begot ==> irreg_stem("beget","ed") +begotten ==> irreg_stem("beget","en") +beguil{EDING} ==> semi_reg_stem(0,"e") +begun ==> irreg_stem("begin","en") +beheld ==> irreg_stem("behold","ed") +beholden ==> irreg_stem("behold","en") +bejewel{EDING} ==> semi_reg_stem(0,"") +belied ==> irreg_stem("belie","ed") +belies ==> irreg_stem("belie","s") +belying ==> irreg_stem("belie","ing") +benempt ==> irreg_stem("bename","ed") +bent ==> irreg_stem("bend","ed") +besought ==> irreg_stem("beseech","ed") +bespoke ==> irreg_stem("bespeak","ed") +bespoken ==> irreg_stem("bespeak","en") +bestrewn ==> irreg_stem("bestrew","en") +bestrid ==> irreg_stem("bestride","ed") +bestridden ==> irreg_stem("bestride","en") +bestrode ==> irreg_stem("bestride","ed") +betaken ==> irreg_stem("betake","en") +bethought ==> irreg_stem("bethink","ed") +betook ==> irreg_stem("betake","ed") +bevel{EDING} ==> semi_reg_stem(0,"") +bias{ESEDING} ==> semi_reg_stem(0,"") +biass{EDING} ==> semi_reg_stem(0,"") +bidden ==> irreg_stem("bid","en") +bit ==> irreg_stem("bite","ed") +biting ==> irreg_stem("bite","ing") +bitten ==> irreg_stem("bite","en") +bivouack{EDING} ==> semi_reg_stem(1,"") +bled ==> irreg_stem("bleed","ed") +blest ==> irreg_stem("bless","ed") +blew ==> irreg_stem("blow","ed") +blown ==> irreg_stem("blow","en") +blue-pencils ==> irreg_stem("blue-pencil","s") +bogged-down ==> irreg_stem("bog-down","ed") +bogging-down ==> irreg_stem("bog-down","ing") +bogs-down ==> irreg_stem("bog-down","s") +boogied ==> irreg_stem("boogie","ed") +boogies ==> irreg_stem("boogie","s") +bore ==> irreg_stem("bear","ed") +born ==> irreg_stem("bear","en") +borne ==> irreg_stem("bear","en") +bottle-fed ==> irreg_stem("bottle-feed","ed") +bought 
==> irreg_stem("buy","ed") +bound ==> irreg_stem("bind","ed") +breast-fed ==> irreg_stem("breast-feed","ed") +bred ==> irreg_stem("breed","ed") +breid ==> irreg_stem("brei","ed") +bringing ==> irreg_stem("bring","ing") +broke ==> irreg_stem("break","ed") +broken ==> irreg_stem("break","en") +brought ==> irreg_stem("bring","ed") +browbeaten ==> irreg_stem("browbeat","en") +buckram{EDING} ==> semi_reg_stem(0,"") +built ==> irreg_stem("build","ed") +buncoed ==> irreg_stem("bunco","ed") +bunkoed ==> irreg_stem("bunko","ed") +burnt ==> irreg_stem("burn","ed") +bushel{EDING} ==> semi_reg_stem(0,"") +bypast ==> irreg_stem("bypass","ed") +came ==> irreg_stem("come","ed") +canal{EDING} ==> semi_reg_stem(0,"") +cancel{EDING} ==> semi_reg_stem(0,"") +carbonadoed ==> irreg_stem("carbonado","ed") +carol{EDING} ==> semi_reg_stem(0,"") +caught ==> irreg_stem("catch","ed") +cavil{EDING} ==> semi_reg_stem(0,"") +cbel{EDING} ==> semi_reg_stem(0,"") +cbell{EDING} ==> semi_reg_stem(1,"") +channel{EDING} ==> semi_reg_stem(0,"") +chassed ==> irreg_stem("chasse","ed") +chasseing ==> irreg_stem("chasse","ing") +chasses ==> irreg_stem("chasse","s") +chevied ==> irreg_stem("chivy","ed") +chevies ==> irreg_stem("chivy","s") +chevying ==> irreg_stem("chivy","ing") +chid ==> irreg_stem("chide","ed") +chidden ==> irreg_stem("chide","en") +chisel{EDING} ==> semi_reg_stem(0,"") +chivvied ==> irreg_stem("chivy","ed") +chivvies ==> irreg_stem("chivy","s") +chivvying ==> irreg_stem("chivy","ing") +chose ==> irreg_stem("choose","ed") +chosen ==> irreg_stem("choose","en") +clad ==> irreg_stem("clothe","ed") +cleft ==> irreg_stem("cleave","ed") +clep{EDING} ==> semi_reg_stem(0,"e") +clept ==> irreg_stem("clepe","ed") +clinging ==> irreg_stem("cling","ing") +cloth{ESEDING} ==> semi_reg_stem(0,"e") +clove ==> irreg_stem("cleave","ed") +cloven ==> irreg_stem("cleave","en") +clung ==> irreg_stem("cling","ed") +co-opted ==> irreg_stem("coopt","ed") +co-opting ==> irreg_stem("coopt","ing") +co-opts ==> 
irreg_stem("coopts","s") +co-ordinate ==> irreg_stem("coordinate","") +co-ordinated ==> irreg_stem("coordinate","ed") +co-ordinates ==> irreg_stem("coordinate","s") +co-ordinating ==> irreg_stem("coordinate","ing") +coiff{EDING} ==> semi_reg_stem(1,"") +combat{EDING} ==> semi_reg_stem(0,"") +concertina{EDING} ==> semi_reg_stem(0,"") +conga{EDING} ==> semi_reg_stem(0,"") +contangoed ==> irreg_stem("contango","ed") +cooeed ==> irreg_stem("cooee","ed") +cooees ==> irreg_stem("cooee","s") +coquett{EDING} ==> semi_reg_stem(1,"") +counsel{EDING} ==> semi_reg_stem(0,"") +countersank ==> irreg_stem("countersink","ed") +countersunk ==> irreg_stem("countersink","en") +court-martiall{EDING} ==> semi_reg_stem(1,"") +crept ==> irreg_stem("creep","ed") +crescendoed ==> irreg_stem("crescendo","ed") +croquet{EDING} ==> semi_reg_stem(0,"") +crossbred ==> irreg_stem("crossbreed","ed") +cudgel{EDING} ==> semi_reg_stem(0,"") +cupel{EDING} ==> semi_reg_stem(0,"") +curettes ==> irreg_stem("curet","s") +curst ==> irreg_stem("curse","ed") +dealt ==> irreg_stem("deal","ed") +debuss{ESEDING} ==> semi_reg_stem(1,"") +decreed ==> irreg_stem("decree","ed") +deep-freeze ==> irreg_stem("deepfreeze","") +deep-freezed ==> irreg_stem("deepfreeze","ed") +deep-freezes ==> irreg_stem("deepfreeze","s") +deep-frozen ==> irreg_stem("deepfreeze","en") +degases ==> irreg_stem("degas","s") +degass{ESEDING} ==> semi_reg_stem(1,"") +deleing ==> irreg_stem("dele","ing") +devil{EDING} ==> semi_reg_stem(0,"") +diagram{EDING} ==> semi_reg_stem(0,"") +diall{EDING} ==> semi_reg_stem(1,"") +did ==> irreg_stem("do","ed") +disagreed ==> irreg_stem("disagree","ed") +disembowel{EDING} ==> semi_reg_stem(0,"") +disenthralls ==> irreg_stem("disenthral","s") +disenthrals ==> irreg_stem("disenthrall","s") +dishevel{EDING} ==> semi_reg_stem(0,"") +dittoed ==> irreg_stem("ditto","ed") +done ==> irreg_stem("do","en") +dought ==> irreg_stem("dow","ed") +dove ==> irreg_stem("dive","ed") +drank ==> irreg_stem("drink","ed") +drawn 
==> irreg_stem("draw","en") +dreamt ==> irreg_stem("dream","ed") +dreed ==> irreg_stem("dree","ed") +drew ==> irreg_stem("draw","ed") +drivel{EDING} ==> semi_reg_stem(0,"") +driven ==> irreg_stem("drive","en") +drove ==> irreg_stem("drive","ed") +drunk ==> irreg_stem("drink","en") +duell{EDING} ==> semi_reg_stem(1,"") +dug ==> irreg_stem("dig","ed") +dwelt ==> irreg_stem("dwell","ed") +eaten ==> irreg_stem("eat","en") +echoed ==> irreg_stem("echo","ed") +embargoed ==> irreg_stem("embargo","ed") +embuss{ESEDING} ==> semi_reg_stem(1,"") +emceed ==> irreg_stem("emcee","ed") +empanel{EDING} ==> semi_reg_stem(0,"") +enamel{EDING} ==> semi_reg_stem(0,"") +enwound ==> irreg_stem("enwind","ed") +equal{EDING} ==> semi_reg_stem(0,"") +equall{EDING} ==> semi_reg_stem(1,"") +equipp{EDING} ==> semi_reg_stem(1,"") +eying ==> irreg_stem("eye","ing") +facsimileing ==> irreg_stem("facsimile","ing") +fallen ==> irreg_stem("fall","en") +"fed" ==> irreg_stem("feed","ed") +feed ==> irreg_stem("fee","ed") +fell ==> irreg_stem("fall","ed") +felt ==> irreg_stem("feel","ed") +filagreed ==> irreg_stem("filagree","ed") +filigreed ==> irreg_stem("filigree","ed") +fillagreed ==> irreg_stem("fillagree","ed") +fine-drawn ==> irreg_stem("fine-draw","en") +fine-drew ==> irreg_stem("fine-draw","ed") +flannel{EDING} ==> semi_reg_stem(0,"") +fled ==> irreg_stem("flee","ed") +flew ==> irreg_stem("fly","ed") +flinging ==> irreg_stem("fling","ing") +floodlit ==> irreg_stem("floodlight","ed") +flown ==> irreg_stem("fly","en") +flung ==> irreg_stem("fling","ed") +flyblew ==> irreg_stem("flyblow","ed") +flyblown ==> irreg_stem("flyblow","en") +forbad ==> irreg_stem("forbid","ed") +forbade ==> irreg_stem("forbid","ed") +forbidden ==> irreg_stem("forbid","en") +forbore ==> irreg_stem("forbear","ed") +forborne ==> irreg_stem("forbear","en") +force-fed ==> irreg_stem("force-feed","ed") +fordid ==> irreg_stem("fordo","ed") +fordone ==> irreg_stem("fordo","en") +foredid ==> irreg_stem("foredo","ed") +foredone 
==> irreg_stem("foredo","en") +foregone ==> irreg_stem("forego","en") +foreknew ==> irreg_stem("foreknow","ed") +foreknown ==> irreg_stem("foreknow","en") +foreran ==> irreg_stem("forerun","ed") +foresaw ==> irreg_stem("foresee","ed") +foreseen ==> irreg_stem("foresee","en") +foreshown ==> irreg_stem("foreshow","en") +forespoke ==> irreg_stem("forespeak","ed") +forespoken ==> irreg_stem("forespeak","en") +foretelling ==> irreg_stem("foretell","ing") +foretold ==> irreg_stem("foretell","ed") +forewent ==> irreg_stem("forego","ed") +forgave ==> irreg_stem("forgive","ed") +forgiven ==> irreg_stem("forgive","en") +forgone ==> irreg_stem("forgo","en") +forgot ==> irreg_stem("forget","ed") +forgotten ==> irreg_stem("forget","en") +forsaken ==> irreg_stem("forsake","en") +forsook ==> irreg_stem("forsake","ed") +forspoke ==> irreg_stem("forspeak","ed") +forspoken ==> irreg_stem("forspeak","en") +forswore ==> irreg_stem("forswear","ed") +forsworn ==> irreg_stem("forswear","en") +forwent ==> irreg_stem("forgo","ed") +fought ==> irreg_stem("fight","ed") +found ==> irreg_stem("find","ed") +freed ==> irreg_stem("free","ed") +fricasseed ==> irreg_stem("fricassee","ed") +frivol{EDING} ==> semi_reg_stem(0,"") +frolick{EDING} ==> semi_reg_stem(1,"") +froze ==> irreg_stem("freeze","ed") +frozen ==> irreg_stem("freeze","en") +fuell{EDING} ==> semi_reg_stem(1,"") +funnel{EDING} ==> semi_reg_stem(0,"") +gainsaid ==> irreg_stem("gainsay","ed") +gambol{EDING} ==> semi_reg_stem(0,"") +gan ==> irreg_stem("gin","en") +garnisheed ==> irreg_stem("garnishee","ed") +gases ==> irreg_stem("gas","s") +gass{ESEDING} ==> semi_reg_stem(1,"") +gave ==> irreg_stem("give","ed") +geed ==> irreg_stem("gee","ed") +gell{EDING} ==> semi_reg_stem(1,"") +gelt ==> irreg_stem("geld","ed") +genned-up ==> irreg_stem("gen-up","ed") +genning-up ==> irreg_stem("gen-up","ing") +gens-up ==> irreg_stem("gen-up","s") +ghostwriting ==> irreg_stem("ghostwrite","ing") +ghostwritten ==> irreg_stem("ghostwrite","en") 
+ghostwrote ==> irreg_stem("ghostwrite","ed") +gilt ==> irreg_stem("gild","ed") +girt ==> irreg_stem("gird","ed") +given ==> irreg_stem("give","en") +glace{EDING} ==> semi_reg_stem(0,"") +gnawn ==> irreg_stem("gnaw","en") +gone ==> irreg_stem("go","en") +got ==> irreg_stem("get","ed") +gotten ==> irreg_stem("get","en") +gravel{EDING} ==> semi_reg_stem(0,"") +graven ==> irreg_stem("grave","en") +greed ==> irreg_stem("gree","ed") +grew ==> irreg_stem("grow","ed") +gript ==> irreg_stem("grip","ed") +ground ==> irreg_stem("grind","ed") +grovel{EDING} ==> semi_reg_stem(0,"") +grown ==> irreg_stem("grow","en") +guaranteed ==> irreg_stem("guarantee","ed") +gumshoes ==> irreg_stem("gumshoe","s") +gypp{EDING} ==> semi_reg_stem(1,"") +hacksawn ==> irreg_stem("hacksaw","en") +had ==> irreg_stem("have","ed") +halloed ==> irreg_stem("hallo","ed") +haloed ==> irreg_stem("halo","ed") +hamstringing ==> irreg_stem("hamstring","ing") +hamstrung ==> irreg_stem("hamstring","ed") +handfed ==> irreg_stem("handfeed","ed") +hansel{EDING} ==> semi_reg_stem(0,"") +has ==> irreg_stem("have","s") +hatchel{EDING} ==> semi_reg_stem(0,"") +heard ==> irreg_stem("hear","ed") +held ==> irreg_stem("hold","ed") +hewn ==> irreg_stem("hew","en") +hid ==> irreg_stem("hide","ed") +hidden ==> irreg_stem("hide","en") +hocus-pocuss{EDING} ==> semi_reg_stem(1,"") +hocuss{EDING} ==> semi_reg_stem(1,"") +hoes ==> irreg_stem("hoe","s") +hogtied ==> irreg_stem("hogtie","ed") +hogties ==> irreg_stem("hogtie","s") +hogtying ==> irreg_stem("hogtie","ing") +honied ==> irreg_stem("honey","ed") +horseshoes ==> irreg_stem("horseshoe","s") +housel{EDING} ==> semi_reg_stem(0,"") +hove ==> irreg_stem("heave","ed") +hovel{EDING} ==> semi_reg_stem(0,"") +hung ==> irreg_stem("hang","ed") +impanel{EDING} ==> semi_reg_stem(0,"") +impanells ==> irreg_stem("impanel","s") +inbred ==> irreg_stem("inbreed","ed") +indwelling ==> irreg_stem("indwell","ing") +indwelt ==> irreg_stem("indwell","ed") +initiall{EDING} ==> 
semi_reg_stem(1,"") +inlaid ==> irreg_stem("inlay","ed") +interbred ==> irreg_stem("interbreed","ed") +interlaid ==> irreg_stem("interlay","ed") +interpled ==> irreg_stem("interplead","ed") +interwove ==> irreg_stem("interweave","ed") +interwoven ==> irreg_stem("interweave","en") +inwove ==> irreg_stem("inweave","ed") +inwoven ==> irreg_stem("inweave","en") +is ==> irreg_stem("be","s") +jerry-built ==> irreg_stem("jerry-build","ed") +jewel{EDING} ==> semi_reg_stem(0,"") +joint ==> irreg_stem("join","ed") +joy-ridden ==> irreg_stem("joy-ride","en") +joy-rode ==> irreg_stem("joy-ride","ed") +kennel{EDING} ==> semi_reg_stem(0,"") +kent ==> irreg_stem("ken","ed") +kept ==> irreg_stem("keep","ed") +kernel{EDING} ==> semi_reg_stem(0,"") +kneed ==> irreg_stem("knee","ed") +knelt ==> irreg_stem("kneel","ed") +knew ==> irreg_stem("know","ed") +known ==> irreg_stem("know","en") +ko'd ==> irreg_stem("ko","ed") +ko'ing ==> irreg_stem("ko","ing") +ko's ==> irreg_stem("ko","s") +label{EDING} ==> semi_reg_stem(0,"") +laden ==> irreg_stem("lade","en") +ladyfied ==> irreg_stem("ladify","ed") +ladyfies ==> irreg_stem("ladify","s") +ladyfying ==> irreg_stem("ladify","ing") +laid ==> irreg_stem("lay","ed") +lain ==> irreg_stem("lie","en") +lassoed ==> irreg_stem("lasso","ed") +laurel{EDING} ==> semi_reg_stem(0,"") +"lay" ==> irreg_stem("lie","ed") +leant ==> irreg_stem("lean","ed") +leapt ==> irreg_stem("leap","ed") +learnt ==> irreg_stem("learn","ed") +led ==> irreg_stem("lead","ed") +left ==> irreg_stem("leave","ed") +lent ==> irreg_stem("lend","ed") +level{EDING} ==> semi_reg_stem(0,"") +libel{EDING} ==> semi_reg_stem(0,"") +lit ==> irreg_stem("light","ed") +lost ==> irreg_stem("lose","ed") +made ==> irreg_stem("make","ed") +marshal{EDING} ==> semi_reg_stem(0,"") +marvel{EDING} ==> semi_reg_stem(0,"") +meant ==> irreg_stem("mean","ed") +medal{EDING} ==> semi_reg_stem(0,"") +met ==> irreg_stem("meet","ed") +metal{EDING} ==> semi_reg_stem(0,"") +might ==> irreg_stem("may","") 
+mimick{EDING} ==> semi_reg_stem(1,"") +misbecame ==> irreg_stem("misbecome","ed") +misdealt ==> irreg_stem("misdeal","ed") +misgave ==> irreg_stem("misgive","ed") +misgiven ==> irreg_stem("misgive","en") +misheard ==> irreg_stem("mishear","ed") +mislaid ==> irreg_stem("mislay","ed") +misled ==> irreg_stem("mislead","ed") +mispled ==> irreg_stem("misplead","ed") +misspell{EDING} ==> semi_reg_stem(0,"") +misspelt ==> irreg_stem("misspell","ed") +misspent ==> irreg_stem("misspend","ed") +mistaken ==> irreg_stem("mistake","en") +mistook ==> irreg_stem("mistake","ed") +misunderstood ==> irreg_stem("misunderstand","ed") +model{EDING} ==> semi_reg_stem(0,"") +molten ==> irreg_stem("melt","en") +mown ==> irreg_stem("mow","en") +nickel{EDING} ==> semi_reg_stem(0,"") +nielloed ==> irreg_stem("niello","ed") +non-pross{ESEDING} ==> semi_reg_stem(1,"") +nonpluss{ESEDING} ==> semi_reg_stem(1,"") +outbidden ==> irreg_stem("outbid","en") +outbred ==> irreg_stem("outbreed","ed") +outdid ==> irreg_stem("outdo","ed") +outdone ==> irreg_stem("outdo","en") +outgass{ESEDING} ==> semi_reg_stem(1,"") +outgeneral{EDING} ==> semi_reg_stem(0,"") +outgone ==> irreg_stem("outgo","en") +outgrew ==> irreg_stem("outgrow","ed") +outgrown ==> irreg_stem("outgrow","en") +outlaid ==> irreg_stem("outlay","ed") +outran ==> irreg_stem("outrun","ed") +outridden ==> irreg_stem("outride","en") +outrode ==> irreg_stem("outride","ed") +outselling ==> irreg_stem("outsell","ing") +outshone ==> irreg_stem("outshine","ed") +outshot ==> irreg_stem("outshoot","ed") +outsold ==> irreg_stem("outsell","ed") +outstood ==> irreg_stem("outstand","ed") +outthought ==> irreg_stem("outthink","ed") +outwent ==> irreg_stem("outgo","ed") +outwore ==> irreg_stem("outwear","ed") +outworn ==> irreg_stem("outwear","en") +"outwrought" ==> irreg_stem("outwk","ed") +overbidden ==> irreg_stem("overbid","en") +overblew ==> irreg_stem("overblow","ed") +overblown ==> irreg_stem("overblow","en") +overbore ==> irreg_stem("overbear","ed") 
+overborne ==> irreg_stem("overbear","en") +overbuilt ==> irreg_stem("overbuild","ed") +overcame ==> irreg_stem("overcome","ed") +overdid ==> irreg_stem("overdo","ed") +overdone ==> irreg_stem("overdo","en") +overdrawn ==> irreg_stem("overdraw","en") +overdrew ==> irreg_stem("overdraw","ed") +overdriven ==> irreg_stem("overdrive","en") +overdrove ==> irreg_stem("overdrive","ed") +overflew ==> irreg_stem("overfly","ed") +overgrew ==> irreg_stem("overgrow","ed") +overgrown ==> irreg_stem("overgrow","en") +overhanging ==> irreg_stem("overhang","ing") +overheard ==> irreg_stem("overhear","ed") +overhung ==> irreg_stem("overhang","ed") +"overlaid" ==> irreg_stem("overlay","ed") +overlain ==> irreg_stem("overlie","en") +overlay ==> irreg_stem("overlie","ed") +overlies ==> irreg_stem("overlie","s") +overlying ==> irreg_stem("overlie","ing") +overpaid ==> irreg_stem("overpay","ed") +overpast ==> irreg_stem("overpass","ed") +overran ==> irreg_stem("overrun","ed") +overridden ==> irreg_stem("override","en") +overrode ==> irreg_stem("override","ed") +oversaw ==> irreg_stem("oversee","ed") +overseen ==> irreg_stem("oversee","en") +overselling ==> irreg_stem("oversell","ing") +oversewn ==> irreg_stem("oversew","en") +overshot ==> irreg_stem("overshoot","ed") +overslept ==> irreg_stem("oversleep","ed") +oversold ==> irreg_stem("oversell","ed") +overspent ==> irreg_stem("overspend","ed") +overspill{EDING} ==> semi_reg_stem(0,"") +overspilt ==> irreg_stem("overspill","ed") +overtaken ==> irreg_stem("overtake","en") +overthrew ==> irreg_stem("overthrow","ed") +overthrown ==> irreg_stem("overthrow","en") +overtook ==> irreg_stem("overtake","ed") +overwound ==> irreg_stem("overwind","ed") +overwriting ==> irreg_stem("overwrite","ing") +overwritten ==> irreg_stem("overwrite","en") +overwrote ==> irreg_stem("overwrite","ed") +paid ==> irreg_stem("pay","ed") +pall{EDING} ==> semi_reg_stem(1,"") +panel{EDING} ==> semi_reg_stem(0,"") +panick{EDING} ==> semi_reg_stem(1,"") +parallel{EDING} 
==> semi_reg_stem(0,"") +parcel{EDING} ==> semi_reg_stem(0,"") +partaken ==> irreg_stem("partake","en") +partook ==> irreg_stem("partake","ed") +pasquil ==> irreg_stem("pasquinade","") +pasquilled ==> irreg_stem("pasquinade","ed") +pasquilling ==> irreg_stem("pasquinade","ing") +pasquils ==> irreg_stem("pasquinade","s") +pedal{EDING} ==> semi_reg_stem(0,"") +peed ==> irreg_stem("pee","ed") +pencil{EDING} ==> semi_reg_stem(0,"") +pent ==> irreg_stem("pen","ed") +physick{EDING} ==> semi_reg_stem(1,"") +picnick{EDING} ==> semi_reg_stem(1,"") +pistol{EDING} ==> semi_reg_stem(0,"") +pled ==> irreg_stem("plead","ed") +polka{EDING} ==> semi_reg_stem(0,"") +pommel{EDING} ==> semi_reg_stem(0,"") +precancel{EDING} ==> semi_reg_stem(0,"") +prepaid ==> irreg_stem("prepay","ed") +programmes ==> irreg_stem("program","s") +prolog{EDING} ==> semi_reg_stem(0,"ue") +prologs ==> irreg_stem("prologue","s") +proven ==> irreg_stem("prove","en") +pummel{EDING} ==> semi_reg_stem(0,"") +pureed ==> irreg_stem("puree","ed") +quarrel{EDING} ==> semi_reg_stem(0,"") +quartersawn ==> irreg_stem("quartersaw","en") +queued ==> irreg_stem("queue","ed") +queues ==> irreg_stem("queue","s") +queuing ==> irreg_stem("queue","ing") +quick-froze ==> irreg_stem("quick-freeze","ed") +quick-frozen ==> irreg_stem("quick-freeze","en") +quipp{EDING} ==> semi_reg_stem(1,"") +quitt{EDING} ==> semi_reg_stem(1,"") +quizz{ESEDING} ==> semi_reg_stem(1,"") +ran ==> irreg_stem("run","ed") +rang ==> irreg_stem("ring","ed") +raoed ==> irreg_stem("radio","ed") +rarefied ==> irreg_stem("rarefy","ed") +rarefies ==> irreg_stem("rarefy","s") +rarefying ==> irreg_stem("rarefy","ing") +ravel{EDING} ==> semi_reg_stem(0,"") +razeed ==> irreg_stem("razee","ed") +re-trod ==> irreg_stem("re-tread","ed") +re-trodden ==> irreg_stem("re-tread","en") +rebuilt ==> irreg_stem("rebuild","ed") +recced ==> irreg_stem("recce","ed") +recce{EDING} ==> semi_reg_stem(0,"") +red ==> irreg_stem("red","ed") +red-pencil{EDING} ==> semi_reg_stem(0,"") 
+red-pencils ==> irreg_stem("red-pencil","s") +redid ==> irreg_stem("redo","ed") +redone ==> irreg_stem("redo","en") +refereed ==> irreg_stem("referee","ed") +reft ==> irreg_stem("reave","ed") +refuell{EDING} ==> semi_reg_stem(1,"") +remade ==> irreg_stem("remake","ed") +rent ==> irreg_stem("rend","ed") +repaid ==> irreg_stem("repay","ed") +reran ==> irreg_stem("rerun","ed") +resat ==> irreg_stem("resit","ed") +retaken ==> irreg_stem("retake","en") +rethought ==> irreg_stem("rethink","ed") +retook ==> irreg_stem("retake","ed") +revel{EDING} ==> semi_reg_stem(0,"") +rewound ==> irreg_stem("rewind","ed") +rewriting ==> irreg_stem("rewrite","ing") +rewritten ==> irreg_stem("rewrite","en") +rewrote ==> irreg_stem("rewrite","ed") +ridden ==> irreg_stem("ride","en") +risen ==> irreg_stem("rise","en") +rival{EDING} ==> semi_reg_stem(0,"") +riven ==> irreg_stem("rive","en") +rode ==> irreg_stem("ride","ed") +roquet{EDING} ==> semi_reg_stem(0,"") +rose ==> irreg_stem("rise","ed") +rough-hewn ==> irreg_stem("rough-hew","en") +rove ==> irreg_stem("reeve","ed") +rowel{EDING} ==> semi_reg_stem(0,"") +rung ==> irreg_stem("ring","ing") +said ==> irreg_stem("say","ed") +samba{EDING} ==> semi_reg_stem(0,"") +sang ==> irreg_stem("sing","ed") +sank ==> irreg_stem("sink","ed") +sat ==> irreg_stem("sit","ed") +saute{EDING} ==> semi_reg_stem(0,"") +saw ==> irreg_stem("see","ed") +sawn ==> irreg_stem("saw","en") +seen ==> irreg_stem("see","en") +sent ==> irreg_stem("send","ed") +sewn ==> irreg_stem("sew","en") +shaken ==> irreg_stem("shake","en") +shaven ==> irreg_stem("shave","en") +shed ==> irreg_stem("shed","ed") +shellack{EDING} ==> semi_reg_stem(1,"") +shent ==> irreg_stem("shend","ed") +shewn ==> irreg_stem("shew","en") +shod ==> irreg_stem("shoe","ed") +shoes ==> irreg_stem("shoe","s") +shone ==> irreg_stem("shine","ed") +shook ==> irreg_stem("shake","ed") +shot ==> irreg_stem("shoot","ed") +shovel{EDING} ==> semi_reg_stem(0,"") +shown ==> irreg_stem("show","en") +shrank ==> 
irreg_stem("shrink","ed") +shrivel{EDING} ==> semi_reg_stem(0,"") +shriven ==> irreg_stem("shrive","en") +shrove ==> irreg_stem("shrive","ed") +shrunk ==> irreg_stem("shrink","en") +shrunken ==> irreg_stem("shrink","en") +sick{EDING} ==> semi_reg_stem(1,"") +sightsaw ==> irreg_stem("sightsee","ed") +sightseen ==> irreg_stem("sightsee","en") +signal{EDING} ==> semi_reg_stem(0,"") +ski'd ==> irreg_stem("ski","ed") +ski{EDING} ==> semi_reg_stem(0,"") +skydove ==> irreg_stem("skydive","ed") +slain ==> irreg_stem("slay","en") +slept ==> irreg_stem("sleep","ed") +slew ==> irreg_stem("slay","ed") +slid ==> irreg_stem("slide","ed") +slidden ==> irreg_stem("slide","en") +slinging ==> irreg_stem("sling","ing") +slung ==> irreg_stem("sling","ed") +slunk ==> irreg_stem("slink","ed") +smelt ==> irreg_stem("smell","ed") +smit ==> irreg_stem("smite","ed") +smiting ==> irreg_stem("smite","ing") +smitten ==> irreg_stem("smite","en") +smote ==> irreg_stem("smite","ed") +snafu{ESEDING} ==> semi_reg_stem(0,"") +snivel{EDING} ==> semi_reg_stem(0,"") +snowshoes ==> irreg_stem("snowshoe","s") +soft-pedal{EDING} ==> semi_reg_stem(0,"") +sol-fa{EDING} ==> semi_reg_stem(0,"") +sold ==> irreg_stem("sell","ed") +soothsaid ==> irreg_stem("soothsay","ed") +sortied ==> irreg_stem("sortie","ed") +sorties ==> irreg_stem("sortie","s") +sought ==> irreg_stem("seek","ed") +sown ==> irreg_stem("sow","en") +spancel{EDING} ==> semi_reg_stem(0,"") +spat ==> irreg_stem("spit","ed") +sped ==> irreg_stem("speed","ed") +spellbound ==> irreg_stem("spellbind","ed") +spelt ==> irreg_stem("spell","ed") +spent ==> irreg_stem("spend","ed") +spilt ==> irreg_stem("spill","ed") +spiral{EDING} ==> semi_reg_stem(0,"") +spoilt ==> irreg_stem("spoil","ed") +spoke ==> irreg_stem("speak","ed") +spoken ==> irreg_stem("speak","en") +spoon-fed ==> irreg_stem("spoon-feed","ed") +spotlit ==> irreg_stem("spotlight","ed") +sprang ==> irreg_stem("spring","ed") +springing ==> irreg_stem("spring","ing") +sprung ==> 
irreg_stem("spring","en") +spun ==> irreg_stem("spin","ed") +squatt{EDING} ==> semi_reg_stem(1,"") +squeegeed ==> irreg_stem("squeegee","ed") +squibb{EDING} ==> semi_reg_stem(1,"") +squidd{EDING} ==> semi_reg_stem(1,"") +squilgee ==> irreg_stem("squeegee","") +stall-fed ==> irreg_stem("stall-feed","ed") +stank ==> irreg_stem("stink","ed") +stencil{EDING} ==> semi_reg_stem(0,"") +stiletto{EDING} ==> semi_reg_stem(0,"") +stinging ==> irreg_stem("sting","ing") +stole ==> irreg_stem("steal","ed") +stolen ==> irreg_stem("steal","en") +stood ==> irreg_stem("stand","ed") +stove ==> irreg_stem("stave","ed") +strewn ==> irreg_stem("strew","en") +stridden ==> irreg_stem("stride","en") +stringing ==> irreg_stem("string","ing") +striven ==> irreg_stem("strive","en") +strode ==> irreg_stem("stride","ed") +strove ==> irreg_stem("strive","ed") +strown ==> irreg_stem("strow","en") +struck ==> irreg_stem("strike","ed") +strung ==> irreg_stem("string","ed") +stuccoed ==> irreg_stem("stucco","ed") +stuck ==> irreg_stem("stick","ed") +stung ==> irreg_stem("sting","ed") +stunk ==> irreg_stem("stink","en") +stymied ==> irreg_stem("stymie","ed") +stymies ==> irreg_stem("stymie","s") +stymying ==> irreg_stem("stymie","ing") +subpoena{EDING} ==> semi_reg_stem(0,"") +subtotal{EDING} ==> semi_reg_stem(0,"") +sung ==> irreg_stem("sing","en") +sunk ==> irreg_stem("sink","en") +sunken ==> irreg_stem("sink","en") +swam ==> irreg_stem("swim","ed") +swept ==> irreg_stem("sweep","ed") +swinging ==> irreg_stem("swing","ing") +swivel{EDING} ==> semi_reg_stem(0,"") +swollen ==> irreg_stem("swell","en") +swopped ==> irreg_stem("swap","ed") +swopping ==> irreg_stem("swap","ing") +swops ==> irreg_stem("swap","s") +swore ==> irreg_stem("swear","ed") +sworn ==> irreg_stem("swear","en") +swum ==> irreg_stem("swim","en") +swung ==> irreg_stem("swing","ed") +symbol{EDING} ==> semi_reg_stem(0,"") +symboll{EDING} ==> semi_reg_stem(1,"") +taken ==> irreg_stem("take","en") +talc{EDING} ==> semi_reg_stem(0,"") 
+talck{EDING} ==> semi_reg_stem(1,"") +tally-ho'd ==> irreg_stem("tally-ho","ed") +tally-hoed ==> irreg_stem("tally-ho","ed") +tangoed ==> irreg_stem("tango","ed") +tassel{EDING} ==> semi_reg_stem(0,"") +taught ==> irreg_stem("teach","ed") +taxi{ESEDING} ==> semi_reg_stem(0,"") +taxying ==> irreg_stem("taxi","ing") +te-heed ==> irreg_stem("te-hee","ed") +teed ==> irreg_stem("tee","ed") +thought ==> irreg_stem("think","ed") +threw ==> irreg_stem("throw","ed") +thriven ==> irreg_stem("thrive","en") +throve ==> irreg_stem("thrive","ed") +thrown ==> irreg_stem("throw","en") +tinged ==> irreg_stem("tinge","ed") +tingeing ==> irreg_stem("tinge","ing") +tinging ==> irreg_stem("tinge","ing") +tinsel{EDING} ==> semi_reg_stem(0,"") +tiptoes ==> irreg_stem("tiptoe","s") +toes ==> irreg_stem("toe","s") +told ==> irreg_stem("tell","ed") +took ==> irreg_stem("take","ed") +tore ==> irreg_stem("tear","ed") +torn ==> irreg_stem("tear","en") +torrify ==> irreg_stem("torrefy","") +total{EDING} ==> semi_reg_stem(0,"") +towel{EDING} ==> semi_reg_stem(0,"") +traffick{EDING} ==> semi_reg_stem(1,"") +tramel{EDING} ==> semi_reg_stem(0,"") +tramell{EDING} ==> semi_reg_stem(1,"") +tramels ==> irreg_stem("trammel","s") +transfixt ==> irreg_stem("transfix","ed") +tranship ==> irreg_stem("transship","ed") +travel{EDING} ==> semi_reg_stem(0,"") +trod ==> irreg_stem("tread","ed") +trodden ==> irreg_stem("tread","en") +trowel{EDING} ==> semi_reg_stem(0,"") +tunnel{EDING} ==> semi_reg_stem(0,"") +typewriting ==> irreg_stem("typewrite","ing") +typewritten ==> irreg_stem("typewrite","en") +typewrote ==> irreg_stem("typewrite","ed") +unbent ==> irreg_stem("unbend","ed") +unbound ==> irreg_stem("unbind","ed") +unclad ==> irreg_stem("unclothe","ed") +uncloth{ESEDING} ==> semi_reg_stem(0,"e") +underbought ==> irreg_stem("underbuy","ed") +underfed ==> irreg_stem("underfeed","ed") +undergirt ==> irreg_stem("undergird","ed") +undergone ==> irreg_stem("undergo","en") +underlaid ==> 
irreg_stem("underlay","ed") +underlain ==> irreg_stem("underlie","en") +"underlay" ==> irreg_stem("underlie","ed") +underlies ==> irreg_stem("underlie","s") +underlying ==> irreg_stem("underlie","ing") +underpaid ==> irreg_stem("underpay","ed") +underselling ==> irreg_stem("undersell","ing") +undershot ==> irreg_stem("undershoot","ed") +undersold ==> irreg_stem("undersell","ed") +understood ==> irreg_stem("understand","ed") +undertaken ==> irreg_stem("undertake","en") +undertook ==> irreg_stem("undertake","ed") +underwent ==> irreg_stem("undergo","ed") +underwriting ==> irreg_stem("underwrite","ing") +underwritten ==> irreg_stem("underwrite","en") +underwrote ==> irreg_stem("underwrite","ed") +undid ==> irreg_stem("undo","ed") +undone ==> irreg_stem("undo","en") +unfroze ==> irreg_stem("unfreeze","ed") +unfrozen ==> irreg_stem("unfreeze","en") +unkennel{EDING} ==> semi_reg_stem(0,"") +unlaid ==> irreg_stem("unlay","ed") +unlearnt ==> irreg_stem("unlearn","ed") +unmade ==> irreg_stem("unmake","ed") +unravel{EDING} ==> semi_reg_stem(0,"") +unrove ==> irreg_stem("unreeve","ed") +unsaid ==> irreg_stem("unsay","ed") +unslinging ==> irreg_stem("unsling","ing") +unslung ==> irreg_stem("unsling","ed") +unspoke ==> irreg_stem("unspeak","ed") +unspoken ==> irreg_stem("unspeak","en") +unstringing ==> irreg_stem("unstring","ing") +unstrung ==> irreg_stem("unstring","ed") +unstuck ==> irreg_stem("unstick","ed") +unswore ==> irreg_stem("unswear","ed") +unsworn ==> irreg_stem("unswear","en") +untaught ==> irreg_stem("unteach","ed") +unthought ==> irreg_stem("unthink","ed") +untied ==> irreg_stem("untie","ed") +unties ==> irreg_stem("untie","s") +untying ==> irreg_stem("untie","ing") +untrod ==> irreg_stem("untread","ed") +untrodden ==> irreg_stem("untread","en") +unwound ==> irreg_stem("unwind","ed") +upbuilt ==> irreg_stem("upbuild","ed") +upheld ==> irreg_stem("uphold","ed") +uphove ==> irreg_stem("upheave","ed") +upped ==> irreg_stem("up","ed") +upping ==> 
irreg_stem("up","ing") +uprisen ==> irreg_stem("uprise","en") +uprose ==> irreg_stem("uprise","ed") +upsprang ==> irreg_stem("upspring","ed") +upspringing ==> irreg_stem("upspring","ing") +upsprung ==> irreg_stem("upspring","en") +upswell{EDING} ==> semi_reg_stem(0,"") +upswept ==> irreg_stem("upsweep","ed") +upswinging ==> irreg_stem("upswing","ing") +upswollen ==> irreg_stem("upswell","en") +upswung ==> irreg_stem("upswing","ed") +vetoed ==> irreg_stem("veto","ed") +victuall{EDING} ==> semi_reg_stem(1,"") +visaed ==> irreg_stem("visa","ed") +visaing ==> irreg_stem("visa","ing") +vitrioll{EDING} ==> semi_reg_stem(1,"") +viva{EDING} ==> semi_reg_stem(0,"") +was ==> irreg_stem("be","ed") +water-ski'd ==> irreg_stem("water-ski","ed") +water-ski{EDING} ==> semi_reg_stem(0,"") +waylaid ==> irreg_stem("waylay","ed") +waylain ==> irreg_stem("waylay","en") +went ==> irreg_stem("go","ed") +wept ==> irreg_stem("weep","ed") +were ==> irreg_stem("be","ed") +whipsawn ==> irreg_stem("whipsaw","en") +whizz{ESEDING} ==> semi_reg_stem(1,"") +winterfed ==> irreg_stem("winterfeed","ed") +wiredrawn ==> irreg_stem("wiredraw","en") +wiredrew ==> irreg_stem("wiredraw","ed") +withdrawn ==> irreg_stem("withdraw","en") +withdrew ==> irreg_stem("withdraw","ed") +withheld ==> irreg_stem("withhold","ed") +withstood ==> irreg_stem("withstand","ed") +woke ==> irreg_stem("wake","ed") +woken ==> irreg_stem("wake","en") +won ==> irreg_stem("win","ed") +wore ==> irreg_stem("wear","ed") +worn ==> irreg_stem("wear","en") +wound ==> irreg_stem("wind","ed") +wove ==> irreg_stem("weave","ed") +woven ==> irreg_stem("weave","en") +wringing ==> irreg_stem("wring","ing") +writing ==> irreg_stem("write","ing") +written ==> irreg_stem("write","en") +wrote ==> irreg_stem("write","ed") +wrung ==> irreg_stem("wring","ed") +ycleped ==> irreg_stem("clepe","ed") +yclept ==> irreg_stem("clepe","ed") +yodel{EDING} ==> semi_reg_stem(0,"") +zeroed ==> irreg_stem("zero","ed") + + +// WordNet irregular noun exceptions - 
filtered to remove junk + +addenda ==> irreg_stem("addendum","s") +adieux ==> irreg_stem("adieu","s") +aides-de-camp ==> irreg_stem("aide-de-camp","s") +aliases ==> irreg_stem("alias","s") +alkalies ==> irreg_stem("alkali","s") +aloes ==> irreg_stem("aloe","s") +amanuenses ==> irreg_stem("amanuensis","s") +analyses ==> irreg_stem("analysis","s") +anastomoses ==> irreg_stem("anastomosis","s") +anthraces ==> irreg_stem("anthrax","s") +antitheses ==> irreg_stem("antithesis","s") +aphides ==> irreg_stem("aphis","s") +apices ==> irreg_stem("apex","s") +apotheoses ==> irreg_stem("apotheosis","s") +appendices ==> irreg_stem("appendix","s") +arboreta ==> irreg_stem("arboretum","s") +areg ==> irreg_stem("erg","s") +arterioscleroses ==> irreg_stem("arteriosclerosis","s") +atlantes ==> irreg_stem("atlas","s") +automata ==> irreg_stem("automaton","s") +axises ==> irreg_stem("axis","s") +bambini ==> irreg_stem("bambino","s") +bandeaux ==> irreg_stem("bandeau","s") +banditti ==> irreg_stem("bandit","s") +bassi ==> irreg_stem("basso","s") +beaux ==> irreg_stem("beau","s") +beeves ==> irreg_stem("beef","s") +bicepses ==> irreg_stem("biceps","s") +bijoux ==> irreg_stem("bijou","s") +billets-doux ==> irreg_stem("billet-doux","s") +boraces ==> irreg_stem("borax","s") +bossies ==> irreg_stem("boss","s") +brainchildren ==> irreg_stem("brainchild","s") +brethren ==> irreg_stem("brother","s") +brothers-in-law ==> irreg_stem("brother-in-law","s") +buckteeth ==> irreg_stem("bucktooth","s") +bunde ==> irreg_stem("bund","s") +bureaux ==> irreg_stem("bureau","s") +busses ==> irreg_stem("bus","s") +calves ==> irreg_stem("calf","s") +calyces ==> irreg_stem("calyx","s") +candelabra ==> irreg_stem("candelabrum","s") +capricci ==> irreg_stem("capriccio","s") +caribous ==> irreg_stem("caribou","s") +carides ==> irreg_stem("caryatid","s") +catalyses ==> irreg_stem("catalysis","s") +cerebra ==> irreg_stem("cerebrum","s") +cervices ==> irreg_stem("cervix","s") +chateaux ==> irreg_stem("chateau","s") 
+cherubim ==> irreg_stem("cherub","s") +children ==> irreg_stem("child","s") +chillies ==> irreg_stem("chilli","s") +chrysalides ==> irreg_stem("chrysalis","s") +chrysalises ==> irreg_stem("chrysalis","s") +ciceroni ==> irreg_stem("cicerone","s") +cloverleaves ==> irreg_stem("cloverleaf","s") +coccyges ==> irreg_stem("coccyx","s") +codices ==> irreg_stem("codex","s") +cola ==> irreg_stem("colon","s") +colloquies ==> irreg_stem("colloquy","s") +colones ==> irreg_stem("colon","s") +concertanti ==> irreg_stem("concertante","s") +concerti ==> irreg_stem("concerto","s") +concertini ==> irreg_stem("concertino","s") +conquistadores ==> irreg_stem("conquistador","s") +contralti ==> irreg_stem("contralto","s") +corpora ==> irreg_stem("corpus","s") +corrigenda ==> irreg_stem("corrigendum","s") +cortices ==> irreg_stem("cortex","s") +cosmoses ==> irreg_stem("cosmos","s") +crescendi ==> irreg_stem("crescendo","s") +crises ==> irreg_stem("crisis","s") +criteria ==> irreg_stem("criterion","s") +cruces ==> irreg_stem("crux","s") +culs-de-sac ==> irreg_stem("cul-de-sac","s") +cyclopes ==> irreg_stem("cyclops","s") +cyclopses ==> irreg_stem("cyclops","s") +data ==> irreg_stem("datum","s") +daughters-in-law ==> irreg_stem("daughter-in-law","s") +desiderata ==> irreg_stem("desideratum","s") +diaereses ==> irreg_stem("diaeresis","s") +diaerses ==> irreg_stem("diaeresis","s") +diagnoses ==> irreg_stem("diagnosis","s") +dialyses ==> irreg_stem("dialysis","s") +diathses ==> irreg_stem("diathesis","s") +dicta ==> irreg_stem("dictum","s") +diereses ==> irreg_stem("dieresis","s") +dilettantes ==> irreg_stem("dilettante","s") +dilettanti ==> irreg_stem("dilettante","s") +divertimenti ==> irreg_stem("divertimento","s") +"does" ==> irreg_stem("doe","s") +dogteeth ==> irreg_stem("dogtooth","s") +dormice ==> irreg_stem("dormouse","s") +dryades ==> irreg_stem("dryad","s") +dui ==> irreg_stem("duo","s") +duona ==> irreg_stem("duodenum","s") +duonas ==> irreg_stem("duodenum","s") +dwarves ==> 
irreg_stem("dwarf","s") +eisteddfodau ==> irreg_stem("eisteddfod","s") +ellipses ==> irreg_stem("ellipsis","s") +elves ==> irreg_stem("elf","s") +emphases ==> irreg_stem("emphasis","s") +epicentres ==> irreg_stem("epicentre","s") +epiglottides ==> irreg_stem("epiglottis","s") +epiglottises ==> irreg_stem("epiglottis","s") +errata ==> irreg_stem("erratum","s") +exegeses ==> irreg_stem("exegesis","s") +eyeteeth ==> irreg_stem("eyetooth","s") +fathers-in-law ==> irreg_stem("father-in-law","s") +feet ==> irreg_stem("foot","s") +fellaheen ==> irreg_stem("fellah","s") +fellahin ==> irreg_stem("fellah","s") +femora ==> irreg_stem("femur","s") +fezzes ==> irreg_stem("fez","s") +flagstaves ==> irreg_stem("flagstaff","s") +flambeaux ==> irreg_stem("flambeau","s") +flatfeet ==> irreg_stem("flatfoot","s") +fleurs-de-lis ==> irreg_stem("fleur-de-lis","s") +fleurs-de-lys ==> irreg_stem("fleur-de-lys","s") +flyleaves ==> irreg_stem("flyleaf","s") +fora ==> irreg_stem("forum","s") +forcipes ==> irreg_stem("forceps","s") +forefeet ==> irreg_stem("forefoot","s") +fulcra ==> irreg_stem("fulcrum","s") +gallowses ==> irreg_stem("gallows","s") +gases ==> irreg_stem("gas","s") +gasses ==> irreg_stem("gas","s") +gateaux ==> irreg_stem("gateau","s") +geese ==> irreg_stem("goose","s") +gemboks ==> irreg_stem("gemsbok","s") +genera ==> irreg_stem("genus","s") +geneses ==> irreg_stem("genesis","s") +gentlemen-at-arms ==> irreg_stem("gentleman-at-arms","s") +gestalten ==> irreg_stem("gestalt","s") +giraffes ==> irreg_stem("giraffe","s") +glissandi ==> irreg_stem("glissando","s") +glottides ==> irreg_stem("glottis","s") +glottises ==> irreg_stem("glottis","s") +godchildren ==> irreg_stem("godchild","s") +goings-over ==> irreg_stem("going-over","s") +grandchildren ==> irreg_stem("grandchild","s") +halves ==> irreg_stem("half","s") +hangers-on ==> irreg_stem("hanger-on","s") +helices ==> irreg_stem("helix","s") +hooves ==> irreg_stem("hoof","s") +hosen ==> irreg_stem("hose","s") +hypnoses ==> 
irreg_stem("hypnosis","s") +hypotheses ==> irreg_stem("hypothesis","s") +iambi ==> irreg_stem("iamb","s") +ibices ==> irreg_stem("ibex","s") +ibises ==> irreg_stem("ibis","s") +impedimenta ==> irreg_stem("impediment","s") +indices ==> irreg_stem("index","s") +intagli ==> irreg_stem("intaglio","s") +intermezzi ==> irreg_stem("intermezzo","s") +interregna ==> irreg_stem("interregnum","s") +irides ==> irreg_stem("iris","s") +irises ==> irreg_stem("iris","s") +is ==> irreg_stem("is","s") +jacks-in-the-box ==> irreg_stem("jack-in-the-box","s") +kibbutzim ==> irreg_stem("kibbutz","s") +knives ==> irreg_stem("knife","s") +kohlrabies ==> irreg_stem("kohlrabi","s") +kronen ==> irreg_stem("krone","s") +kroner ==> irreg_stem("krone","s") +kronur ==> irreg_stem("krona","s") +kylikes ==> irreg_stem("kylix","s") +ladies-in-waiting ==> irreg_stem("lady-in-waiting","s") +larynges ==> irreg_stem("larynx","s") +latices ==> irreg_stem("latex","s") +leges ==> irreg_stem("lex","s") +libretti ==> irreg_stem("libretto","s") +lice ==> irreg_stem("louse","s") +lire ==> irreg_stem("lira","s") +lives ==> irreg_stem("life","s") +loaves ==> irreg_stem("loaf","s") +loggie ==> irreg_stem("loggia","s") +lustra ==> irreg_stem("lustre","s") +lyings-in ==> irreg_stem("lying-in","s") +macaronies ==> irreg_stem("macaroni","s") +maestri ==> irreg_stem("maestro","s") +mantes ==> irreg_stem("mantis","s") +mantises ==> irreg_stem("mantis","s") +markkaa ==> irreg_stem("markka","s") +marquises ==> irreg_stem("marquis","s") +masters-at-arms ==> irreg_stem("master-at-arms","s") +matrices ==> irreg_stem("matrix","s") +matzoth ==> irreg_stem("matzo","s") +mausolea ==> irreg_stem("mausoleum","s") +maxima ==> irreg_stem("maximum","s") +meioses ==> irreg_stem("meiosis","s") +memoranda ==> irreg_stem("memorandum","s") +men-at-arms ==> irreg_stem("man-at-arms","s") +men-o'-war ==> irreg_stem("man-of-war","s") +men-of-war ==> irreg_stem("man-of-war","s") +menservants ==> irreg_stem("manservant","s") +mesdemoiselles 
==> irreg_stem("mademoiselle","s") +messieurs ==> irreg_stem("monsieur","s") +metamorphoses ==> irreg_stem("metamorphosis","s") +metatheses ==> irreg_stem("metathesis","s") +metempsychoses ==> irreg_stem("metempsychosis","s") +metropolises ==> irreg_stem("metropolis","s") +mice ==> irreg_stem("mouse","s") +milieux ==> irreg_stem("milieu","s") +minima ==> irreg_stem("minimum","s") +momenta ==> irreg_stem("momentum","s") +monies ==> irreg_stem("money","s") +monsignori ==> irreg_stem("monsignor","s") +mooncalves ==> irreg_stem("mooncalf","s") +mothers-in-law ==> irreg_stem("mother-in-law","s") +naiades ==> irreg_stem("naiad","s") +necropoleis ==> irreg_stem("necropolis","s") +necropolises ==> irreg_stem("necropolis","s") +nemeses ==> irreg_stem("nemesis","s") +neuroses ==> irreg_stem("neurosis","s") +novelle ==> irreg_stem("novella","s") +oases ==> irreg_stem("oasis","s") +obloquies ==> irreg_stem("obloquy","s") +octahedra ==> irreg_stem("octahedron","s") +optima ==> irreg_stem("optimum","s") +ora ==> irreg_stem("os","s") +osar ==> irreg_stem("os","s") +ossa ==> irreg_stem("os","s") +ova ==> irreg_stem("ovum","s") +oxen ==> irreg_stem("ox","s") +paralyses ==> irreg_stem("paralysis","s") +parentheses ==> irreg_stem("parenthesis","s") +paris-mutuels ==> irreg_stem("pari-mutuel","s") +pastorali ==> irreg_stem("pastorale","s") +patresfamilias ==> irreg_stem("paterfamilias","s") +pease ==> irreg_stem("pea","s") +pekingese ==> irreg_stem("pekinese","s") +pelves ==> irreg_stem("pelvis","s") +pelvises ==> irreg_stem("pelvis","s") +pence ==> irreg_stem("penny","s") +penes ==> irreg_stem("penis","s") +penises ==> irreg_stem("penis","s") +penknives ==> irreg_stem("penknife","s") +perihelia ==> irreg_stem("perihelion","s") +pfennige ==> irreg_stem("pfennig","s") +pharynges ==> irreg_stem("pharynx","s") +phenomena ==> irreg_stem("phenomenon","s") +philodendra ==> irreg_stem("philodendron","s") +pieds-a-terre ==> irreg_stem("pied-a-terre","s") +pineta ==> irreg_stem("pinetum","s") 
+plateaux ==> irreg_stem("plateau","s") +plena ==> irreg_stem("plenum","s") +pocketknives ==> irreg_stem("pocketknife","s") +portmanteaux ==> irreg_stem("portmanteau","s") +potlies ==> irreg_stem("potbelly","s") +praxes ==> irreg_stem("praxis","s") +praxises ==> irreg_stem("praxis","s") +proboscides ==> irreg_stem("proboscis","s") +proboscises ==> irreg_stem("proboscis","s") +prostheses ==> irreg_stem("prosthesis","s") +protozoa ==> irreg_stem("protozoan","s") +pudenda ==> irreg_stem("pudendum","s") +putti ==> irreg_stem("putto","s") +quanta ==> irreg_stem("quantum","s") +quarterstaves ==> irreg_stem("quarterstaff","s") +quizzes ==> irreg_stem("quiz","s") +reales ==> irreg_stem("real","s") +recta ==> irreg_stem("rectum","s") +referenda ==> irreg_stem("referendum","s") +reis ==> irreg_stem("real","s") +rhinoceroses ==> irreg_stem("rhinoceros","s") +roes ==> irreg_stem("roe","s") +rondeaux ==> irreg_stem("rondeau","s") +rostra ==> irreg_stem("rostrum","s") +runners-up ==> irreg_stem("runner-up","s") +sancta ==> irreg_stem("sanctum","s") +sawboneses ==> irreg_stem("sawbones","s") +scarves ==> irreg_stem("scarf","s") +scherzi ==> irreg_stem("scherzo","s") +scleroses ==> irreg_stem("sclerosis","s") +scrota ==> irreg_stem("scrotum","s") +secretaries-general ==> irreg_stem("secretary-general","s") +selves ==> irreg_stem("self","s") +sera ==> irreg_stem("serum","s") +seraphim ==> irreg_stem("seraph","s") +sheaves ==> irreg_stem("sheaf","s") +shelves ==> irreg_stem("shelf","s") +simulacra ==> irreg_stem("simulacrum","s") +sisters-in-law ==> irreg_stem("sister-in-law","s") +soli ==> irreg_stem("solo","s") +soliloquies ==> irreg_stem("soliloquy","s") +sons-in-law ==> irreg_stem("son-in-law","s") +spectra ==> irreg_stem("spectrum","s") +sphinges ==> irreg_stem("sphinx","s") +splayfeet ==> irreg_stem("splayfoot","s") +sputa ==> irreg_stem("sputum","s") +stamina ==> irreg_stem("stamen","s") +stelae ==> irreg_stem("stele","s") +stepchildren ==> irreg_stem("stepchild","s") +sterna 
==> irreg_stem("sternum","s") +strata ==> irreg_stem("stratum","s") +stretti ==> irreg_stem("stretto","s") +summonses ==> irreg_stem("summons","s") +swamies ==> irreg_stem("swami","s") +swathes ==> irreg_stem("swathe","s") +synopses ==> irreg_stem("synopsis","s") +syntheses ==> irreg_stem("synthesis","s") +tableaux ==> irreg_stem("tableau","s") +taxies ==> irreg_stem("taxi","s") +teeth ==> irreg_stem("tooth","s") +tempi ==> irreg_stem("tempo","s") +tenderfeet ==> irreg_stem("tenderfoot","s") +testes ==> irreg_stem("testis","s") +theses ==> irreg_stem("thesis","s") +thieves ==> irreg_stem("thief","s") +thoraces ==> irreg_stem("thorax","s") +titmice ==> irreg_stem("titmouse","s") +tootses ==> irreg_stem("toots","s") +torsi ==> irreg_stem("torso","s") +tricepses ==> irreg_stem("triceps","s") +triumviri ==> irreg_stem("triumvir","s") +trousseaux ==> irreg_stem("trousseau","s") +turves ==> irreg_stem("turf","s") +tympana ==> irreg_stem("tympanum","s") +ultimata ==> irreg_stem("ultimatum","s") +vacua ==> irreg_stem("vacuum","s") +vertices ==> irreg_stem("vertex","s") +vertigines ==> irreg_stem("vertigo","s") +virtuosi ==> irreg_stem("virtuoso","s") +vortices ==> irreg_stem("vortex","s") +wagons-lits ==> irreg_stem("wagon-lit","s") +weirdies ==> irreg_stem("weirdie","s") +werewolves ==> irreg_stem("werewolf","s") +wharves ==> irreg_stem("wharf","s") +whippers-in ==> irreg_stem("whipper-in","s") +wolves ==> irreg_stem("wolf","s") +woodlice ==> irreg_stem("woodlouse","s") +yogin ==> irreg_stem("yogi","s") +zombies ==> irreg_stem("zombie","s") + + +// Additions from LOB corpus + +"bias" ==> null_stem() +"canvas" ==> null_stem() +"canvas"{ESEDING} ==> semi_reg_stem(0,"") +"cryed" ==> irreg_stem("cry","ed") +"embed" ==> null_stem() +"focuss"{ESEDING} ==> semi_reg_stem(1,"") +"forted" ==> irreg_stem("forte","ed") +"forteing" ==> irreg_stem("forte","ing") +"gas" ==> null_stem() +"picknicks" ==> irreg_stem("picknic","s") +"picknick"{EDING} ==> semi_reg_stem(1,"") +"resold" ==> 
irreg_stem("resell","ed") +"retold" ==> irreg_stem("retell","ed") +"retying" ==> irreg_stem("retie","ing") +"singed" ==> irreg_stem("singe","ed") +"singeing" ==> irreg_stem("singe","ing") +"trecked" ==> irreg_stem("trek","ed") +"trecking" ==> irreg_stem("trek","ing") +("adher" OR "ador" OR "attun" OR "bast" OR "bor" OR "bronz" OR "can" OR "centr" OR "cit" OR "compet" OR "complet" OR "concret" OR "condon" OR "contraven" OR "conven" OR "cran" OR "delet" OR "delineat" OR "dop" OR "drap" OR "dron" OR "escap" OR "excit" OR "fort" OR "gazett" OR "grop" OR "hon" OR "ignit" OR "ignor" OR "incit" OR "interven" OR "inton" OR "invit" OR "landscap" OR "manoeuvr" OR "nauseat" OR "normalis" OR "outmanoeuvr" OR "overaw" OR "permeat" OR "persever" OR "por" OR "postpon" OR "prun" OR "recit" OR "reshap" OR "rop" OR "shap" OR "shor" OR "snor" OR "ston" OR "wip"){ESEDING} ==> semi_reg_stem(0,"e") +("ape" OR "appall" OR "augur" OR "belong" OR "berth" OR "burr" OR "conquer" OR "egg" OR "enroll" OR "enthrall" OR "forestall" OR "froth" OR "fulfill" OR "install" OR "instill" OR "lacquer" OR "martyr" OR "mouth" OR "murmur" OR "pivot" OR "preceed" OR "prolong" OR "purr" OR "quell" OR "recall" OR "refill" OR "remill" OR "resell" OR "retell" OR "smooth" OR "throng" OR "twang" OR "unearth"){EDING} ==> semi_reg_stem(0,"") +"canvases" ==> irreg_stem("canvas","s") +"carcases" ==> irreg_stem("carcas","s") +"lenses" ==> irreg_stem("lens","s") +"nappies" ==> irreg_stem("nappy","s") +"schizophrenia" ==> irreg_stem("schizophrenia","s") +(({A}*"metre") OR ({A}*"litre") OR ({A}+"ette") OR "acre" OR "Aussie" OR "bronze" OR "budgie" OR "burnurn" OR "canoe" OR "carriageway" OR "catastrophe" OR "centre" OR "cill" OR "cliche" OR "commie" OR "coolie" OR "curie" OR "demesne" OR "employee" OR "evacuee" OR "fibre" OR "foe" OR "headache" OR "horde" OR "magpie" OR "manoeuvre" OR "moggie" OR "moustache" OR "movie" OR "nightie" OR "oboe" OR "programme" OR "queue" OR "sabre" OR "shoe" OR "sloe" OR "sortie" OR "taste" 
OR "theatre" OR "timbre" OR "titre" OR "umbrella" OR "utopia" OR "wiseacre" OR "woe")"s" ==> stem(1,"","s") +(({A}+"itis") OR "abdomen" OR "achimenes" OR "acumen" OR "Afrikaans" OR "alibi" OR "alkali" OR "amnesia" OR "anaesthesia" OR "aphis" OR "aria" OR "asbestos" OR "asphyxia" OR "atlas" OR "axis" OR "bedclothes" OR "begonia" OR "bias" OR "bikini" OR "calyptopis" OR "cannula" OR "cantharides" OR "canvas" OR "caries" OR "chas" OR "chamois" OR "chaos" OR "chili" OR "chinchilla" OR "Christmas" OR "confetti" OR "contretemps" OR "cornucopia" OR "corps" OR "cosmos" OR "cupola" OR "cyclamen" OR "dais" OR "debris" OR "diabetes" OR "diphtheria" OR "dysphagia" OR "encyclopaedia" OR "ennui" OR "escallonia" OR "ethos" OR "extremis" OR "fella" OR "ferris" OR "flotilla" OR "formula" OR "forsythia" OR "gallows" OR "ganglia" OR "gardenia" OR "gas" OR "gasworks" OR "gondola" OR "grata" OR "guerrilla" OR "haemophilia" OR "hors" OR "hovis" OR "hustings" OR "hysteria" OR "inertia" OR "innards" OR "iris" OR "isosceles" OR "khaki" OR "koala" OR "lens" OR "macaroni" OR "manilla" OR "mania" OR "mantis" OR "maquis" OR "martini" OR "matins" OR "memorabilia" OR "metropolis" OR "minutiae" OR "molasses" OR "morphia" OR "mortis" OR "neurasthenia" OR "normoglycaemia" OR "nostalgia" OR "omen" OR "pantometria" OR "parabola" OR "paraphernalia" OR "pastis" OR "patella" OR "patens"! 
OR "pathos" OR "patois" OR "pectoris" OR "pelvis" OR "peninsula" OR "phantasmagoria" OR "pharos" OR "plumbites" OR "pneumonia" OR "polyuria" OR "portcullis" OR "pyrexia" OR "regalia" OR "rhinoceros" OR "safari" OR "salami" OR "sari" OR "saturnalia" OR "series" OR "spaghetti" OR "specimen" OR "species" OR "submatrices" OR "subtopia" OR "suburbia" OR "syphilis" OR "tares" OR "taxi" OR "tennis" OR "toccata" OR "trellis" OR "tripos" OR "turps" OR "tutti" OR "umbrella" OR "utopia" OR "villa") ==> null_stem() +("accoutrements" OR "aerodynamics" OR "aeronautics" OR "aesthetics" OR "algae" OR "amends" OR "ammonia" OR "ancients" OR "annals" OR "antics" OR "arrears" OR "assizes" OR "auspices" OR "backwoods" OR "bacteria" OR "banns" OR "barracks" OR "baths" OR "battlements" OR "bellows" OR "belongings" OR "billiards" OR "binoculars" OR "bitters" OR "blandishments" OR "bleachers" OR "blinkers" OR "blues" OR "breeches" OR "brussels" OR "clothes" OR "clutches" OR "commons" OR "confines" OR "contents" OR "credentials" OR "crossbones" OR "crossroads" OR "curia" OR "damages" OR "dealings" OR "dentures" OR "depths" OR "devotions" OR "diggings" OR "doings" OR "downs" OR "droppings" OR "dues" OR "dynamics" OR "earnings" OR "eatables" OR "eaves" OR "economics" OR "electrodynamics" OR "electronics" OR "entrails" OR "environs" OR "equities" OR "ethics" OR "eugenics" OR "filings" OR "finances" OR "folks" OR "footlights" OR "fumes" OR "furnishings" OR "genitals" OR "goggles" OR "goods" OR "grits" OR "groceries" OR "grounds" OR "handcuffs" OR "headquarters" OR "histrionics" OR "hostilities" OR "humanities" OR "hydraulics" OR "hysterics" OR "illuminations" OR "innings" OR "italics" OR "jeans" OR "jitters" OR "kinetics" OR "knickers" OR "kudos" OR "la! 
titudes" OR "leggings" OR "likes" OR "linguistics" OR "lodgings" OR "loggerheads" OR "mains" OR "manners" OR "mathematics" OR "means" OR "measles" OR "media" OR "memoirs" OR "metaphysics" OR "mews" OR "mockers" OR "morals" OR "motions" OR "munitions" OR "news" OR "nutria" OR "nylons" OR "oats" OR "odds" OR "oils" OR "oilskins" OR "optics" OR "orthodontics" OR "outskirts" OR "overalls" OR "overtones" OR "pants" OR "pantaloons" OR "papers" OR "paras" OR "paratroops" OR "particulars" OR "pediatrics" OR "phonemics" OR "phonetics" OR "physics" OR "pincers" OR "plastics" OR "politics" OR "proceeds" OR "proceedings" OR "prospects" OR "provinces" OR "provisions" OR "pyjamas" OR "races" OR "rations" OR "ravages" OR "refreshments" OR "regards" OR "reinforcements" OR "remains" OR "respects" OR "returns" OR "riches" OR "rights" OR "savings" OR "schizophrenia" OR "scissors" OR "seconds" OR "semantics" OR "senses" OR "shades" OR "shallows" OR "shambles" OR "shares" OR "shivers" OR "shorts" OR "singles" OR "skittles" OR "slacks" OR "soundings" OR "specifics" OR "spectacles" OR "spoils" OR "stamens" OR "statics" OR "statistics" OR "stratums" OR "summons" OR "supplies" OR "surroundings" OR "suspenders" OR "takings" OR "teens" OR "telecommunications" OR "tent! 
erhooks" OR "thanks" OR "theatricals" OR "thermos" OR "thermodynamics" OR "tights" OR "toils" OR "tops" OR "trades" OR "trappings" OR "travels" OR "troops" OR "tropics" OR "trousers" OR "tweeds" OR "underpants" OR "vapours" OR "vicissitudes" OR "vitals" OR "volumes" OR "wages" OR "wanderings" OR "wares" OR "waters" OR "whereabouts" OR "whites" OR "winnings" OR "withers" OR "woollens" OR "workings" OR "writings" OR "yes") ==> null_stem() +("boatie" OR "bonhomie" OR "clippie" OR "creepie" OR "dearie" OR "droppie" OR "gendarmerie" OR "girlie" OR "goalie" OR "haddie" OR "kookie" OR "kyrie" OR "lambie" OR "lassie" OR "marie" OR "menagerie" OR "pettie" OR "reverie" OR "snottie" OR "sweetie")"s" ==> stem(1,"","s") + +//Additions from PENN treebank + +("buffett" OR "plummett"){EDING} ==> semi_reg_stem(1,"") +"buffetts" ==> irreg_stem("buffet","s") +"plummetts" ==> irreg_stem("plummet","s") +"gunsling" ==> null_stem() +"gunslung" ==> irreg_stem("gunsling","ed") +"gunslinging" ==> irreg_stem("gunsling","ing") +"hamstring" ==> null_stem() +"shred" ==> null_stem() +"unfocuss"{ESEDING} ==> semi_reg_stem(1,"") +("accret" OR "clon" OR "deplet" OR "dethron" OR "dup" OR "excret" OR "expedit" OR "extradit" OR "fet" OR "finetun" OR "gor" OR "hing" OR "massacr" OR "obsolet" OR "reconven" OR "recreat" OR "recus" OR "reignit" OR "swip" OR "videotap" OR "zon"){ESEDING} ==> semi_reg_stem(0,"e") +("backpedal" OR "bankroll" OR "bequeath" OR "blackball" OR "bottom" OR "clang" OR "debut" OR "doctor" OR "eyeball" OR "factor" OR "imperil" OR "landfill" OR "margin" OR "occur" OR "overbill" OR "pilot" OR "prong" OR "pyramid" OR "reinstall" OR "relabel" OR "remodel" OR "squirrel" OR "stonewall" OR "wrong"){EDING} ==> semi_reg_stem(0,"") + + +"biases" ==> irreg_stem("bias","s") +"biscotti" ==> irreg_stem("biscotto","s") +"bookshelves" ==> irreg_stem("bookshelf","s") +"palazzi" ==> irreg_stem("palazzo","s") +("beastie" OR "brownie" OR "cache" OR "cadre" OR "calorie" OR "champagne" OR "cologne" OR 
"cookie" OR "druggie" OR "eaterie" OR "emigre" OR "emigree" OR "employee" OR "freebie" OR "genre" OR "kiddie" OR "massacre" OR "moonie" OR "necktie" OR "niche" OR "prairie" OR "softie" OR "toothpaste" OR "willie")"s" ==> stem(1,"","s") +(({A}*"phobia") OR "academia" OR "accompli" OR "aegis" OR "anemia" OR "anorexia" OR "anti" OR "artemisia" OR "ataxia" OR "beatlemania" OR "blini" OR "cafeteria" OR "capita" OR "cognoscenti" OR "coli" OR "deli" OR "dementia" OR "downstairs" OR "dyslexia" OR "dystopia" OR "encyclopedia" OR "estancia" OR "euphoria" OR "euthanasia" OR "fracas" OR "fuss" OR "gala" OR "gorilla" OR "gravitas" OR "GI" OR "habeas" OR "haemophilia" OR "hemophilia" OR "hoopla" OR "hubris" OR "hula" OR "hypoglycemia" OR "ides" OR "impatiens" OR "informatics" OR "intelligentsia" OR "jacuzzi" OR "kiwi" OR "leukaemia" OR "leukemia" OR "mafia" OR "magnolia" OR "malaria" OR "maquila" OR "marginalia" OR "megalomania" OR "mercedes" OR "militia" OR "miniseries" OR "mips" OR "mufti" OR "muni" OR "olympics" OR "pancreas" OR "paranoia" OR "pastoris" OR "pastrami" OR "pepperoni" OR "pepsi" OR "piroghi" OR "pizzeria" OR "plainclothes" OR "pneumocystis" OR "potpourri" OR "proboscis" OR "rabies" OR "reggae" OR "regimen" OR "rigatoni" OR "salmonella" OR "samurai" OR "sarsaparilla" OR "semen" OR "ski" OR "sonata" OR "spatula" OR "stats" OR "subtilis" OR "sushi" OR "tachyarrhythmia" OR "tachycardia" OR "tequila" OR "tetris" OR "thrips" OR "throes" OR "timpani" OR "tsunami" OR "vacc! 
inia" OR "vanilla") ==> null_stem() +("acrobatics" OR "alias" OR "athletics" OR "basics" OR "betters" OR "bifocals" OR "bowels" OR "briefs" OR "checkers" OR "denims" OR "doldrums" OR "dramatics" OR "dungarees" OR "ergonomics" OR "genetics" OR "gymnastics" OR "hackles" OR "haves" OR "incidentals" OR "ironworks" OR "jinks" OR "leavings" OR "leftovers" OR "logistics" OR "makings" OR "microelectronics" OR "mores" OR "oodles" OR "pajamas" OR "pampas" OR "panties" OR "payola" OR "pickings" OR "pliers" OR "pi" OR "ravings" OR "reparations" OR "rudiments" OR "scads" OR "splits" OR "stays" OR "subtitles" OR "sunglasss" OR "sweepstakes" OR "tatters" OR "toiletries" OR "tongs" OR "trivia" OR "tweezers" OR "vibes" OR "waterworks" OR "woolens") ==> null_stem() +("biggie" OR "bourgeoisie" OR "brie" OR "camaraderie" OR "chinoiserie" OR "coterie" OR "doggie" OR "genie" OR "hippie" OR "junkie" OR "lingerie" OR "moxie" OR "preppie" OR "rookie" OR "yuppie")"s" ==> stem(1,"","s") + + +// Additions from SUSANNE corpus +("chor" OR "sepulchr" OR "silhouett" OR "telescop"){ESEDING} ==> semi_reg_stem(0,"e") +("subpena" OR "suds" OR "fresco"){EDING} ==> semi_reg_stem(0,"") + +"daises" ==> irreg_stem("dais","s") +"reguli" ==> irreg_stem("regulo","s") +"steppes" ==> irreg_stem("steppe","s") +(({A}+"philia") OR "fantasia" OR "Feis" OR "Gras" OR "Mardi" OR "OS" OR "pleura" OR "tularemia" OR "vasa") ==> null_stem() +("calisthenics" OR "heroics" OR "rheumatics" OR "victuals" OR "wiles") ==> null_stem() +("auntie" OR "anomie" OR "coosie" OR "quickie")"s" ==> stem(1,"","s") + + +// Additions from SEC (Spoken English Corpus) +("absentia" OR "bourgeois" OR "pecunia" OR "Syntaxis" OR "uncia") ==> null_stem() +("apologetics" OR "goings" OR "outdoors") ==> null_stem() +("collie")"s" ==> stem(1,"","s") + + +// Additions from MRC psycholinguistic database */ + +"bob-sled" ==> null_stem() +"imbed" ==> null_stem() +"precis" ==> null_stem() +"precis"{ESEDING} ==> semi_reg_stem(0,"") + +"obsequies" ==> 
irreg_stem("obsequy","s") +"superficies" ==> irreg_stem("superficie","s") +("acacia" OR "albumen" OR "alms" OR "alopecia" OR "ambergris" OR "ambrosia" OR "anaemia" OR "analgesia" OR "anopheles" OR "aphasia" OR "arras" OR "assagai" OR "assegai" OR "astrophysics" OR "aubrietia" OR "avoirdupois" OR "bathos" OR "beriberi" OR "biceps" OR "bitumen" OR "borzoi" OR "broccoli" OR "cadi" OR "calends" OR "callisthenics" OR "calla" OR "camellia" OR "campanula" OR "cantata" OR "caravanserai" OR "cedilla" OR "chilli" OR "chrysalis" OR "clematis" OR "clitoris" OR "cognomen" OR "collywobbles" OR "copula" OR "corolla" OR "cybernetics" OR "cyclops" OR "cyclopaedia" OR "cyclopedia" OR "dahlia" OR "dhoti" OR "dickens" OR "dietetics" OR "dipsomania" OR "dolmen" OR "dyspepsia" OR "effendi" OR "elevenses" OR "epidermis" OR "epiglottis" OR "erysipelas" OR "eurhythmics" OR "faeces" OR "fascia" OR "fibula" OR "finis" OR "fistula" OR "fives" OR "fleur-de-lis" OR "forceps" OR "freesia" OR "fuchsia" OR "geophysics" OR "geriatrics" OR "glottis" OR "guerilla" OR "hadji" OR "haggis" OR "hara-kiri" OR "hernia" OR "herpes" OR "hoop-la" OR "houri" OR "hymen" OR "hyperbola" OR "hypochondria" OR "ibis" OR "inamorata" OR "insignia" OR "insomnia" OR "jackanapes" OR "jimjams" OR "jodhpurs" OR "kepi" OR "kleptomania" OR "kohlrabi" OR "kris" OR "k! 
ukri" OR "kumis" OR "litchi" OR "litotes" OR "loggia" OR "magnesia" OR "man-at-arms" OR "manila" OR "mantilla" OR "marquis" OR "master-at-arms" OR "mattins" OR "melancholia" OR "menses" OR "minutia" OR "monomania" OR "muggins" OR "mumps" OR "mi" OR "myopia" OR "nebula" OR "necropolis" OR "neuralgia" OR "nibs" OR "numismatics" OR "nymphomania" OR "obstetrics" OR "okapi" OR "onomatopoeia" OR "ophthalmia" OR "paraplegia" OR "patchouli" OR "paterfamilias" OR "penis" OR "pergola" OR "petunia" OR "pharmacopoeia" OR "phi" OR "piccalilli" OR "poinsettia" OR "praxis" OR "precis" OR "primula" OR "prophylaxis" OR "pyrites" OR "rabbi" OR "raffia" OR "reredos" OR "revers" OR "rickets" OR "rounders" OR "rubella" OR "saki" OR "salvia" OR "sassafras" OR "sawbones" OR "scabies" OR "scapula" OR "schnapps" OR "scintilla" OR "scrofula" OR "secateurs" OR "sepia" OR "septicaemia" OR "sequoia" OR "shears" OR "smithereens" OR "spermaceti" OR "stamen" OR "suds" OR "sundae" OR "si" OR "swami" OR "tarantella" OR "tarantula" OR "testis" OR "therapeutics" OR "thews" OR "tibia" OR "tiddlywinks" OR "tombola" OR "topi" OR "tortilla" OR "trews" OR "triceps" OR "underclothes" OR "undies" OR "uvula" OR "verdigris" OR "vermicelli" OR "viola" OR "wadi" OR "wapiti" OR "wisteria" OR "yaws! 
" OR "yogi" OR "zinnia") ==> null_stem() +("aerie" OR "birdie" OR "bogie" OR "caddie" OR "cock-a-leekie" OR "collie" OR "corrie" OR "cowrie" OR "dixie" OR "eyrie" OR "faerie" OR "gaucherie" OR "gillie" OR "knobkerrie" OR "laddie" OR "mashie" OR "mealie" OR "menagerie" OR "organdie" OR "patisserie" OR "pinkie" OR "pixie" OR "stymie" OR "talkie")"s" ==> stem(1,"","s") + + +// Additions from 'Computer-Usable' OALDCE from Oxford Text Archive */ + +("ablutions" OR "adenoids" OR "aerobatics" OR "afters" OR "astronautics" OR "atmospherics" OR "bagpipes" OR "ballistics" OR "bell-bottoms" OR "belles-lettres" OR "blinders" OR "bloomers" OR "butterfingers" OR "buttocks" OR "bygones" OR "cahoots" OR "cannabis" OR "castanets" OR "clappers" OR "corgi" OR "cross-purposes" OR "dodgems" OR "dregs" OR "duckboards" OR "edibles" OR "envoi" OR "eurythmics" OR "externals" OR "extortions" OR "falsies" OR "fisticuffs" OR "fleshings" OR "fleur-de-lys" OR "fours" OR "gentleman-at-arms" OR "geopolitics" OR "giblets" OR "glassworks" OR "gleanings" OR "handlebars" OR "heartstrings" OR "hi-fi" OR "homiletics" OR "housetops" OR "hunkers" OR "hydroponics" OR "impala" OR "kalends" OR "knickerbockers" OR "kwela" OR "lees" OR "lei" OR "lexis" OR "lieder" OR "literati" OR "loins" OR "meanderings" OR "meths" OR "muesli" OR "muniments" OR "necessaries" OR "nines" OR "ninepins" OR "nippers" OR "nuptials" OR "orthopaedics" OR "paediatrics" OR "phonics" OR "polemics" OR "pontificals" OR "prelims" OR "pyrotechnics" OR "ravioli" OR "rompers" OR "ructions" OR "scampi" OR "scrapings" OR "serjeant-at-arms" OR "sheila" OR "shires" OR "smalls" OR "steelworks" OR "sweepings" OR "toxaemia" OR "ti" OR "v! 
espers" OR "virginals" OR "waxworks" OR "yeti" OR "zucchini") ==> null_stem() +("mountie" OR "brasserie" OR "cup-tie" OR "grannie" OR "koppie" OR "rotisserie" OR "walkie-talkie")"s" ==> stem(1,"","s") + + +// missing -uss irregulars */ + +<*>"busses" ==> irreg_stem("bus","s") +"bussed" ==> irreg_stem("bus","ed") +"bussing" ==> irreg_stem("bus","ing") +<*>"hocus-pocusses" ==> irreg_stem("hocus-pocus","s") +<*>"hocusses" ==> irreg_stem("hocus","s") + +// -use words: list all here so that -us (of which there are many more) +// can be treated as regular +// + +// last pattern does ...house etc, , ...ause, ...fuse */ +(({A}*"-us") OR "abus" OR "accus" OR "amus" OR "arous" OR "bemus" OR "carous" OR "contus" OR "disabus" OR "disus" OR "dous" OR "enthus" OR "excus" OR "grous" OR "misus" OR "mus" OR "overus" OR "perus" OR "reus" OR "rous" OR "sous" OR "us" OR ({A}*[hlmp]"ous") OR ({A}*[af]"us")){ESEDING} ==> semi_reg_stem(0,"e") + +(({A}*"-abus") OR ({A}*"-us") OR "abus" OR "burnous" OR "cayus" OR "chanteus" OR "chartreus" OR "chauffeus" OR "crus" OR "disus" OR "excus" OR "grous" OR "hypotenus" OR "masseus" OR "misus" OR "mus" OR "Ous" OR "overus" OR "poseus" OR "reclus" OR "reus" OR "rus" OR "us" OR ({A}*[hlmp]"ous") OR ({A}*[af]"us"))"es" ==> stem(1,"","s") + +// inflection for 'regular' -us */ + +<*>{A}+"uses" ==> stem(2,"","s") +{A}+"used" ==> stem(2,"","ed") +{A}+"using" ==> stem(3,"","ing") + + +// defaults - changed some * to + */ +{A}*"men" ==> stem(2,"an","s") +{A}*"wives" ==> stem(3,"fe","s") +{A}+"zoa" ==> stem(1,"on","s") +{A}+"iia" ==> stem(2,"um","s") +{A}+"ia" ==> stem(1,"um","s") +{A}+"la" ==> stem(1,"um","s") +{A}+"i" ==> stem(1,"us","s") +{A}+"ae" ==> stem(1,"","s") +{A}+"ata" ==> stem(2,"","s") + + // words that should not be analysed */ +<*>("his" OR "hers" OR "theirs" OR "ours" OR "yours" OR "as" OR "its" OR "this" OR "during" OR "something" OR "nothing" OR "anything" OR "everything") ==> null_stem() +<*>{A}*("us" OR "ss" OR "sis" OR "eed") ==> 
null_stem() + + +// regular expression rules */ + +<*>{A}+{CX2S}"es" ==> stem(2,"","s") +<*>{A}+"thes" ==> stem(1,"","s") +<*>{A}+{CX}[cglsv]"es" ==> stem(1,"","s") +<*>{A}+"ettes" ==> stem(1,"","s") +<*>{A}+({CX}{2})"es" ==> stem(2,"","s") +<*>{A}+({VI}{2})"es" ==> stem(2,"","s") +<*>{A}+"xes" ==> stem(2,"","s") +<*>{A}+{S}"es" ==> stem(1,"","s") +<*>{A}+{C}"ies" ==> stem(3,"y","s") +<*>{A}*{CX}"oes" ==> stem(2,"","s") +<*>{A}+"s" ==> stem(1,"","s") + + + +{CX}+"ed" ==> null_stem() +{PRE}*{C}{V}"nged" ==> stem(2,"","ed") +{A}+"icked" ==> stem(2,"","ed") +{A}+{CX2S}"ed" ==> stem(2,"","ed") +{PRE}{C}+{V}"lled" ==> stem(3,"","ed") +{A}*{C}+{V}"lled" ==> stem(2,"","ed") +{A}*{C}"ined" ==> stem(1,"","ed") +{A}*{C}{V}{CX2}"ed" ==> stem(3,"","ed") +{A}*{C}{V}[npwx]"ed" ==> stem(2,"","ed") +{A}+{C}"ied" ==> stem(3,"y","ed") +{A}*"qu"{V}{C}"ed" ==> stem(1,"","ed") +{A}+"u"{V}"ded" ==> stem(1,"","ed") +{A}*{C}"leted" ==> stem(1,"","ed") +{PRE}*{C}+[ei]"ted" ==> stem(1,"","ed") +{A}+[ei]"ted" ==> stem(2,"","ed") +{PRE}({CX}{2})"eated" ==> stem(2,"","ed") +{A}*{V}({CX}{2})"eated" ==> stem(1,"","ed") +{A}+[eo]"ated" ==> stem(2,"","ed") +{A}+{V}"ated" ==> stem(1,"","ed") +{A}*({V}{2})[cgsvz]"ed" ==> stem(1,"","ed") +{A}*({V}{2}){C}"ed" ==> stem(2,"","ed") +{A}+[rw]"led" ==> stem(2,"","ed") +{A}+"thed" ==> stem(1,"","ed") +{A}+{CX}[cglsv]"ed" ==> stem(1,"","ed") +{A}+({CX}{2})"ed" ==> stem(2,"","ed") +{A}+({VI}{2})"ed" ==> stem(2,"","ed") +{PRE}*{C}+"ored" ==> stem(1,"","ed") +{A}+"ctored" ==> stem(2,"","ed") +{A}*{C}[clnt]"ored" ==> stem(1,"","ed") +{A}+[eo]"red" ==> stem(2,"","ed") +{A}+"ed" ==> stem(1,"","ed") + +{CX}+"ing" ==> null_stem() +{PRE}*{C}{V}"nging" ==> stem(3,"","ing") +{A}+"icking" ==> stem(3,"","ing") +{A}+{CX2S}"ing" ==> stem(3,"","ing") +{PRE}{C}+{V}"lling" ==> stem(4,"","ing") +{A}*{C}+{V}"lling" ==> stem(3,"","ing") +{A}*{C}"ining" ==> stem(3,"e","ing") +{A}*{C}{V}[npwx]"ing" ==> stem(3,"","ing") +{A}*{C}{V}{CX2}"ing" ==> stem(4,"","ing") 
+{A}*"qu"{V}{C}"ing" ==> stem(3,"e","ing") +{A}+"u"{V}"ding" ==> stem(3,"e","ing") +{A}*{C}"leting" ==> stem(3,"e","ing") +{PRE}*{C}+[ei]"ting" ==> stem(3,"e","ing") +{A}+[ei]"ting" ==> stem(3,"","ing") +{A}*{PRE}({CX}{2})"eating" ==> stem(3,"","ing") +{A}*{V}({CX}{2})"eating" ==> stem(3,"e","ing") +{A}+[eo]"ating" ==> stem(3,"","ing") +{A}+{V}"ating" ==> stem(3,"e","ing") +{A}*({V}{2})[cgsvz]"ing" ==> stem(3,"e","ing") +{A}*({V}{2}){C}"ing" ==> stem(3,"","ing") +{A}+[rw]"ling" ==> stem(3,"","ing") +{A}+"thing" ==> stem(3,"e","ing") +{A}+{CX}[cglsv]"ing" ==> stem(3,"e","ing") +{A}+({CX}{2})"ing" ==> stem(3,"","ing") +{A}+({VI}{2})"ing" ==> stem(3,"","ing") +{PRE}*{C}"ying" ==> stem(4,"ie","ing") +{A}+"ying" ==> stem(3,"","ing") +{A}*{CX}"oing" ==> stem(3,"","ing") +{PRE}*{C}+"oring" ==> stem(3,"e","ing") +{A}+"ctoring" ==> stem(3,"","ing") +{A}*{C}[clt]"oring" ==> stem(3,"e","ing") +{A}+[eo]"ring" ==> stem(3,"","ing") +{A}+"ing" ==> stem(3,"e","ing") + +"'d" ==> irreg_stem("have","ed") +"'ll" ==> irreg_stem("will","") +"'m" ==> irreg_stem("be","") +"'re" ==> irreg_stem("be","") +"'s" ==> irreg_stem("be","s") +"'ve" ==> irreg_stem("have","") + + +<*>{A}+"'s" ==> stem(2,"","'") diff --git a/core/src/test/resources/model/left3words-wsj-0-18.tagger b/core/src/test/resources/model/left3words-wsj-0-18.tagger new file mode 100644 index 00000000..1bb99040 Binary files /dev/null and b/core/src/test/resources/model/left3words-wsj-0-18.tagger differ diff --git a/core/src/test/resources/model/ner-eng-ie.crf-3-all2008-distsim.ser.gz b/core/src/test/resources/model/ner-eng-ie.crf-3-all2008-distsim.ser.gz new file mode 100644 index 00000000..2a67d7ba Binary files /dev/null and b/core/src/test/resources/model/ner-eng-ie.crf-3-all2008-distsim.ser.gz differ diff --git a/distsim/pom.xml b/distsim/pom.xml index fef74f3e..18809942 100644 --- a/distsim/pom.xml +++ b/distsim/pom.xml @@ -3,7 +3,7 @@ eu.excitementproject eop - 1.1.3 + 1.1.4 distsim distsim @@ -43,19 +43,19 @@ 
eu.excitementproject common - 1.1.3 + 1.1.4 eu.excitementproject redis - 1.1.3 + 1.1.4 eu.excitementproject lap - 1.1.3 + 1.1.4 + + + true +
diff --git a/distsim/src/main/resources/demo/configurations/dirt/knowledge-resource.xml b/distsim/src/main/resources/demo/configurations/dirt/knowledge-resource.xml index cee60469..f666d2bd 100644 --- a/distsim/src/main/resources/demo/configurations/dirt/knowledge-resource.xml +++ b/distsim/src/main/resources/demo/configurations/dirt/knowledge-resource.xml @@ -34,6 +34,11 @@ te-srv2 7005 --> + + + true + +
diff --git a/distsim/src/main/resources/demo/configurations/lin/dependency/knowledge-resource.xml b/distsim/src/main/resources/demo/configurations/lin/dependency/knowledge-resource.xml index fe93356f..e0aebc1f 100644 --- a/distsim/src/main/resources/demo/configurations/lin/dependency/knowledge-resource.xml +++ b/distsim/src/main/resources/demo/configurations/lin/dependency/knowledge-resource.xml @@ -38,6 +38,9 @@ te-srv2 7003 --> + + + true
diff --git a/distsim/src/main/resources/demo/configurations/lin/proximity/knowledge-resource.xml b/distsim/src/main/resources/demo/configurations/lin/proximity/knowledge-resource.xml index c29050f2..8079119b 100644 --- a/distsim/src/main/resources/demo/configurations/lin/proximity/knowledge-resource.xml +++ b/distsim/src/main/resources/demo/configurations/lin/proximity/knowledge-resource.xml @@ -38,6 +38,9 @@ te-srv2 7001 --> + + + true diff --git a/globalgraphoptimizer/pom.xml b/globalgraphoptimizer/pom.xml index 0a3fb81d..1c3d4857 100644 --- a/globalgraphoptimizer/pom.xml +++ b/globalgraphoptimizer/pom.xml @@ -4,7 +4,7 @@ eu.excitementproject eop - 1.1.3 + 1.1.4 globalgraphoptimizer @@ -19,7 +19,7 @@ eu.excitementproject common - 1.1.3 + 1.1.4 diff --git a/lap/pom.xml b/lap/pom.xml index d412326b..d8df14a9 100644 --- a/lap/pom.xml +++ b/lap/pom.xml @@ -5,7 +5,7 @@ eu.excitementproject eop - 1.1.3 + 1.1.4 lap @@ -89,7 +89,7 @@ eu.excitementproject common - 1.1.3 + 1.1.4 de.tudarmstadt.ukp.dkpro.core @@ -160,7 +160,8 @@ - + --> @@ -211,6 +212,14 @@ 20130511.0 + + de.tudarmstadt.ukp.dkpro.core + + de.tudarmstadt.ukp.dkpro.core.maltparser-model-parser-it-linear + + 20141002.0 + + :"); for (LexicalRule rule : l1) - System.out.println("\t" + rule.getRLemma() + ", " + rule.getConfidence()); + System.out.println("\t" + rule.getRLemma() + ", " + rule.getRelation() + ", " + rule.getConfidence()); System.out.println("\n--> " + term + ": "); for (LexicalRule rule : l2) - System.out.println("\t" + rule.getLLemma() + ", " + rule.getConfidence()); + System.out.println("\t" + rule.getLLemma() + ", " + rule.getRelation() + ", " + rule.getConfidence()); } public RedisBasedWikipediaLexicalResource(ConfigurationParams params) throws ConfigurationException, LexicalResourceException, RedisRunException { @@ -113,6 +114,20 @@ public RedisBasedWikipediaLexicalResource(ConfigurationParams params) throws Con extractionTypes = null; } + + try { + String stopWordsFile = 
params.getString(PARAM_STOP_WORDS); + try { + stopWords = new LinkedHashSet(FileUtils.loadFileToList(new File(stopWordsFile))); + } + catch (IOException e) { + throw new LexicalResourceException("error reading " + stopWordsFile); + } + } catch (ConfigurationException e) { + stopWords = null; + + } + initRedisDB(params); String classifierPathName = params.getString("classifierPath"); @@ -163,10 +178,16 @@ private void initRedisDB(ConfigurationParams params) throws RedisRunException { } catch (ConfigurationException e) { } + boolean bVM = false; + try { + bVM = params.getBoolean(Configuration.REDIS_VM); + } catch (ConfigurationException e) { + } + if (hostLeft == null || portLeft == -1 || hostRight == null || portRight == -1) { try { - leftRules = (redisDir == null ? new RedisBasedStringListBasicMap(params.get(Configuration.L2R_REDIS_DB_FILE)) : new RedisBasedStringListBasicMap(params.get(Configuration.L2R_REDIS_DB_FILE),redisDir)); - rightRules = (redisDir == null ? new RedisBasedStringListBasicMap(params.get(Configuration.R2L_REDIS_DB_FILE)) : new RedisBasedStringListBasicMap(params.get(Configuration.R2L_REDIS_DB_FILE), redisDir)); + leftRules = (redisDir == null ? new RedisBasedStringListBasicMap(params.get(Configuration.L2R_REDIS_DB_FILE),bVM) : new RedisBasedStringListBasicMap(params.get(Configuration.L2R_REDIS_DB_FILE),redisDir,bVM)); + rightRules = (redisDir == null ? 
new RedisBasedStringListBasicMap(params.get(Configuration.R2L_REDIS_DB_FILE),bVM) : new RedisBasedStringListBasicMap(params.get(Configuration.R2L_REDIS_DB_FILE), redisDir,bVM)); } catch (Exception e) { throw new RedisRunException(e); } @@ -180,13 +201,13 @@ private void initRedisDB(ConfigurationParams params) throws RedisRunException { } - public RedisBasedWikipediaLexicalResource(Classifier classifier, String leftRedisDBFile, String rightRedisDBFile) throws UnsupportedPosTagStringException, FileNotFoundException, RedisRunException{ - this(classifier, DEFAULT_RULES_LIMIT, leftRedisDBFile, rightRedisDBFile); + public RedisBasedWikipediaLexicalResource(Classifier classifier, String leftRedisDBFile, String rightRedisDBFile, boolean bVM) throws UnsupportedPosTagStringException, FileNotFoundException, RedisRunException{ + this(classifier, DEFAULT_RULES_LIMIT, leftRedisDBFile, rightRedisDBFile, bVM); } - public RedisBasedWikipediaLexicalResource(Classifier classifier, int limitOnRetrievedRules, String leftRedisDBFile, String rightRedisDBFile) throws UnsupportedPosTagStringException, FileNotFoundException, RedisRunException { - leftRules = new RedisBasedStringListBasicMap(leftRedisDBFile); - rightRules = new RedisBasedStringListBasicMap(rightRedisDBFile); + public RedisBasedWikipediaLexicalResource(Classifier classifier, int limitOnRetrievedRules, String leftRedisDBFile, String rightRedisDBFile, boolean bVM) throws UnsupportedPosTagStringException, FileNotFoundException, RedisRunException { + leftRules = new RedisBasedStringListBasicMap(leftRedisDBFile, bVM); + rightRules = new RedisBasedStringListBasicMap(rightRedisDBFile, bVM); this.m_nounPOS = new eu.excitementproject.eop.common.representation.partofspeech.ByCanonicalPartOfSpeech(CanonicalPosTag.N.name()); this.m_classifier = classifier; this.m_limitOnRetrievedRules = limitOnRetrievedRules; @@ -220,9 +241,9 @@ public List> getRules(String lemma, PartOfSp if (lemma != null) lemma = lemma.toLowerCase(); - //If it's not a 
noun, we ignore it... - if ((pos !=null) && (!(pos.getCanonicalPosTag().equals(CanonicalPosTag.N)))) - { + //If it's not a noun or it's a stop word, we ignore it... + if (((pos !=null) && (!(pos.getCanonicalPosTag().equals(CanonicalPosTag.N)))) + || (stopWords != null && stopWords.contains(lemma))) { return new LinkedList>(); } @@ -253,14 +274,16 @@ public List> getRules(String leftLemma, if (rightLemma != null) rightLemma = rightLemma.toLowerCase(); - //If it's not a noun, we ignore it...1 - if ((leftPos !=null) && (!(leftPos.getCanonicalPosTag().equals(CanonicalPosTag.N)))) + //If it's not a noun, or it's a stop word, we ignore it...1 + if (((leftPos !=null) && (!(leftPos.getCanonicalPosTag().equals(CanonicalPosTag.N)))) + || (stopWords != null && stopWords.contains(leftLemma))) { return new LinkedList>(); } - //If it's not a noun, we ignore it... - if ((rightPos !=null) && (!(rightPos.getCanonicalPosTag().equals(CanonicalPosTag.N)))) + //If it's not a noun, or it's a stop word, we ignore it... 
+ if (((rightPos !=null) && (!(rightPos.getCanonicalPosTag().equals(CanonicalPosTag.N)))) + || (stopWords != null && stopWords.contains(rightLemma))) { return new LinkedList>(); } @@ -291,7 +314,7 @@ private List> makeLexicalRules(List> rules = new LinkedList>(); - for (RedisRuleData ruleData : rulesData) { + for (RedisRuleData ruleData : rulesData) { LexicalRule rule = new LexicalRule(ruleData.getLeftTerm(), this.m_nounPOS, ruleData.getRightTerm(), this.m_nounPOS, Math.max(Math.min(m_classifier.getRank(ruleData),MAXIMAL_CONFIDENCE),MINIMAL_CONFIDENCE), ruleData.getRuleType(), diff --git a/lexicalinferenceminer/src/main/java/eu/excitementproject/eop/lexicalminer/redis/RedisBasedWikipediaLexicalResourceConfig.xml b/lexicalinferenceminer/src/main/java/eu/excitementproject/eop/lexicalminer/redis/RedisBasedWikipediaLexicalResourceConfig.xml new file mode 100644 index 00000000..87b83e2c --- /dev/null +++ b/lexicalinferenceminer/src/main/java/eu/excitementproject/eop/lexicalminer/redis/RedisBasedWikipediaLexicalResourceConfig.xml @@ -0,0 +1,55 @@ + + +
+wiki + + + + + + + + wiki-l2r.rdb + + wiki-r2l.rdb + + + + + + + + + 0.001 + + + 10 + + + Redirect,Parenthesis,LexicalIDM,SyntacticIDM + + + stopwords-Eyal.txt + + + + eu.excitementproject.eop.lexicalminer.definition.classifier.syntacticpatterns.offlineClassifiers.syntacticpatternsLocationsSquare + SyntacticOfflinePosRelationLocationSquareClassifier + + + false + +
+ +
\ No newline at end of file diff --git a/lexicalinferenceminer/src/main/java/eu/excitementproject/eop/lexicalminer/redis/RedisRuleData.java b/lexicalinferenceminer/src/main/java/eu/excitementproject/eop/lexicalminer/redis/RedisRuleData.java index 9dbe8079..5a1b75df 100644 --- a/lexicalinferenceminer/src/main/java/eu/excitementproject/eop/lexicalminer/redis/RedisRuleData.java +++ b/lexicalinferenceminer/src/main/java/eu/excitementproject/eop/lexicalminer/redis/RedisRuleData.java @@ -80,11 +80,11 @@ public double getDefultRank() { public Double getClassifierRank(int classifierId) { if (classifierRanks == null) - return 0.0; //return null; + return null; double d = classifierRanks.get(classifierId-1); if (d == -1) - return 0.0; //return null; + return null; else return d; } diff --git a/lexicalinferenceminer/src/main/java/eu/excitementproject/eop/lexicalminer/redis/SQL2RedisConverter.java b/lexicalinferenceminer/src/main/java/eu/excitementproject/eop/lexicalminer/redis/SQL2RedisConverter.java index 878bc5e8..5fc11f08 100644 --- a/lexicalinferenceminer/src/main/java/eu/excitementproject/eop/lexicalminer/redis/SQL2RedisConverter.java +++ b/lexicalinferenceminer/src/main/java/eu/excitementproject/eop/lexicalminer/redis/SQL2RedisConverter.java @@ -41,8 +41,8 @@ public static void main(String[] args) throws Exception { //Assumption: the classifiers are identified by 1...numOfClassifiers int numOfClassifiers = Integer.parseInt(args[3]); - int lPort = BasicRedisRunner.getInstance().run(args[1]); - int rPort = BasicRedisRunner.getInstance().run(args[2]); + int lPort = BasicRedisRunner.getInstance().run(args[1],false); + int rPort = BasicRedisRunner.getInstance().run(args[2],false); JedisPool lpool = new JedisPool(new JedisPoolConfig(), "localhost",lPort,10000); Jedis lJedis = lpool.getResource(); diff --git a/lexicalinferenceminer/src/main/resources/stopwords-Eyal.txt b/lexicalinferenceminer/src/main/resources/stopwords-Eyal.txt new file mode 100644 index 00000000..8ba73823 
--- /dev/null +++ b/lexicalinferenceminer/src/main/resources/stopwords-Eyal.txt @@ -0,0 +1,160 @@ +'ve +'re +'s +s +a +an +about +after +again +all +almost +also +although +always +among +an +and +another +any +approximately +are +as +at +be +because +been +before +being +between +both +but +by +can +could +did +do +does +done +due +during +each +either +enough +especially +etc +followed +following +for +from +further +had +hardly +has +have +having +her +here +hers +his +how +however +if +in +into +is +it +it's +its +itself +just +kg +km +largely +like +likes +mainly +may +might +ml +mm +more +most +mostly +must +my +nearly +neither +no +nor +not +now +of +often +on +one +only +or +other +our +ours +out +over +overall +per +perhaps +possible +previously +quite +rather +really +regarding +same +several +should +since +so +some +something +such +than +that +the +their +theirs +them +then +there +these +they +thing +this +those +through +thus +to +under +up +upon +various +very +was +we +were +what +when +whereas +which +while +with +within +without +would +he +she +who +whose +will +you +your +yours \ No newline at end of file diff --git a/lexicalinferenceminer/src/test/java/eu/excitementproject/eop/lexicalminer/AppTest.java b/lexicalinferenceminer/src/test/java/eu/excitementproject/eop/lexicalminer/AppTest.java deleted file mode 100644 index 897873c8..00000000 --- a/lexicalinferenceminer/src/test/java/eu/excitementproject/eop/lexicalminer/AppTest.java +++ /dev/null @@ -1,38 +0,0 @@ -package eu.excitementproject.eop.lexicalminer; - -import junit.framework.Test; -import junit.framework.TestCase; -import junit.framework.TestSuite; - -/** - * Unit test for simple App. 
- */ -public class AppTest - extends TestCase -{ - /** - * Create the test case - * - * @param testName name of the test case - */ - public AppTest( String testName ) - { - super( testName ); - } - - /** - * @return the suite of tests being tested - */ - public static Test suite() - { - return new TestSuite( AppTest.class ); - } - - /** - * Rigourous Test :-) - */ - public void testApp() - { - assertTrue( true ); - } -} diff --git a/pom.xml b/pom.xml index 2bfd4b5f..7a02263e 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 eu.excitementproject eop - 1.1.3 + 1.1.4 pom Free Open Source Platform for Recognizing Textual Entailments (RTE) The Excitement Open Platform (EOP), has created a generic architecture and a comprehensive implementation for a multilingual textual inference platform available to the scientific and technological communities. The EOP is a main product of the EXCITEMENT project. This research project, has two interleaved high-level goals. The first is to set up, for the first time, a generic architecture and a comprehensive implementation for a multilingual textual inference platform and to make it available to the scientific and technological communities. The second goal of the project is to develop a new generation of inference-based industrial text exploration applications for customer interactions, which will enable businesses to better analyze and make sense of their diverse and often unpredicted client content. These goals will be achieved for three languages – English, German and Italian, and for three customer interaction channels – speech (transcriptions), email and social media. 
@@ -11,7 +11,7 @@ scm:git:git@github.com:hltfbk/Excitement-Open-Platform.git scm:git:git@github.com:hltfbk/Excitement-Open-Platform.git - v1.1.3 + v1.1.4 @@ -34,7 +34,98 @@ distsim globalgraphoptimizer lexicalinferenceminer + alignmentedas + + + + + + jenkins_biutee_test + + + + maven-surefire-plugin + + ${dependency.output.dir}/biutee/workdir/ + + **/biutee/*.java + **/biu/**/*.java + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + false + unpack + test + + unpack + + + + + eu.excitementproject + biutee_env + 0.0.0 + true + zip + **/* + + + ${dependency.output.dir} + + + + + + + + + + + biutee_test + + + + maven-surefire-plugin + + ${surefire.working.dir} + + **/biutee/*.java + **/biu/**/*.java + + + + + + + + + + + @@ -256,7 +347,7 @@ 2.13 false - -Xms1G -Xmx1G + -Xms4G -Xmx4G @@ -633,5 +724,7 @@ UTF-8 + ${project.build.directory}/BIUTEE_Environment/biutee/workdir/ + ${project.build.directory}/BIUTEE_Environment/ diff --git a/redis/pom.xml b/redis/pom.xml index f0dd0a42..966c8c9a 100644 --- a/redis/pom.xml +++ b/redis/pom.xml @@ -3,7 +3,7 @@ eu.excitementproject eop - 1.1.3 + 1.1.4 redis redis @@ -25,7 +25,7 @@ eu.excitementproject common - 1.1.3 + 1.1.4 diff --git a/redis/src/main/java/eu/excitementproject/eop/redis/BasicRedisRunner.java b/redis/src/main/java/eu/excitementproject/eop/redis/BasicRedisRunner.java index 06545f9d..5087532e 100644 --- a/redis/src/main/java/eu/excitementproject/eop/redis/BasicRedisRunner.java +++ b/redis/src/main/java/eu/excitementproject/eop/redis/BasicRedisRunner.java @@ -155,6 +155,8 @@ public class BasicRedisRunner implements RedisRunner { protected static final String PID_FILE_EXT = ".pid"; protected static final String VM_SWAP_FILE_EXT = ".swap"; protected static final String DB_FILE_EXT = ".rdb"; + protected static final String VM_ENABLED = "vm-enabled"; + protected static final String REALY_USE_VM = "really-use-vm"; protected static RedisRunner instance = null; protected static String redisBinDir; //a path to 
the Redis binary directory @@ -274,7 +276,7 @@ public synchronized void setRedisConfigurationFileTemplate(String templateConfig * @see eu.excitementproject.eop.distsim.redis.RedisRunner#run(java.lang.String, java.lang.String) */ @Override - public synchronized int run(final String dbFile) throws RedisRunException { + public synchronized int run(final String dbFile, boolean bVM) throws RedisRunException { RedisInstanceInfo instanceInfo = mapDir2FileInstanceInfo.get(dbFile); if (instanceInfo == null) { @@ -284,7 +286,7 @@ public synchronized int run(final String dbFile) throws RedisRunException { } catch (NoAvailablePortException e) { throw new RedisRunException(e); } - run1(dbFile,port); + run1(dbFile,port,bVM); Runtime.getRuntime().addShutdownHook(new Thread() { @Override public void run() { @@ -308,15 +310,17 @@ public void run() { * Run, if needed, a local Redis server for a given Redis database file in a given directory * * @param dbFile An existing or non-existing Redis database file. The parent directory of @param dbFile should have writing permissions + * @param port a port for the redis server to be run + * @param bVM should the redis server run on low memory usage mode (virtual memory). 
* @throws RedisRunException */ - public synchronized int run1(final String dbFile, int port) throws RedisRunException { + public synchronized int run1(final String dbFile, int port, boolean bVM) throws RedisRunException { logger.debug("running redis server for file " + dbFile); //Assumption: dbFile is a Redis db file (.rdb) or non existing file try { - String confFile = generateConfigurationFile(dbFile,port,templateConfigurationFile); + String confFile = generateConfigurationFile(dbFile,port,templateConfigurationFile,bVM); logger.info("A new instance is about to run on port " + port + " according to " + confFile + " configuration"); @@ -414,11 +418,12 @@ protected synchronized boolean close1(String dbFile, RedisInstanceInfo instanceI * @param dbDir an existing directory with writing permissions. * @param dbFile a path to an existing or non-existing Redis database file, located at @param dbDir directory * @param port an available port for Redis connection + * @param bVM should the redis server run on low memory usage mode (virtual memory). 
* @return the path to the new generated configuration file * @throws IOException */ - public synchronized static String generateConfigurationFile(String dbfile, int port) throws IOException { - return generateConfigurationFile(dbfile,port,DEFAULT_REDIS_BIN_DIR + "/" + DEFAULT_TEMPLATE_CONFIGURATION_FILE_NAME); + public synchronized static String generateConfigurationFile(String dbfile, int port, boolean bVM) throws IOException { + return generateConfigurationFile(dbfile,port,DEFAULT_REDIS_BIN_DIR + "/" + DEFAULT_TEMPLATE_CONFIGURATION_FILE_NAME,bVM); } /** @@ -429,10 +434,11 @@ public synchronized static String generateConfigurationFile(String dbfile, int p * @param dbFile a path to an existing or non-existing Redis database file, located at @param dbDir directory * @param port an available port for Redis connection * @param templateConfigurationFile a template of redis configuration file + * @param bVM should the redis server run on low memory usage mode (virtual memory). * @return the path to the new generated configuration file * @throws IOException */ - public synchronized static String generateConfigurationFile(String dbfile, int port, String templateConfigurationFile) throws IOException { + public synchronized static String generateConfigurationFile(String dbfile, int port, String templateConfigurationFile, boolean bVM) throws IOException { String confFile = dbfile + CONF_FILE_EXT; String dbDir = new File(dbfile).getParent(); BufferedReader reader = new BufferedReader(new FileReader(templateConfigurationFile)); @@ -449,6 +455,10 @@ public synchronized static String generateConfigurationFile(String dbfile, int p writer.println(DB_FILE_NAME + " " + new File(dbfile).getName()); } else if (line.startsWith(VM_SWAP_FILE)) { writer.println(VM_SWAP_FILE + " " + dbfile + VM_SWAP_FILE_EXT); + } else if (line.startsWith(VM_ENABLED)) { + writer.println(VM_ENABLED + " " + (bVM ? 
"yes" : "no")); + if (bVM) + writer.println(REALY_USE_VM + " yes"); } else { writer.println(line); } @@ -528,9 +538,9 @@ public static void main(String[] args) { try { BasicRedisRunner.setRedisBinDir(args[0]); RedisRunner runner = BasicRedisRunner.getInstance(args[1]); - runner.run(args[2]); - runner.run(args[2]); - runner.run(args[2]); + runner.run(args[2],false); + runner.run(args[2],false); + runner.run(args[2],false); Thread.sleep(10000); runner.close(args[2]); diff --git a/redis/src/main/java/eu/excitementproject/eop/redis/Configuration.java b/redis/src/main/java/eu/excitementproject/eop/redis/Configuration.java index e2d37e95..7ed29129 100644 --- a/redis/src/main/java/eu/excitementproject/eop/redis/Configuration.java +++ b/redis/src/main/java/eu/excitementproject/eop/redis/Configuration.java @@ -24,6 +24,7 @@ public class Configuration { public static final String REDIS_CONFIGURATION_TEMPLATE_FILE = "redis-configuration-template-file"; public static final String REDIS_BIN_DIR = "redis-binary-dir"; public static final String REDIS_FILE = "redis-file"; + public static final String REDIS_VM = "redis-vm"; } diff --git a/redis/src/main/java/eu/excitementproject/eop/redis/RedisBasedStringListBasicMap.java b/redis/src/main/java/eu/excitementproject/eop/redis/RedisBasedStringListBasicMap.java index 4aedd0c2..d3c756ac 100644 --- a/redis/src/main/java/eu/excitementproject/eop/redis/RedisBasedStringListBasicMap.java +++ b/redis/src/main/java/eu/excitementproject/eop/redis/RedisBasedStringListBasicMap.java @@ -36,13 +36,13 @@ public class RedisBasedStringListBasicMap { public static final String ELEMENT_CLASS_NAME_KEY = "element-class-name"; - public RedisBasedStringListBasicMap(String dbFile, String redisDir) throws FileNotFoundException, RedisRunException { - init(dbFile, redisDir); + public RedisBasedStringListBasicMap(String dbFile, String redisDir, boolean bVM) throws FileNotFoundException, RedisRunException { + init(dbFile, redisDir,bVM); } - public 
RedisBasedStringListBasicMap(String dbFile) throws FileNotFoundException, RedisRunException { - init(dbFile); + public RedisBasedStringListBasicMap(String dbFile, boolean bVM) throws FileNotFoundException, RedisRunException { + init(dbFile,bVM); } public RedisBasedStringListBasicMap(String host, int port) { @@ -52,33 +52,39 @@ public RedisBasedStringListBasicMap(String host, int port) { public RedisBasedStringListBasicMap(ConfigurationParams params) throws ConfigurationException, FileNotFoundException, RedisRunException { String dbFile = params.get(Configuration.REDIS_FILE); + boolean bVM = false; + try { + bVM = params.getBoolean(Configuration.REDIS_VM); + } catch (ConfigurationException e) {} String redisDir = null; try { - redisDir = params.get(Configuration.REDIS_BIN_DIR); - init(dbFile,redisDir); + redisDir = params.get(Configuration.REDIS_BIN_DIR); + init(dbFile,redisDir,bVM); } catch (ConfigurationException e) { - init(dbFile); + init(dbFile,bVM); } } - protected void init(String dbFile) throws FileNotFoundException, RedisRunException { + protected void init(String dbFile, boolean bVM) throws FileNotFoundException, RedisRunException { this.dbFile = dbFile; - int port = BasicRedisRunner.getInstance().run(dbFile); + int port = BasicRedisRunner.getInstance().run(dbFile,bVM); init("localhost",port); } - protected void init(String dbFile, String redisDir) throws FileNotFoundException, RedisRunException { + protected void init(String dbFile, String redisDir, boolean bVM) throws FileNotFoundException, RedisRunException { this.dbFile = dbFile; BasicRedisRunner.setRedisBinDir(redisDir); - int port = BasicRedisRunner.getInstance().run(dbFile); + int port = BasicRedisRunner.getInstance().run(dbFile,bVM); init("localhost",port); - } protected void init(String host, int port) { JedisPool pool = new JedisPool(new JedisPoolConfig(), host,port); + //debug + logger.info("Connecting to redis server at host " + host + ", port " + port); + jedis = pool.getResource(); 
jedis.connect(); jedis.getClient().setTimeoutInfinite(); diff --git a/redis/src/main/java/eu/excitementproject/eop/redis/RedisRunner.java b/redis/src/main/java/eu/excitementproject/eop/redis/RedisRunner.java index 7c434903..133d65bf 100644 --- a/redis/src/main/java/eu/excitementproject/eop/redis/RedisRunner.java +++ b/redis/src/main/java/eu/excitementproject/eop/redis/RedisRunner.java @@ -15,10 +15,11 @@ public interface RedisRunner { * Runs a a local Redis server instance for a given database file. The file may not existed (as the case of running Redis on a new database) * * @param dbFile An existing or non-existing Redis database file. The parent directory of the given file should have writing permissions + * @param bVM should the redis server run on low memory usage mode (virtual memory). * @return the port id of the Redis server instance for the given file in the given directory * @throws RedisRunException */ - int run(final String dbFile) throws RedisRunException; + int run(final String dbFile, boolean bVM) throws RedisRunException; /** * Stops the running of a given Redis server, specified by the given db file (in case no other references for the running Redis server exists) diff --git a/redis/src/main/java/eu/excitementproject/eop/redis/RunRedisResource.java b/redis/src/main/java/eu/excitementproject/eop/redis/RunRedisResource.java index a1aa0b3c..2b69dc6c 100644 --- a/redis/src/main/java/eu/excitementproject/eop/redis/RunRedisResource.java +++ b/redis/src/main/java/eu/excitementproject/eop/redis/RunRedisResource.java @@ -13,13 +13,14 @@ public class RunRedisResource { public static void main(String[] args) throws Exception { - if (args.length != 2) { - System.out.println("Usage: java eu.excitementproject.eop.redis.RunRedisResource "); + if (args.length != 3) { + System.out.println("Usage: java eu.excitementproject.eop.redis.RunRedisResource "); System.exit(0); } String redisFile = args[0]; int port = Integer.parseInt(args[1].trim()); - String redisConfFile 
= BasicRedisRunner.generateConfigurationFile(redisFile, port); + boolean bVM = Boolean.parseBoolean(args[2]); + String redisConfFile = BasicRedisRunner.generateConfigurationFile(redisFile, port,bVM); Runtime.getRuntime().exec(new String[]{BasicRedisRunner.DEFAULT_REDIS_BIN_DIR + "/" + BasicRedisRunner.getRedisServerCmd(),redisConfFile}); } } diff --git a/transformations/pom.xml b/transformations/pom.xml index 0b9a2f7f..049c933f 100644 --- a/transformations/pom.xml +++ b/transformations/pom.xml @@ -4,7 +4,7 @@ eu.excitementproject eop - 1.1.3 + 1.1.4 transformations transformations @@ -31,23 +31,23 @@ eu.excitementproject common - 1.1.3 + 1.1.4 eu.excitementproject lap - 1.1.3 + 1.1.4 eu.excitementproject core - 1.1.3 + 1.1.4 eu.excitementproject lexicalminer - 1.1.3 + 1.1.4 diff --git a/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/PredicateTruth.java b/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/PredicateTruth.java new file mode 100644 index 00000000..d8eaba72 --- /dev/null +++ b/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/PredicateTruth.java @@ -0,0 +1,59 @@ +package eu.excitementproject.eop.transformations.biu.en.predicatetruth; + +import java.util.List; + +import eu.excitementproject.eop.lap.biu.ner.NamedEntityRecognizerException; +import eu.excitementproject.eop.transformations.generic.truthteller.conll.ConllConverterException; +import eu.excitementproject.eop.transformations.representation.ExtendedNode; + +/** + * An interface for Truth annotations components + * + * @author Gabi Stanovsky + * @since Aug 2014 + */ + +public interface PredicateTruth { + + /** + * Call this method only once. + *

+ * Call the method to initialize the {@linkplain PredicateTruth} + *

+ * Don't call other methods of this interface before calling {@linkplain #init()} method. + * @throws ConllConverterException + * @throws NamedEntityRecognizerException An error occured while trying to initialize. + */ + public void init() throws PredicateTruthException; + + /** + * Set a sentence to the {@linkplain PredicateTruth}. + * */ + public void setSentence(ExtendedNode annotatedSentence); + + /** + * Assigns truth value to the words in the sentence.
+ * Assigns null for words which don't have truth values. + * @throws PredicateTruthException on Any error + * @throws ConllConverterException + */ + public void annotate() throws PredicateTruthException; + + /** + *

Call this method only after calling {@link #annotate()} method. + * @return a List of corresponding to truth values according to the position of the token in the sentence + * as assigned by the truth annotator (by the {@link #annotate} method). + */ + public List getAnnotatedEntities(); + + /** + * Call this method once you have finished using the {@link PredicateTruth}, + * and you will not use it any more. + *

+ * I.e. DON'T call it each time you are done with a sentence, but + * only once there are no more sentences to be annotated any more. + */ + public void cleanUp(); +} + + diff --git a/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/PredicateTruthException.java b/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/PredicateTruthException.java new file mode 100644 index 00000000..3cefc67b --- /dev/null +++ b/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/PredicateTruthException.java @@ -0,0 +1,23 @@ +package eu.excitementproject.eop.transformations.biu.en.predicatetruth; + + +/** + * Exception class thrown by {@link PredicateTruth} in any error case. + * @author Gabi Stanovsky + * @since Aug 2014 + */ +public class PredicateTruthException extends Exception +{ + private static final long serialVersionUID = 1L; + + public PredicateTruthException(String message, Throwable cause) + { + super(message, cause); + } + + public PredicateTruthException(String message) + { + super(message); + } + +} diff --git a/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/SingleTokenTruthAnnotation.java b/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/SingleTokenTruthAnnotation.java new file mode 100644 index 00000000..b8840dcd --- /dev/null +++ b/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/SingleTokenTruthAnnotation.java @@ -0,0 +1,89 @@ +package eu.excitementproject.eop.transformations.biu.en.predicatetruth; + +import java.util.List; + +import eu.excitementproject.eop.transformations.representation.ExtendedNode; +import eu.excitementproject.eop.transformations.representation.annotations.ClauseTruth; +import eu.excitementproject.eop.transformations.representation.annotations.NegationAndUncertainty; +import 
eu.excitementproject.eop.transformations.representation.annotations.PredTruth; +import eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature; + +/** + * A container for the result of truth annotation on a single token + * contains Predicate Truth, Clause Truth, Negation and Uncertainty, and Predicate Signature + * If any of these is missing, an empty string is expected. + * @author Gabi Stanovsky + * @since Aug 2014 + */ + +public class SingleTokenTruthAnnotation { + + private PredTruth predicateTruthValue; + private ClauseTruth clauseTruthValue; + private NegationAndUncertainty nuValue; + private PredicateSignature predicateSignatureValue; + + private List subtree; + private Integer subtreeMinimalIndex,subtreeMaximalIndex; + + public SingleTokenTruthAnnotation(PredTruth pt,ClauseTruth ct, NegationAndUncertainty nu, PredicateSignature sig, List sub){ + predicateTruthValue = pt; + clauseTruthValue = ct; + nuValue = nu; + predicateSignatureValue = sig; + subtree=sub; + subtreeMaximalIndex = null; + subtreeMinimalIndex = null; + } + + public SingleTokenTruthAnnotation(PredTruth pt,ClauseTruth ct, NegationAndUncertainty nu, PredicateSignature sig){ + this(pt,ct,nu,sig,null); + } + + + + public int getSubtreeMinimalIndex() { + return subtreeMinimalIndex; + } + + public void setSubtreeMinimalIndex(int subtreeMinimalIndex) { + this.subtreeMinimalIndex = subtreeMinimalIndex; + } + + public int getSubtreeMaximalIndex() { + return subtreeMaximalIndex; + } + + public void setSubtreeMaximalIndex(int subtreeMaximalIndex) { + this.subtreeMaximalIndex = subtreeMaximalIndex; + } + + public PredTruth getPredicateTruthValue() { + return predicateTruthValue; + } + + public ClauseTruth getClauseTruthValue() { + return clauseTruthValue; + } + + public NegationAndUncertainty getNuValue() { + return nuValue; + } + + public PredicateSignature getPredicateSignatureValue() { + return predicateSignatureValue; + } + + public List getSubtree() { + return 
subtree; + } + + + public void setSubtree(List subtree) { + this.subtree = subtree; + } + + + + +} diff --git a/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/TruthTellerAnnotator.java b/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/TruthTellerAnnotator.java new file mode 100644 index 00000000..1cea1874 --- /dev/null +++ b/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/TruthTellerAnnotator.java @@ -0,0 +1,128 @@ +package eu.excitementproject.eop.transformations.biu.en.predicatetruth; + +import java.io.File; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import eu.excitementproject.eop.common.representation.parse.tree.AbstractNodeUtils; +import eu.excitementproject.eop.transformations.generic.truthteller.AnnotatorException; +import eu.excitementproject.eop.transformations.generic.truthteller.DefaultSentenceAnnotator; +import eu.excitementproject.eop.transformations.representation.AdditionalNodeInformation; +import eu.excitementproject.eop.transformations.representation.ExtendedNode; + +/** + * An implementation of the "inner tool" of the analysis engine, + * serves a wrapper for the TruthTeller calls. 
+ * @author Gabi Stanovsky + * @since Aug 2014 + */ + +public class TruthTellerAnnotator implements PredicateTruth { + + + private DefaultSentenceAnnotator annotator; + private File annotationRulesFile; + private ExtendedNode annotatedSentence; + private List annotationResult; + + /** + * Constructor which receives the annotation rules file + * @param IannotationRulesFile + * @throws PredicateTruthException + */ + public TruthTellerAnnotator(File annotationRulesFile) throws PredicateTruthException{ + this.annotationRulesFile = annotationRulesFile; + annotatedSentence = null; + } + + @Override + public void init() throws PredicateTruthException { + try { + annotator = new DefaultSentenceAnnotator(annotationRulesFile); + } catch (AnnotatorException e) { + throw new PredicateTruthException(e.getMessage(),e); + } + } + + @Override + public void setSentence(ExtendedNode annotatedSentence) { + this.annotatedSentence = annotatedSentence; + // clear annotation result + annotationResult = new ArrayList(); + } + + @Override + public void annotate() throws PredicateTruthException { + try { + // verify that setSentence was run before calling this function + if (annotatedSentence == null){ + throw new PredicateTruthException("annotate was called without first calling setSentence"); + } + // run TruthTeller + annotator.setTree(annotatedSentence); + annotator.annotate(); + ExtendedNode ttResult = annotator.getAnnotatedTree(); + Map annotationMap = new HashMap(); //needed since truth annotations won't be read in the sentence order + + // iterate over nodes and extract annotations to UIMA format + List nodes = AbstractNodeUtils.treeToList(ttResult); + + for (ExtendedNode node : nodes){ + int serial = node.getInfo().getNodeInfo().getSerial()-1; // this node's id in the original sentence + AdditionalNodeInformation info = node.getInfo().getAdditionalNodeInformation(); + // store result from info, according to index in the original sentence + SingleTokenTruthAnnotation 
singleTokenAnnotation =new SingleTokenTruthAnnotation(info.getPredTruth(),info.getClauseTruth(),info.getNegationAndUncertainty(),info.getPredicateSignature()); + + if (singleTokenAnnotation.getClauseTruthValue() !=null){ + // get a list of all subtree tokens, by getting the deep antecedent of all + // the subtree, and storing in the set - thus obtaining a unique copy of all "real" tokens + int minimalIndex = -1,maximalIndex = -1; // variables to store the boundaries of the subtree + Set subtree = new HashSet(); + for (ExtendedNode child : AbstractNodeUtils.treeToList(node)){ + ExtendedNode toAdd =AbstractNodeUtils.getDeepAntecedentOf(child); + int curId = node.getInfo().getNodeInfo().getSerial()-1; + subtree.add(toAdd); + // calculate boundaries + if ((minimalIndex == -1)||(curId < minimalIndex)){ + minimalIndex = curId; + } + if ((maximalIndex == -1)||(curId > maximalIndex)){ + maximalIndex = curId; + } + } + + // store the subtree and its boundaries + singleTokenAnnotation.setSubtree(new ArrayList(subtree)); + singleTokenAnnotation.setSubtreeMinimalIndex(minimalIndex); + singleTokenAnnotation.setSubtreeMaximalIndex(maximalIndex); + } + annotationMap.put(serial,singleTokenAnnotation); + + } + + //convert the map into a list - assumes there's a truth annotation for each token index + for (int i=0; i < annotationMap.size();i++){ + annotationResult.add(annotationMap.get(i)); + } + + } catch (AnnotatorException e) { + throw new PredicateTruthException(e.getMessage(),e); + } + } + + @Override + public List getAnnotatedEntities() { + return annotationResult; + } + + @Override + public void cleanUp() { + // stub - nothing to do to close TruthTeller + + } + +} diff --git a/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/TruthTellerAnnotatorAE.java b/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/TruthTellerAnnotatorAE.java new file mode 100644 index 00000000..3a80fc1d --- /dev/null 
+++ b/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/TruthTellerAnnotatorAE.java @@ -0,0 +1,38 @@ +package eu.excitementproject.eop.transformations.biu.en.predicatetruth; + +import java.io.File; + +import org.uimafit.descriptor.ConfigurationParameter; + +import eu.excitementproject.eop.common.datastructures.Envelope; +import eu.excitementproject.eop.transformations.uima.ae.truthteller.PredicateTruthAE; + +/** + * Inherits truth annotations, and makes specific calls for Truth Teller's wrapper + * @author Gabi Stanovsky + * @since Aug 2014 + */ + +public class TruthTellerAnnotatorAE extends PredicateTruthAE { + + + // get the configuration parameter + public static final String PARAM_CONFIG = "annotationRulesFile"; + @ConfigurationParameter(name = PARAM_CONFIG, mandatory = true) + private File annotationRulesFile; + + + @Override + protected TruthTellerAnnotator buildInnerTool() throws Exception { + TruthTellerAnnotator ret = new TruthTellerAnnotator(annotationRulesFile); + ret.init(); + return ret; + } + + @Override + protected final Envelope getEnvelope(){return envelope;} + + + private static Envelope envelope = new Envelope(); + +} diff --git a/transformations/src/main/java/eu/excitementproject/eop/transformations/component/alignment/predicatetruthlink/PredicateTruthAligner.java b/transformations/src/main/java/eu/excitementproject/eop/transformations/component/alignment/predicatetruthlink/PredicateTruthAligner.java new file mode 100644 index 00000000..2bdadbac --- /dev/null +++ b/transformations/src/main/java/eu/excitementproject/eop/transformations/component/alignment/predicatetruthlink/PredicateTruthAligner.java @@ -0,0 +1,302 @@ +package eu.excitementproject.eop.transformations.component.alignment.predicatetruthlink; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import 
org.apache.uima.cas.CASException; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.cas.EmptyStringList; +import org.apache.uima.jcas.cas.FSArray; +import org.apache.uima.jcas.cas.NonEmptyStringList; +import org.apache.uima.jcas.cas.StringList; +import org.apache.uima.jcas.tcas.Annotation; +import org.uimafit.util.JCasUtil; + +import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; +import eu.excitement.type.alignment.Link; +import eu.excitement.type.alignment.Link.Direction; +import eu.excitement.type.alignment.Target; +import eu.excitement.type.predicatetruth.PredicateTruth; +import eu.excitement.type.predicatetruth.PredicateTruthNegative; +import eu.excitement.type.predicatetruth.PredicateTruthPositive; +import eu.excitement.type.predicatetruth.PredicateTruthUncertain; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.common.component.alignment.PairAnnotatorComponentException; +import eu.excitementproject.eop.common.utilities.uima.UimaUtils; +import eu.excitementproject.eop.lap.implbase.LAP_ImplBase; + + + +/** + * Produces alignment links between the text and the hypothesis, + * based on the predicate truth annotations + *

+ * Usage: align a sentence pair by calling the annotate method. + * When the {@linkplain Aligner} object is no longer to be used, the + * {@link #cleanUp()} method should be called. + * + * @author Gabi Stanovsky + * @since Aug 2014 + */ + + +public class PredicateTruthAligner implements AlignmentComponent { + + private JCas textView, hypoView; + + //constant values used for aligner description + public static final String ALIGNER_ID = "PredicateTruth"; + public static final String ALIGNER_VERSION = "TruthTeller_1.0"; + public static final String ALIGNEMNT_TYPE_AGREEING_POSITIVE = "Agreeing_Positive_Predicate_Truth"; + public static final String ALIGNEMNT_TYPE_AGREEING_NEGATIVE = "Agreeing_Negative_Predicate_Truth"; + public static final String ALIGNEMNT_TYPE_DISAGREEING = "Disagreeing_Predicate_Truth"; + public static final String ALIGNEMNT_TYPE_NON_MATCHING = "Non_Matching_Predicate_Truth"; + //group labels + public static final String GROUP_LABEL_OPPOSITE_PREDICATE_TRUTH = "OPPOSITE_PREDICATE_TRUTH"; + public static final String GROUP_LABEL_SAME_PREDICATE_TRUTH = "SAME_PREDICATE_TRUTH"; + public static final String GROUP_LABEL_LOCAL_CONTRADICTION = "LOCAL_CONTRADICTION"; + public static final String GROUP_LABEL_LOCAL_ENTAILMENT = "LOCAL_ENTAILMENT"; + + //(currently) constant values used for alignment links + private static final double ALIGNER_CONFIDENCE = 1.0; + private static final Direction ALIGNER_DIRECTION = Direction.Bidirection; + + //store the annotations of predicate truth, for memoization + private Map,Collection> memoTextAnnots; + private Map,Collection> memoHypoAnnots; + private static final List> ptTypes = new ArrayList>(){ + private static final long serialVersionUID = 8489900798036315449L; + + { + add(PredicateTruthPositive.class); + add(PredicateTruthNegative.class); + add(PredicateTruthUncertain.class); + }}; + + + + /** + * default constructor + * set all members to null + */ + public PredicateTruthAligner(){ + textView = null; + hypoView = 
null; + } + + @Override + public void annotate(JCas aJCas) throws PairAnnotatorComponentException { + try { + // create possible group labels instances for this jcas + StringList localEntailment = createStringList(aJCas, new ArrayList() { + private static final long serialVersionUID = 1L; + + { + add(GROUP_LABEL_SAME_PREDICATE_TRUTH); + add(GROUP_LABEL_LOCAL_ENTAILMENT); + }}); + + StringList localContradiction = createStringList(aJCas, new ArrayList() { + private static final long serialVersionUID = 1L; + + { + add(GROUP_LABEL_OPPOSITE_PREDICATE_TRUTH); + add(GROUP_LABEL_LOCAL_CONTRADICTION); + }}); + + StringList emptyGroupLabel = new EmptyStringList(aJCas); + + + // Get the text and hypothesis views + textView = aJCas.getView(LAP_ImplBase.TEXTVIEW); + hypoView = aJCas.getView(LAP_ImplBase.HYPOTHESISVIEW); + + // Record annotations + memoTextAnnots = new HashMap,Collection>(); + memoHypoAnnots = new HashMap,Collection>(); + + for (Class ptType : ptTypes){ + memoTextAnnots.put(ptType, JCasUtil.select(textView, ptType)); + memoHypoAnnots.put(ptType, JCasUtil.select(hypoView, ptType)); + } + + + // add alignment links + // Agreeing Positive Predicate Truth + // PT+ <-> PT+ + createPredicateTruthLinks(PredicateTruthPositive.class,PredicateTruthPositive.class, ALIGNER_CONFIDENCE, ALIGNER_DIRECTION,ALIGNEMNT_TYPE_AGREEING_POSITIVE,localEntailment); + + // Agreeing Negative Predicate Truth + // PT- <-> PT- + createPredicateTruthLinks(PredicateTruthNegative.class,PredicateTruthNegative.class, ALIGNER_CONFIDENCE, ALIGNER_DIRECTION,ALIGNEMNT_TYPE_AGREEING_NEGATIVE,localEntailment); + + // Disagreeing Predicate Truth + // PT+ <-> PT- + createPredicateTruthLinks(PredicateTruthPositive.class,PredicateTruthNegative.class, ALIGNER_CONFIDENCE, ALIGNER_DIRECTION,ALIGNEMNT_TYPE_DISAGREEING,localContradiction); + // PT- <-> PT+ + createPredicateTruthLinks(PredicateTruthNegative.class,PredicateTruthPositive.class, ALIGNER_CONFIDENCE, 
ALIGNER_DIRECTION,ALIGNEMNT_TYPE_DISAGREEING,localContradiction); + + // Non Matching Predicate Truth + // PT+ <-> PT? + createPredicateTruthLinks(PredicateTruthPositive.class,PredicateTruthUncertain.class, ALIGNER_CONFIDENCE, ALIGNER_DIRECTION,ALIGNEMNT_TYPE_NON_MATCHING,emptyGroupLabel); + // PT- <-> PT? + createPredicateTruthLinks(PredicateTruthNegative.class,PredicateTruthUncertain.class, ALIGNER_CONFIDENCE, ALIGNER_DIRECTION,ALIGNEMNT_TYPE_NON_MATCHING,emptyGroupLabel); + // PT? <-> PT+ + createPredicateTruthLinks(PredicateTruthUncertain.class,PredicateTruthPositive.class, ALIGNER_CONFIDENCE, ALIGNER_DIRECTION,ALIGNEMNT_TYPE_NON_MATCHING,emptyGroupLabel); + // PT? <-> PT- + createPredicateTruthLinks(PredicateTruthUncertain.class,PredicateTruthNegative.class, ALIGNER_CONFIDENCE, ALIGNER_DIRECTION,ALIGNEMNT_TYPE_NON_MATCHING,emptyGroupLabel); + + } + catch (CASException e) { + throw new PairAnnotatorComponentException(e); + } + } + + + + @Override + public String getComponentName() { + // Name of this component that is used to identify the related configuration section + return this.getClass().getName(); + } + + @Override + public String getInstanceName() { + // This component does not support instance configuration + return null; + } + + /** + * Draw bidirectional links between all predicate truth annotation of type (TextType) in text and truth annotation of type (hypoType) in Hypothesis + * @param textType + * @param hypoType + * @param confidence + * @param linkDirection + * @param linkInfo + * @throws CASException + */ + private void createPredicateTruthLinks(Class textType, Class hypoType, double confidence,Direction linkDirection,String linkInfo,StringList linkGroupLabel) throws CASException{ + + // get relevant annotations from text and hypothesis - use pre-recorded annotations + Collection textAnnotations = memoTextAnnots.get(textType); + Collection hypoAnnotations = memoHypoAnnots.get(hypoType); + + // mark links between all of the found types + for 
(Annotation tAnno : textAnnotations){ + for (Annotation hAnno : hypoAnnotations){ + Token tToken = UimaUtils.selectCoveredSingle(textView, Token.class, tAnno); + Token hToken = UimaUtils.selectCoveredSingle(hypoView, Token.class, hAnno); + addAlignmentAnnotations(tToken,hToken, confidence, linkDirection, linkInfo, linkGroupLabel); + } + } + + } + + /** + * Add an alignment link from T to H, based on the rule t->h + * in which t is a phrase in T from index textStart to textEnd of the tokens, + * and h is a phrase in H from index hypoStart to hypoEnd of the tokens, + * @param textToken Token in TextView to annotate + * @param hypoToken Token in HypoView to annotate + * @param confidence The confidence of the rule + * @param linkDirection The direction of the link (t to h, h to t or bidirectional). + * @param linkInfo The relation of the rule (Wordnet synonym, Wikipedia redirect etc). + * @param linkGroupLabel + * @throws CASException + */ + private void addAlignmentAnnotations(Token textToken, Token hypoToken, + double confidence, + Direction linkDirection, + String linkInfo, StringList linkGroupLabel) + throws CASException { + + + // Prepare the Target instances + Target textTarget = new Target(textView); + Target hypoTarget = new Target(hypoView); + + + // Prepare an FSArray instance and put the target annotations in it + FSArray textAnnots = new FSArray(textView, 1); + FSArray hypoAnnots = new FSArray(hypoView, 1); + + textAnnots.set(0, textToken); + hypoAnnots.set(0, hypoToken); + + textTarget.setTargetAnnotations(textAnnots); + hypoTarget.setTargetAnnotations(hypoAnnots); + + // Set begin and end value of the Target annotations + textTarget.setBegin(textToken.getBegin()); + textTarget.setEnd(textToken.getEnd()); + hypoTarget.setBegin(hypoToken.getBegin()); + hypoTarget.setEnd(hypoToken.getEnd()); + + // Add the targets to the indices + textTarget.addToIndexes(); + hypoTarget.addToIndexes(); + + // Mark an alignment.Link and add it to the hypothesis view + Link 
link = new Link(hypoView); + link.setTSideTarget(textTarget); + link.setHSideTarget(hypoTarget); + + // Set the link direction + link.setDirection(linkDirection); + + // Set strength + link.setStrength(confidence); + + // Set Group label + link.setGroupLabel(linkGroupLabel); + + + // Add the link information + link.setAlignerID(ALIGNER_ID); + link.setAlignerVersion(ALIGNER_VERSION); + link.setLinkInfo(linkInfo); + + // Mark begin and end according to the hypothesis target + link.setBegin(hypoTarget.getBegin()); + link.setEnd(hypoTarget.getEnd()); + + // Add to index + link.addToIndexes(); + } + + /** + * Converts a collection of string into a a Uima Stringlist + * @param aJCas - Jcas to which to attach the string list? + * @param aCollection - the collection to be converted + * @return a Uima Stringlist, consisting of all the elements in aCollection + */ + private static StringList createStringList(JCas aJCas, + Collection aCollection) + { + if (aCollection.size() == 0) { + return new EmptyStringList(aJCas); + } + + NonEmptyStringList head = new NonEmptyStringList(aJCas); + NonEmptyStringList list = head; + Iterator i = aCollection.iterator(); + while (i.hasNext()) { + head.setHead(i.next()); + if (i.hasNext()) { + head.setTail(new NonEmptyStringList(aJCas)); + head = (NonEmptyStringList) head.getTail(); + } + else { + head.setTail(new EmptyStringList(aJCas)); + } + } + + return list; + } + + +} diff --git a/transformations/src/main/java/eu/excitementproject/eop/transformations/generic/truthteller/conll/AnnotateSentenceToConll.java b/transformations/src/main/java/eu/excitementproject/eop/transformations/generic/truthteller/conll/AnnotateSentenceToConll.java index 9d9e4443..68b1d09b 100644 --- a/transformations/src/main/java/eu/excitementproject/eop/transformations/generic/truthteller/conll/AnnotateSentenceToConll.java +++ b/transformations/src/main/java/eu/excitementproject/eop/transformations/generic/truthteller/conll/AnnotateSentenceToConll.java @@ -1,250 +1,250 
@@ -/** - * - */ -package eu.excitementproject.eop.transformations.generic.truthteller.conll; -import java.io.BufferedReader; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileReader; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Vector; - -import org.apache.log4j.BasicConfigurator; -import org.apache.log4j.Level; -import org.apache.log4j.Logger; - -import eu.excitementproject.eop.common.representation.parse.tree.dependency.basic.BasicNode; -import eu.excitementproject.eop.common.utilities.Utils; -import eu.excitementproject.eop.common.utilities.configuration.ConfigurationException; -import eu.excitementproject.eop.common.utilities.configuration.ConfigurationFile; -import eu.excitementproject.eop.common.utilities.configuration.ConfigurationFileDuplicateKeyException; -import eu.excitementproject.eop.common.utilities.configuration.ConfigurationParams; -import eu.excitementproject.eop.lap.biu.en.parser.ParserRunException; -import eu.excitementproject.eop.lap.biu.en.parser.easyfirst.EasyFirstParser; -import eu.excitementproject.eop.lap.biu.en.sentencesplit.LingPipeSentenceSplitter; -import eu.excitementproject.eop.lap.biu.sentencesplit.SentenceSplitter; -import eu.excitementproject.eop.lap.biu.sentencesplit.SentenceSplitterException; -import eu.excitementproject.eop.transformations.generic.truthteller.AnnotatorException; -import eu.excitementproject.eop.transformations.generic.truthteller.DefaultSentenceAnnotator; -import eu.excitementproject.eop.transformations.representation.ExtendedNode; -import eu.excitementproject.eop.transformations.utilities.TransformationsConfigurationParametersNames; -import eu.excitementproject.eop.transformations.utilities.parsetreeutils.TreeUtilities; - -/** - * @author Amnon Lotan - * - * @since Jul 18, 2012 - */ -public class AnnotateSentenceToConll { - - public static final String 
INPUT_FILE_INDICATOR = "-f"; - - private static Logger logger = null; - - private static AnnotatedConllStringConverter CONLL_CONVERTER = new AnnotatedConllStringConverter(); - private static SentenceSplitter SENTENCE_SPLITTER = new LingPipeSentenceSplitter(); - private EasyFirstParser parser; - private DefaultSentenceAnnotator annotator; - private final File conllOutputFolder; - - private ConfigurationParams annotationParams = null; - - /** - * Ctor - * @throws ConfigurationException - * @throws ConllConverterException - */ - public AnnotateSentenceToConll(ConfigurationFile confFile) throws ConfigurationException, ConllConverterException { - - confFile.setExpandingEnvironmentVariables(true); - annotationParams = confFile.getModuleConfiguration(TransformationsConfigurationParametersNames.TRUTH_TELLER_MODULE_NAME); - - try { - annotator = new DefaultSentenceAnnotator(annotationParams); - - String posTaggerString = annotationParams.get(TransformationsConfigurationParametersNames.PREPROCESS_EASYFIRST); - String easyFirstHost = annotationParams.get(TransformationsConfigurationParametersNames.PREPROCESS_EASYFIRST_HOST); - int easyFirstPort = annotationParams.getInt(TransformationsConfigurationParametersNames.PREPROCESS_EASYFIRST_PORT); - parser = new EasyFirstParser(easyFirstHost, easyFirstPort, posTaggerString); - parser.init(); - } catch (Exception e) { - throw new ConllConverterException("see nested", e); - } - - String conllOutputFolderPath = annotationParams.get(TransformationsConfigurationParametersNames.CONLL_FORMAT_OUTPUT_DIRECTORY); - conllOutputFolder = new File(conllOutputFolderPath); - conllOutputFolder.mkdirs(); - } - - - /** - * Get some text, sentence split it, and return - * @param sentence - * @return - * @throws ConllConverterException - */ - public String textToAnnotatedConllFiles(String sentence) throws ConllConverterException - { - ExtendedNode annotatedSentece = annotateSentece(sentence); - String conllString = 
AnnotatedTreeToConllCoverter.treeToConll(annotatedSentece , CONLL_CONVERTER); - return conllString; - } - - /** - * Get a single sentence, annotate it, and return its string CoNLL representation. - * @param sentence - * @return - * @throws ConllConverterException - */ - public String sentenceToAnnotatedConllString(String sentence) throws ConllConverterException - { - ExtendedNode annotatedSentece = annotateSentece(sentence); - String conllString = AnnotatedTreeToConllCoverter.treeToConll(annotatedSentece , CONLL_CONVERTER); - return conllString; - } - - - public List getSentencesToAnnotate(String inputFileName) throws ConfigurationException, FileNotFoundException, IOException - { - List sentences = new LinkedList(); - File inputFile = new File(inputFileName); - try(BufferedReader reader = new BufferedReader(new FileReader(inputFile))) - { - String line = reader.readLine(); - while (line !=null) - { - sentences.add(line); - line = reader.readLine(); - } - } - return sentences; - } - - private ExtendedNode annotateSentece(String sentence) throws ConllConverterException - { - parser.setSentence(sentence); - ExtendedNode annotatedSentece; - try { - parser.parse(); - BasicNode parsedTree = parser.getParseTree(); - ExtendedNode extendedTree = TreeUtilities.copyFromBasicNode(parsedTree); - annotator.setTree(extendedTree); - annotator.annotate(); - annotatedSentece = annotator.getAnnotatedTree(); - } catch (Exception e) { - throw new ConllConverterException("see nested", e); - } - return annotatedSentece; - } - - /** - * Command Line DEMO for the TruthTeller: get the configuration file and text sentence(s), annotate the sentences and print each one in CoNLL format to a separate file. 
- * - * @param args - * @throws AnnotatorException - * @throws ConfigurationException - * @throws ConfigurationFileDuplicateKeyException - * @throws ParserRunException - * @throws ConllConverterException - * @throws SentenceSplitterException - * @throws IOException - * @throws FileNotFoundException - */ - public static void main(String[] args) - { - BasicConfigurator.configure(); - Logger.getRootLogger().setLevel(Level.INFO); - logger = Logger.getLogger(AnnotateSentenceToConll.class); - try - { - annotateByCommandLineArguments(args); - } - catch(Throwable t) - { - t.printStackTrace(System.out); - logger.error("TruthTeller failed.",t); - } - } - - private static Iterable getSentencesIterable(Iterator argsIterator, AnnotateSentenceToConll app) throws FileNotFoundException, ConfigurationException, IOException, SentenceSplitterException - { - List sentencesToAnnotate = null; - - - String firstArgumentAfterConfigurationFile = null; - if (argsIterator.hasNext()) - { - firstArgumentAfterConfigurationFile = argsIterator.next(); - } - - - if (INPUT_FILE_INDICATOR.equalsIgnoreCase(firstArgumentAfterConfigurationFile)) - { - if (argsIterator.hasNext()) - { - sentencesToAnnotate = app.getSentencesToAnnotate(argsIterator.next()); - } - else - { - throw new RuntimeException("No input file is given, though \""+INPUT_FILE_INDICATOR+"\" has been encountered as a command line argument."); - } - } - else - { - // Read the text from command line - StringBuffer sbInputWords = new StringBuffer(); - - if (firstArgumentAfterConfigurationFile!=null) - { - sbInputWords.append(firstArgumentAfterConfigurationFile); - while (argsIterator.hasNext()) - { - sbInputWords.append(" "); - sbInputWords.append(argsIterator.next()); - } - } - -// List listOfWords = Utils.arrayToCollection(args, new Vector()); -// listOfWords.remove(0); // remove the confFile parameter -// listOfWords.remove(1); // remove the pos-tagger-file-name -// String text = StringUtil.joinIterableToString(listOfWords, " "); - - 
String text = sbInputWords.toString(); - - SENTENCE_SPLITTER.setDocument(text); - SENTENCE_SPLITTER.split(); - sentencesToAnnotate = SENTENCE_SPLITTER.getSentences(); - } - - return sentencesToAnnotate; - } - - - private static void annotateByCommandLineArguments(String[] args) throws AnnotatorException, ConfigurationFileDuplicateKeyException, ConfigurationException, ParserRunException, ConllConverterException, SentenceSplitterException, FileNotFoundException, IOException - { - if (args.length < (1)) - throw new AnnotatorException(String.format("usage: %s configurationFile.xml sentence(s)", AnnotateSentenceToConll.class.getSimpleName())); - - List argsList = Utils.arrayToCollection(args, new Vector()); - Iterator argsIterator = argsList.iterator(); - - ConfigurationFile confFile = new ConfigurationFile(new File(argsIterator.next())); - confFile.setExpandingEnvironmentVariables(true); - AnnotateSentenceToConll app = new AnnotateSentenceToConll(confFile); - - - Iterable sentencesToAnnotate = getSentencesIterable(argsIterator,app); - - List list = new ArrayList(); - for (String sentence : sentencesToAnnotate) - { - ExtendedNode annotatedSentece = app.annotateSentece(sentence); - list.add(annotatedSentece); - } - AnnotatedTreeToConllCoverter.treesToConllFiles(list, app.conllOutputFolder, CONLL_CONVERTER); - } +/** + * + */ +package eu.excitementproject.eop.transformations.generic.truthteller.conll; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Vector; + +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; + +import eu.excitementproject.eop.common.representation.parse.tree.dependency.basic.BasicNode; +import eu.excitementproject.eop.common.utilities.Utils; +import 
eu.excitementproject.eop.common.utilities.configuration.ConfigurationException; +import eu.excitementproject.eop.common.utilities.configuration.ConfigurationFile; +import eu.excitementproject.eop.common.utilities.configuration.ConfigurationFileDuplicateKeyException; +import eu.excitementproject.eop.common.utilities.configuration.ConfigurationParams; +import eu.excitementproject.eop.lap.biu.en.parser.ParserRunException; +import eu.excitementproject.eop.lap.biu.en.parser.easyfirst.EasyFirstParser; +import eu.excitementproject.eop.lap.biu.en.sentencesplit.LingPipeSentenceSplitter; +import eu.excitementproject.eop.lap.biu.sentencesplit.SentenceSplitter; +import eu.excitementproject.eop.lap.biu.sentencesplit.SentenceSplitterException; +import eu.excitementproject.eop.transformations.generic.truthteller.AnnotatorException; +import eu.excitementproject.eop.transformations.generic.truthteller.DefaultSentenceAnnotator; +import eu.excitementproject.eop.transformations.representation.ExtendedNode; +import eu.excitementproject.eop.transformations.utilities.TransformationsConfigurationParametersNames; +import eu.excitementproject.eop.transformations.utilities.parsetreeutils.TreeUtilities; + +/** + * @author Amnon Lotan + * + * @since Jul 18, 2012 + */ +public class AnnotateSentenceToConll { + + public static final String INPUT_FILE_INDICATOR = "-f"; + + private static Logger logger = null; + + private static AnnotatedConllStringConverter CONLL_CONVERTER = new AnnotatedConllStringConverter(); + private static SentenceSplitter SENTENCE_SPLITTER = new LingPipeSentenceSplitter(); + private EasyFirstParser parser; + private DefaultSentenceAnnotator annotator; + private final File conllOutputFolder; + + private ConfigurationParams annotationParams = null; + + /** + * Ctor + * @throws ConfigurationException + * @throws ConllConverterException + */ + public AnnotateSentenceToConll(ConfigurationFile confFile) throws ConfigurationException, ConllConverterException { + + 
confFile.setExpandingEnvironmentVariables(true); + annotationParams = confFile.getModuleConfiguration(TransformationsConfigurationParametersNames.TRUTH_TELLER_MODULE_NAME); + + try { + annotator = new DefaultSentenceAnnotator(annotationParams); + + String posTaggerString = annotationParams.get(TransformationsConfigurationParametersNames.PREPROCESS_EASYFIRST); + String easyFirstHost = annotationParams.get(TransformationsConfigurationParametersNames.PREPROCESS_EASYFIRST_HOST); + int easyFirstPort = annotationParams.getInt(TransformationsConfigurationParametersNames.PREPROCESS_EASYFIRST_PORT); + parser = new EasyFirstParser(easyFirstHost, easyFirstPort, posTaggerString); + parser.init(); + } catch (Exception e) { + throw new ConllConverterException("see nested", e); + } + + String conllOutputFolderPath = annotationParams.get(TransformationsConfigurationParametersNames.CONLL_FORMAT_OUTPUT_DIRECTORY); + conllOutputFolder = new File(conllOutputFolderPath); + conllOutputFolder.mkdirs(); + } + + + /** + * Get some text, sentence split it, and return + * @param sentence + * @return + * @throws ConllConverterException + */ + public String textToAnnotatedConllFiles(String sentence) throws ConllConverterException + { + ExtendedNode annotatedSentece = annotateSentece(sentence); + String conllString = AnnotatedTreeToConllCoverter.treeToConll(annotatedSentece , CONLL_CONVERTER); + return conllString; + } + + /** + * Get a single sentence, annotate it, and return its string CoNLL representation. 
+ * @param sentence + * @return + * @throws ConllConverterException + */ + public String sentenceToAnnotatedConllString(String sentence) throws ConllConverterException + { + ExtendedNode annotatedSentece = annotateSentece(sentence); + String conllString = AnnotatedTreeToConllCoverter.treeToConll(annotatedSentece , CONLL_CONVERTER); + return conllString; + } + + + public List getSentencesToAnnotate(String inputFileName) throws ConfigurationException, FileNotFoundException, IOException + { + List sentences = new LinkedList(); + File inputFile = new File(inputFileName); + try(BufferedReader reader = new BufferedReader(new FileReader(inputFile))) + { + String line = reader.readLine(); + while (line !=null) + { + sentences.add(line); + line = reader.readLine(); + } + } + return sentences; + } + + private ExtendedNode annotateSentece(String sentence) throws ConllConverterException + { + parser.setSentence(sentence); + ExtendedNode annotatedSentece; + try { + parser.parse(); + BasicNode parsedTree = parser.getParseTree(); + ExtendedNode extendedTree = TreeUtilities.copyFromBasicNode(parsedTree); + annotator.setTree(extendedTree); + annotator.annotate(); + annotatedSentece = annotator.getAnnotatedTree(); + } catch (Exception e) { + throw new ConllConverterException("see nested", e); + } + return annotatedSentece; + } + + /** + * Command Line DEMO for the TruthTeller: get the configuration file and text sentence(s), annotate the sentences and print each one in CoNLL format to a separate file. 
+ * + * @param args + * @throws AnnotatorException + * @throws ConfigurationException + * @throws ConfigurationFileDuplicateKeyException + * @throws ParserRunException + * @throws ConllConverterException + * @throws SentenceSplitterException + * @throws IOException + * @throws FileNotFoundException + */ + public static void main(String[] args) + { + BasicConfigurator.configure(); + Logger.getRootLogger().setLevel(Level.INFO); + logger = Logger.getLogger(AnnotateSentenceToConll.class); + try + { + annotateByCommandLineArguments(args); + } + catch(Throwable t) + { + t.printStackTrace(System.out); + logger.error("TruthTeller failed.",t); + } + } + + private static Iterable getSentencesIterable(Iterator argsIterator, AnnotateSentenceToConll app) throws FileNotFoundException, ConfigurationException, IOException, SentenceSplitterException + { + List sentencesToAnnotate = null; + + + String firstArgumentAfterConfigurationFile = null; + if (argsIterator.hasNext()) + { + firstArgumentAfterConfigurationFile = argsIterator.next(); + } + + + if (INPUT_FILE_INDICATOR.equalsIgnoreCase(firstArgumentAfterConfigurationFile)) + { + if (argsIterator.hasNext()) + { + sentencesToAnnotate = app.getSentencesToAnnotate(argsIterator.next()); + } + else + { + throw new RuntimeException("No input file is given, though \""+INPUT_FILE_INDICATOR+"\" has been encountered as a command line argument."); + } + } + else + { + // Read the text from command line + StringBuffer sbInputWords = new StringBuffer(); + + if (firstArgumentAfterConfigurationFile!=null) + { + sbInputWords.append(firstArgumentAfterConfigurationFile); + while (argsIterator.hasNext()) + { + sbInputWords.append(" "); + sbInputWords.append(argsIterator.next()); + } + } + +// List listOfWords = Utils.arrayToCollection(args, new Vector()); +// listOfWords.remove(0); // remove the confFile parameter +// listOfWords.remove(1); // remove the pos-tagger-file-name +// String text = StringUtil.joinIterableToString(listOfWords, " "); + + 
String text = sbInputWords.toString(); + + SENTENCE_SPLITTER.setDocument(text); + SENTENCE_SPLITTER.split(); + sentencesToAnnotate = SENTENCE_SPLITTER.getSentences(); + } + + return sentencesToAnnotate; + } + + + private static void annotateByCommandLineArguments(String[] args) throws AnnotatorException, ConfigurationFileDuplicateKeyException, ConfigurationException, ParserRunException, ConllConverterException, SentenceSplitterException, FileNotFoundException, IOException + { + if (args.length < (1)) + throw new AnnotatorException(String.format("usage: %s configurationFile.xml sentence(s)", AnnotateSentenceToConll.class.getSimpleName())); + + List argsList = Utils.arrayToCollection(args, new Vector()); + Iterator argsIterator = argsList.iterator(); + + ConfigurationFile confFile = new ConfigurationFile(new File(argsIterator.next())); + confFile.setExpandingEnvironmentVariables(true); + AnnotateSentenceToConll app = new AnnotateSentenceToConll(confFile); + + + Iterable sentencesToAnnotate = getSentencesIterable(argsIterator,app); + + List list = new ArrayList(); + for (String sentence : sentencesToAnnotate) + { + ExtendedNode annotatedSentece = app.annotateSentece(sentence); + list.add(annotatedSentece); + } + AnnotatedTreeToConllCoverter.treesToConllFiles(list, app.conllOutputFolder, CONLL_CONVERTER); + } } diff --git a/transformations/src/main/java/eu/excitementproject/eop/transformations/uima/BIUFullLAPWithTruthTeller.java b/transformations/src/main/java/eu/excitementproject/eop/transformations/uima/BIUFullLAPWithTruthTeller.java new file mode 100644 index 00000000..14ea8c13 --- /dev/null +++ b/transformations/src/main/java/eu/excitementproject/eop/transformations/uima/BIUFullLAPWithTruthTeller.java @@ -0,0 +1,92 @@ +package eu.excitementproject.eop.transformations.uima; +import static org.uimafit.factory.AnalysisEngineFactory.createPrimitiveDescription; + +import org.apache.uima.analysis_engine.AnalysisEngineDescription; +import 
org.apache.uima.resource.ResourceInitializationException; + +import eu.excitementproject.eop.common.configuration.CommonConfig; +import eu.excitementproject.eop.common.configuration.NameValueTable; +import eu.excitementproject.eop.common.exception.ConfigurationException; +import eu.excitementproject.eop.lap.LAPAccess; +import eu.excitementproject.eop.lap.LAPException; +import eu.excitementproject.eop.lap.biu.uima.ae.ner.StanfordNamedEntityRecognizerAE; +import eu.excitementproject.eop.lap.biu.uima.ae.parser.EasyFirstParserAE; +import eu.excitementproject.eop.lap.biu.uima.ae.postagger.MaxentPosTaggerAE; +import eu.excitementproject.eop.lap.biu.uima.ae.sentencesplitter.LingPipeSentenceSplitterAE; +import eu.excitementproject.eop.lap.biu.uima.ae.tokenizer.MaxentTokenizerAE; +import eu.excitementproject.eop.lap.implbase.LAP_ImplBaseAE; +import eu.excitementproject.eop.transformations.biu.en.predicatetruth.TruthTellerAnnotatorAE; +import eu.excitementproject.eop.transformations.utilities.TransformationsConfigurationParametersNames; + +/** + * A class to extend BIUFullLAP with truth annotations + * This is implemented within the Transformations package in order to avoid circular dependency between packages + * @author Gabi Stanovsky + * @since Aug 2014 + */ + + +public class BIUFullLAPWithTruthTeller extends LAP_ImplBaseAE implements LAPAccess { + + + public BIUFullLAPWithTruthTeller(String taggerModelFile, String nerModelFile, String parserHost, Integer parserPort, String truthTellerAnnotationsFile) throws LAPException { + try + { + // Step a) Build analysis engine descriptions + AnalysisEngineDescription splitter = createPrimitiveDescription(LingPipeSentenceSplitterAE.class); + AnalysisEngineDescription tokenizer = createPrimitiveDescription(MaxentTokenizerAE.class); + AnalysisEngineDescription tagger = createPrimitiveDescription(MaxentPosTaggerAE.class, + MaxentPosTaggerAE.PARAM_MODEL_FILE , taggerModelFile); + AnalysisEngineDescription ner = 
createPrimitiveDescription(StanfordNamedEntityRecognizerAE.class, + StanfordNamedEntityRecognizerAE.PARAM_MODEL_FILE , nerModelFile); + AnalysisEngineDescription parser = createPrimitiveDescription(EasyFirstParserAE.class, + EasyFirstParserAE.PARAM_HOST , parserHost, + EasyFirstParserAE.PARAM_PORT , parserPort + ); + AnalysisEngineDescription truthteller = createPrimitiveDescription(TruthTellerAnnotatorAE.class, + TruthTellerAnnotatorAE.PARAM_CONFIG , truthTellerAnnotationsFile); + + AnalysisEngineDescription[] descs = new AnalysisEngineDescription[] { + splitter, + tokenizer, + tagger, + ner, + parser, + truthteller, + }; + + // Step b) call initializeViews() + // initialize view with EOP default views. + initializeViews(descs); + + // Step c) set lang ID + languageIdentifier = "EN"; + } + catch (ResourceInitializationException e) + { + throw new LAPException(e); + } + } + + public BIUFullLAPWithTruthTeller(NameValueTable biuFullLAPSection, NameValueTable truthTellerSection) throws LAPException, ConfigurationException { + this( + biuFullLAPSection.getFile(DEFAULT_TAGGER_MODEL_FILE_PARAM).getAbsolutePath(), + biuFullLAPSection.getFile(DEFAULT_NER_MODEL_FILE_PARAM).getAbsolutePath(), + biuFullLAPSection.getString(DEFAULT_PARSER_HOST_NAME), + biuFullLAPSection.getInteger(DEFAULT_PARSER_PORT_NAME), + truthTellerSection.getFile(TransformationsConfigurationParametersNames.ANNOTATION_RULES_FILE).getAbsolutePath() + ); + } + + public BIUFullLAPWithTruthTeller(CommonConfig config) throws LAPException, ConfigurationException { + this(config.getSection(DEFAULT_SECTION_NAME), + config.getSection(TransformationsConfigurationParametersNames.TRUTH_TELLER_MODULE_NAME)); + } + + private static final String DEFAULT_SECTION_NAME = "rte_pairs_preprocess"; + private static final String DEFAULT_TAGGER_MODEL_FILE_PARAM = "easyfirst_stanford_pos_tagger"; + private static final String DEFAULT_NER_MODEL_FILE_PARAM = "stanford_ner_classifier_path"; + private static final String 
DEFAULT_PARSER_HOST_NAME = "easyfirst_host"; + private static final String DEFAULT_PARSER_PORT_NAME = "easyfirst_port"; + +} diff --git a/transformations/src/main/java/eu/excitementproject/eop/transformations/uima/ae/truthteller/PredicateTruthAE.java b/transformations/src/main/java/eu/excitementproject/eop/transformations/uima/ae/truthteller/PredicateTruthAE.java new file mode 100644 index 00000000..f88e3c35 --- /dev/null +++ b/transformations/src/main/java/eu/excitementproject/eop/transformations/uima/ae/truthteller/PredicateTruthAE.java @@ -0,0 +1,117 @@ +package eu.excitementproject.eop.transformations.uima.ae.truthteller; + +import java.util.ArrayList; +import java.util.List; +import java.util.ListIterator; + +import org.apache.uima.UimaContext; +import org.apache.uima.analysis_engine.AnalysisEngineProcessException; +import org.apache.uima.jcas.JCas; +import org.apache.uima.resource.ResourceInitializationException; +import org.uimafit.util.JCasUtil; + +import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; +import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; +import eu.excitement.type.predicatetruth.ClauseTruth; +import eu.excitement.type.predicatetruth.NegationAndUncertainty; +import eu.excitement.type.predicatetruth.PredicateSignature; +import eu.excitement.type.predicatetruth.PredicateTruth; +import eu.excitementproject.eop.common.representation.partofspeech.UnsupportedPosTagStringException; +import eu.excitementproject.eop.lap.biu.uima.CasTreeConverter; +import eu.excitementproject.eop.lap.biu.uima.CasTreeConverterException; +import eu.excitementproject.eop.lap.biu.uima.ae.SingletonSynchronizedAnnotator; +import eu.excitementproject.eop.transformations.biu.en.predicatetruth.PredicateTruthException; +import eu.excitementproject.eop.transformations.biu.en.predicatetruth.SingleTokenTruthAnnotation; +import eu.excitementproject.eop.transformations.representation.ExtendedNode; +import 
eu.excitementproject.eop.transformations.utilities.parsetreeutils.TreeUtilities; + +/** + * An analysis engine for truth annotations + * @author Gabi Stanovsky + * @since Aug 2014 + */ + +public abstract class PredicateTruthAE extends SingletonSynchronizedAnnotator { + + private CasTreeConverter converter; + + @Override + public void initialize(UimaContext aContext) throws ResourceInitializationException{ + super.initialize(aContext); + converter = new CasTreeConverter(); + } + + @Override + public void process(JCas aJCas) throws AnalysisEngineProcessException { + try { + // Get the raw sentences from the CAS + for (Sentence sentenceAnno : JCasUtil.select(aJCas, Sentence.class)) { + // get a list of all tokens in the current sentence + List tokens = JCasUtil.selectCovered(aJCas, Token.class, sentenceAnno); + List taggedTokens; + ExtendedNode annotatedSentence; + annotatedSentence = TreeUtilities.copyFromBasicNode(converter.convertSingleSentenceToTree(aJCas, sentenceAnno)); + + // run inner tool to obtain truth annotations + synchronized (innerTool) { + innerTool.setSentence(annotatedSentence); + innerTool.annotate(); + taggedTokens = innerTool.getAnnotatedEntities(); + } + + // iterate over all tokens and obtain their truth annotations + for (ListIterator it = tokens.listIterator(); it.hasNext();) { + int curIndex = it.nextIndex(); + Token tokenAnno = it.next(); + SingleTokenTruthAnnotation annotationResult = taggedTokens.get(curIndex); + + + if (annotationResult.getPredicateTruthValue() != null){ + // Predicate Truth + PredicateTruth ptTag = TruthMapping.mapPredicateTruth(annotationResult.getPredicateTruthValue(),aJCas,tokenAnno.getBegin(), tokenAnno.getEnd()); + ptTag.addToIndexes(); + + } + if (annotationResult.getClauseTruthValue() != null){ + // Clause Truth + //in this case the annotation result must hold a subordinate clause - pass it to the truth mapping + + //calculate a Token list from extendedNode list + List subtree = new ArrayList(); + for 
(ExtendedNode e : annotationResult.getSubtree()){ + subtree.add(tokens.get(e.getInfo().getNodeInfo().getSerial()-1)); + } + + // get boundaries from annotationResult and get them from the token's begin and and + int begin = tokens.get(annotationResult.getSubtreeMinimalIndex()).getBegin(), + end = tokens.get(annotationResult.getSubtreeMaximalIndex()).getEnd(); + ClauseTruth ctTag = TruthMapping.mapClauseTruth(annotationResult.getClauseTruthValue(), aJCas, subtree,begin,end); + ctTag.addToIndexes(); + + + } + + if (annotationResult.getNuValue() != null){ + // Negation and Uncertainty + NegationAndUncertainty nuTag = TruthMapping.mapNegationAndUncertainty(annotationResult.getNuValue(),aJCas,tokenAnno.getBegin(), tokenAnno.getEnd()); + nuTag.addToIndexes(); + } + + if (annotationResult.getPredicateSignatureValue() != null){ + // Predicate Signature + PredicateSignature sigTag = TruthMapping.mapPredicateSignature(annotationResult.getPredicateSignatureValue(),aJCas,tokenAnno.getBegin(), tokenAnno.getEnd()); + sigTag.addToIndexes(); + } + + + } + } + } + catch (CasTreeConverterException + | UnsupportedPosTagStringException + | PredicateTruthException e ) { + throw new AnalysisEngineProcessException(AnalysisEngineProcessException.ANNOTATOR_EXCEPTION, null, e); + } + } + +} diff --git a/transformations/src/main/java/eu/excitementproject/eop/transformations/uima/ae/truthteller/TruthMapping.java b/transformations/src/main/java/eu/excitementproject/eop/transformations/uima/ae/truthteller/TruthMapping.java new file mode 100644 index 00000000..ecf4034e --- /dev/null +++ b/transformations/src/main/java/eu/excitementproject/eop/transformations/uima/ae/truthteller/TruthMapping.java @@ -0,0 +1,133 @@ +package eu.excitementproject.eop.transformations.uima.ae.truthteller; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.uima.cas.Type; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.cas.FSArray; + +import 
de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; +import eu.excitement.type.predicatetruth.ClauseTruth; +import eu.excitement.type.predicatetruth.ClauseTruthNegative; +import eu.excitement.type.predicatetruth.ClauseTruthNotIdentified; +import eu.excitement.type.predicatetruth.ClauseTruthPositive; +import eu.excitement.type.predicatetruth.ClauseTruthUncertain; +import eu.excitement.type.predicatetruth.NegationAndUncertainty; +import eu.excitement.type.predicatetruth.NegationAndUncertaintyNegative; +import eu.excitement.type.predicatetruth.NegationAndUncertaintyPositive; +import eu.excitement.type.predicatetruth.NegationAndUncertaintyUncertain; +import eu.excitement.type.predicatetruth.PredicateSignature; +import eu.excitement.type.predicatetruth.PredicateSignatureNegativeNegative; +import eu.excitement.type.predicatetruth.PredicateSignatureNegativePositive; +import eu.excitement.type.predicatetruth.PredicateSignatureNegativeUncertain; +import eu.excitement.type.predicatetruth.PredicateSignaturePositiveNegative; +import eu.excitement.type.predicatetruth.PredicateSignaturePositivePositive; +import eu.excitement.type.predicatetruth.PredicateSignaturePositiveUncertain; +import eu.excitement.type.predicatetruth.PredicateSignatureUncertainNegative; +import eu.excitement.type.predicatetruth.PredicateSignatureUncertainPositive; +import eu.excitement.type.predicatetruth.PredicateSignatureUncertainUncertain; +import eu.excitement.type.predicatetruth.PredicateTruth; +import eu.excitement.type.predicatetruth.PredicateTruthNegative; +import eu.excitement.type.predicatetruth.PredicateTruthNotIdentified; +import eu.excitement.type.predicatetruth.PredicateTruthPositive; +import eu.excitement.type.predicatetruth.PredicateTruthUncertain; +import eu.excitementproject.eop.transformations.representation.annotations.PredTruth; + +/** + * Conversion class from Truthteller's annotations to UIMA annotations + * Each static function converts a different annotation type. 
+ * @author Gabi Stanovsky + * @since Aug 2014 + */ + +public class TruthMapping { + + public static PredicateTruth mapPredicateTruth(PredTruth pt, JCas jcas, int begin, int end){ + Type type = jcas.getTypeSystem().getType(PRED_TRUTH_MAP.get(pt).getName()); + PredicateTruth ret = (PredicateTruth)jcas.getCas().createAnnotation(type, begin, end); + return ret; + } + + public static ClauseTruth mapClauseTruth(eu.excitementproject.eop.transformations.representation.annotations.ClauseTruth ct, JCas jcas, List subtree,int begin,int end){ + int subtreeSize = subtree.size(); + Type type = jcas.getTypeSystem().getType(CLAUSE_TRUTH_MAP.get(ct).getName()); + ClauseTruth ret = (ClauseTruth)jcas.getCas().createAnnotation(type, begin, end); + + // set the subtree tokens as a feature structure + FSArray subtreeFSArray = new FSArray(jcas, subtreeSize); + subtreeFSArray.copyFromArray(subtree.toArray(new Token[subtree.size()]), 0, 0, subtreeSize); + ret.setClauseTokens(subtreeFSArray); + return ret; + } + + public static NegationAndUncertainty mapNegationAndUncertainty(eu.excitementproject.eop.transformations.representation.annotations.NegationAndUncertainty nu, JCas jcas,int begin,int end){ + Type type = jcas.getTypeSystem().getType(NU_MAP.get(nu).getName()); + NegationAndUncertainty ret = (NegationAndUncertainty)jcas.getCas().createAnnotation(type, begin, end); + return ret; + } + + public static PredicateSignature mapPredicateSignature(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature sig,JCas jcas,int begin,int end){ + Type type = jcas.getTypeSystem().getType(SIG_MAP.get(sig).getName()); + PredicateSignature ret = (PredicateSignature)jcas.getCas().createAnnotation(type, begin, end); + return ret; + } + + //static mapping from TruthTeller types to UIMA types + public static Map> PRED_TRUTH_MAP = new HashMap>(); + public static Map> CLAUSE_TRUTH_MAP = new HashMap>(); + public static Map> NU_MAP = new HashMap>(); + public static Map> SIG_MAP = 
new HashMap>(); + static + { + // predicate truth mapping + PRED_TRUTH_MAP.put(PredTruth.P, PredicateTruthPositive.class); + PRED_TRUTH_MAP.put(PredTruth.N, PredicateTruthNegative.class); + PRED_TRUTH_MAP.put(PredTruth.U, PredicateTruthUncertain.class); + PRED_TRUTH_MAP.put(PredTruth.O, PredicateTruthNotIdentified.class); + + // clause truth mapping + CLAUSE_TRUTH_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.ClauseTruth.P, ClauseTruthPositive.class); + CLAUSE_TRUTH_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.ClauseTruth.N, ClauseTruthNegative.class); + CLAUSE_TRUTH_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.ClauseTruth.U, ClauseTruthUncertain.class); + CLAUSE_TRUTH_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.ClauseTruth.O, ClauseTruthNotIdentified.class); + + // negation and uncertainty mapping + NU_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.NegationAndUncertainty.P, NegationAndUncertaintyPositive.class); + NU_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.NegationAndUncertainty.N, NegationAndUncertaintyNegative.class); + NU_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.NegationAndUncertainty.U, NegationAndUncertaintyUncertain.class); + + // predicate signature mapping + // signature: -/- + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.N_N, PredicateSignatureNegativeNegative.class); + // signature: -/+ + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.N_P, PredicateSignatureNegativePositive.class); + // signature: -/? 
+ SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.N_U, PredicateSignatureNegativeUncertain.class); + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.N_U_InfP, PredicateSignatureNegativeUncertain.class); + // signature: +/- + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_N, PredicateSignaturePositiveNegative.class); + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_N_InfP, PredicateSignaturePositiveNegative.class); + // signature: +/+ + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_P, PredicateSignaturePositivePositive.class); + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_P_FinP, PredicateSignaturePositivePositive.class); + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_P_FinP_N_P_InfP, PredicateSignaturePositivePositive.class); + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_P_FinP_N_U_InfP, PredicateSignaturePositivePositive.class); + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_P_FinP_P_N_InfP, PredicateSignaturePositivePositive.class); + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_P_FinP_P_U_InfP, PredicateSignaturePositivePositive.class); + // signature: +/? 
+ SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_U, PredicateSignaturePositiveUncertain.class); + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_U_FinP, PredicateSignaturePositiveUncertain.class); + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_U_InfP, PredicateSignaturePositiveUncertain.class); + // signature: ?/- + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.U_N, PredicateSignatureUncertainNegative.class); + // signature: ?/+ + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.U_P, PredicateSignatureUncertainPositive.class); + // signature: ?/? (default in unknown cases) + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.U_U, PredicateSignatureUncertainUncertain.class); + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.NOT_IN_LEXICON, PredicateSignatureUncertainUncertain.class); + }; + + +} diff --git a/transformations/src/test/java/eu/excitementproject/eop/transformations/component/alignment/predicatetruthlink/PredicateTruthAlignerTest.java b/transformations/src/test/java/eu/excitementproject/eop/transformations/component/alignment/predicatetruthlink/PredicateTruthAlignerTest.java new file mode 100644 index 00000000..7a19c519 --- /dev/null +++ b/transformations/src/test/java/eu/excitementproject/eop/transformations/component/alignment/predicatetruthlink/PredicateTruthAlignerTest.java @@ -0,0 +1,193 @@ +package eu.excitementproject.eop.transformations.component.alignment.predicatetruthlink; + +import static eu.excitementproject.eop.transformations.component.alignment.predicatetruthlink.PredicateTruthAligner.ALIGNEMNT_TYPE_AGREEING_NEGATIVE; +import static 
eu.excitementproject.eop.transformations.component.alignment.predicatetruthlink.PredicateTruthAligner.ALIGNEMNT_TYPE_AGREEING_POSITIVE; +import static eu.excitementproject.eop.transformations.component.alignment.predicatetruthlink.PredicateTruthAligner.ALIGNEMNT_TYPE_DISAGREEING; +import static eu.excitementproject.eop.transformations.component.alignment.predicatetruthlink.PredicateTruthAligner.ALIGNEMNT_TYPE_NON_MATCHING; +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; + +import org.apache.uima.cas.CASException; +import org.apache.uima.jcas.JCas; +import org.junit.BeforeClass; +import org.junit.Test; +import org.uimafit.util.JCasUtil; + +import eu.excitement.type.alignment.Link; +import eu.excitement.type.alignment.Link.Direction; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.common.component.alignment.PairAnnotatorComponentException; +import eu.excitementproject.eop.lap.LAPAccess; +import eu.excitementproject.eop.lap.LAPException; +import eu.excitementproject.eop.lap.biu.test.BiuTestUtils; +import eu.excitementproject.eop.lap.implbase.LAP_ImplBase; +import eu.excitementproject.eop.transformations.component.alignment.predicatetruthlink.PredicateTruthAligner; +import eu.excitementproject.eop.transformations.uima.BIUFullLAPWithTruthTellerConfigured; +/** + * A test class for {@link PredicateTruthAligner} + * This test class must reside within transformations since it makes use the truthteller annotator, + * which is not accessible from core (where the aligner is implemented) + * @author Gabi Stanovsky + * @since Aug 2014 + * + */ +public class PredicateTruthAlignerTest { + + @BeforeClass + public static void beforeClass() throws IOException, LAPException, PairAnnotatorComponentException, CASException { + // Run test only under BIU environment + 
BiuTestUtils.assumeBiuEnvironment(); + // create a lap with truth teller annotator + lap = new BIUFullLAPWithTruthTellerConfigured(); + // create an aligner + aligner = new PredicateTruthAligner(); + // annotations for reference text - all tests will examine this result + jcas = lap.generateSingleTHPairCAS(testText, testHypothesis); + aligner.annotate(jcas); + hypoView = jcas.getView(LAP_ImplBase.HYPOTHESISVIEW); + + } + + @Test + public void testAgreeingPositive() throws Exception { + List observed = new ArrayList(); + // get all positive agreeing links + for (Link link : JCasUtil.select(hypoView, Link.class)) { + if (link.getLinkInfo().equals(ALIGNEMNT_TYPE_AGREEING_POSITIVE)){ + observed.add(link); + } + } + // verify that the observed links match the expected + assertAlignmentListEqual(observed, positiveAgreeingExpectedText, positiveAgreeingExpectedHypo); + } + + @Test + public void testAgreeingNegative() throws Exception { + List observed = new ArrayList(); + // get all positive agreeing links + for (Link link : JCasUtil.select(hypoView, Link.class)) { + if (link.getLinkInfo().equals(ALIGNEMNT_TYPE_AGREEING_NEGATIVE)){ + observed.add(link); + } + } + // verify that the observed links match the expected + assertAlignmentListEqual(observed, negativeAgreeingExpectedText, negativeAgreeingExpectedHypo); + } + + @Test + public void testDisagreeing() throws Exception { + List observed = new ArrayList(); + // get all positive agreeing links + for (Link link : JCasUtil.select(hypoView, Link.class)) { + if (link.getLinkInfo().equals(ALIGNEMNT_TYPE_DISAGREEING)){ + observed.add(link); + } + } + // verify that the observed links match the expected + assertAlignmentListEqual(observed, disagreeingExpectedText, disagreeingExpectedHypo); + } + + @Test + public void testNonMatching() throws Exception { + List observed = new ArrayList(); + // get all positive agreeing links + for (Link link : JCasUtil.select(hypoView, Link.class)) { + if 
(link.getLinkInfo().equals(ALIGNEMNT_TYPE_NON_MATCHING)){ + observed.add(link); + } + } + // verify that the observed links match the expected + assertAlignmentListEqual(observed, nonMatchingExpectedText, nonMatchingExpectedHypo); + } + + /** + * Verify that an observed list of annotations covers an expected list of strings + * @param observed + * @param expected + */ + private void assertAlignmentListEqual(Collection observed, List expectedText,List expectedHypo){ + // assert expected and observed annotations are of the same size + int s = observed.size(); + assertEquals(s,expectedText.size()); + + //iterate over expected and observed annotations and assert all are equal + Iterator expectedTextIter = expectedText.iterator(); + Iterator expectedHypoIter = expectedHypo.iterator(); + Iterator observedIter = observed.iterator(); + + for (int i=0;i positiveAgreeingExpectedText = new ArrayList(); + private static List positiveAgreeingExpectedHypo = new ArrayList(); + + //Agreeing Negative + private static List negativeAgreeingExpectedText = new ArrayList(); + private static List negativeAgreeingExpectedHypo = new ArrayList(); + + // Disagreeing + private static List disagreeingExpectedText = new ArrayList(); + private static List disagreeingExpectedHypo = new ArrayList(); + + // Non Matching + private static List nonMatchingExpectedText = new ArrayList(); + private static List nonMatchingExpectedHypo = new ArrayList(); + + + + static{ + positiveAgreeingExpectedText.add("refused"); + positiveAgreeingExpectedHypo.add("did"); + positiveAgreeingExpectedText.add("thought"); + positiveAgreeingExpectedHypo.add("did"); + + negativeAgreeingExpectedText.add("dance"); + negativeAgreeingExpectedHypo.add("dance"); + + disagreeingExpectedText.add("dance"); + disagreeingExpectedHypo.add("did"); + disagreeingExpectedText.add("thought"); + disagreeingExpectedHypo.add("dance"); + disagreeingExpectedText.add("refused"); + disagreeingExpectedHypo.add("dance"); + + 
nonMatchingExpectedText.add("jumping"); + nonMatchingExpectedHypo.add("did"); + nonMatchingExpectedText.add("jumping"); + nonMatchingExpectedHypo.add("dance"); + + } + + +} + + diff --git a/transformations/src/test/java/eu/excitementproject/eop/transformations/uima/BIUFullLAPWithTruthTellerConfigured.java b/transformations/src/test/java/eu/excitementproject/eop/transformations/uima/BIUFullLAPWithTruthTellerConfigured.java new file mode 100644 index 00000000..0982c58e --- /dev/null +++ b/transformations/src/test/java/eu/excitementproject/eop/transformations/uima/BIUFullLAPWithTruthTellerConfigured.java @@ -0,0 +1,24 @@ +package eu.excitementproject.eop.transformations.uima; + +import eu.excitementproject.eop.lap.LAPException; +import eu.excitementproject.eop.lap.biu.test.BiuTestParams; + +/*** + * A version of {@link BIUFullLAPWithTruthTeller} that is configured to paths relative to the biutee/workdir folder. + * Should be used only for testing, as these values should be read for configuration in other scenarios. 
+ * + * @author Gabi Stanovsky + * @since August 2014 + */ + + +public class BIUFullLAPWithTruthTellerConfigured extends BIUFullLAPWithTruthTeller { + public BIUFullLAPWithTruthTellerConfigured() throws LAPException { + super( BiuTestParams.MAXENT_POS_TAGGER_MODEL_FILE, + BiuTestParams.STANFORD_NER_CLASSIFIER_PATH, + BiuTestParams.EASYFIRST_HOST, + BiuTestParams.EASYFIRST_PORT, + BiuTestParams.TRUTH_TELLER_MODEL_FILE); + } + +} diff --git a/transformations/src/test/java/eu/excitementproject/eop/transformations/uima/ae/truthteller/PredicateTruthAETest.java b/transformations/src/test/java/eu/excitementproject/eop/transformations/uima/ae/truthteller/PredicateTruthAETest.java new file mode 100644 index 00000000..e5ea41e9 --- /dev/null +++ b/transformations/src/test/java/eu/excitementproject/eop/transformations/uima/ae/truthteller/PredicateTruthAETest.java @@ -0,0 +1,118 @@ +package eu.excitementproject.eop.transformations.uima.ae.truthteller; +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; + +import org.apache.uima.cas.CASException; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.tcas.Annotation; +import org.junit.BeforeClass; +import org.junit.Test; +import org.uimafit.util.JCasUtil; + +import eu.excitement.type.predicatetruth.ClauseTruthNegative; +import eu.excitement.type.predicatetruth.NegationAndUncertaintyNegative; +import eu.excitement.type.predicatetruth.PredicateTruth; +import eu.excitement.type.predicatetruth.PredicateTruthNegative; +import eu.excitement.type.predicatetruth.PredicateTruthPositive; +import eu.excitementproject.eop.lap.LAPAccess; +import eu.excitementproject.eop.lap.LAPException; +import eu.excitementproject.eop.lap.biu.test.BiuTestUtils; +import eu.excitementproject.eop.lap.implbase.LAP_ImplBase; +import eu.excitementproject.eop.transformations.biu.en.predicatetruth.TruthTellerAnnotatorAE; 
+import eu.excitementproject.eop.transformations.uima.BIUFullLAPWithTruthTellerConfigured; + +/** + * A test class for {@link TruthTellerAnnotatorAE} + * @author Gabi Stanovsky + * @since Aug 2014 + * + */ +public class PredicateTruthAETest { + + @BeforeClass + public static void beforeClass() throws LAPException, CASException, IOException { + // Run test only under BIU environment + BiuTestUtils.assumeBiuEnvironment(); + // create a lap with truth teller annotator + lap = new BIUFullLAPWithTruthTellerConfigured(); + // annotations for reference text - all tests will examine this result + jcas = lap.generateSingleTHPairCAS(testText, testHypothesis); + tView = jcas.getView(LAP_ImplBase.TEXTVIEW); + hView = jcas.getView(LAP_ImplBase.HYPOTHESISVIEW); + } + + @Test + public void testPT() throws Exception { + Collection annotations = new ArrayList(JCasUtil.select(tView, PredicateTruth.class)); + assertPTListEqual(annotations,ptExpected); + } + + @Test + public void testPTPositive() throws Exception { + Collection annotations = new ArrayList(JCasUtil.select(tView, PredicateTruthPositive.class)); + assertPTListEqual(annotations,ptPositiveExpected); + } + + @Test + public void testPTNegative() throws Exception { + Collection annotations = new ArrayList(JCasUtil.select(tView, PredicateTruthNegative.class)); + assertPTListEqual(annotations,ptNegativeExpected); + } + + @Test + public void testNU() throws Exception { + Collection annotations = new ArrayList(JCasUtil.select(hView, NegationAndUncertaintyNegative.class)); + assertPTListEqual(annotations,nuNegativeExpected); + } + + @Test + public void testCTNegative() throws Exception{ + Collection annotations = new ArrayList(JCasUtil.select(tView, ClauseTruthNegative.class)); + assertPTListEqual(annotations,ctNegativeExpected); + } + + /** + * Verify that an observed list of annotations covers an expected list of strings + * @param observed + * @param expected + */ + private static void assertPTListEqual(Collection observed, 
List expected){ + // assert expected and observed annotations are of the same size + int s = observed.size(); + assertEquals(s,expected.size()); + + //iterate over expected and observed annotations and assert all are equal + Iterator observedIter = observed.iterator(); + Iterator expectedIter = expected.iterator(); + for (int i=0;i ptExpected = new ArrayList(); + private static List ptPositiveExpected = new ArrayList(); + private static List ptNegativeExpected = new ArrayList(); + private static List nuNegativeExpected = new ArrayList(); + private static List ctNegativeExpected = new ArrayList(); + + static{ + ptExpected.add("refused"); + ptExpected.add("dance"); + ptPositiveExpected.add("refused"); + ptNegativeExpected.add("dance"); + nuNegativeExpected.add("dance"); + ctNegativeExpected.add("dance"); + } +} diff --git a/util/pom.xml b/util/pom.xml index 83b43453..9d23a6d9 100644 --- a/util/pom.xml +++ b/util/pom.xml @@ -4,7 +4,7 @@ eu.excitementproject eop - 1.1.3 + 1.1.4 util util @@ -25,17 +25,17 @@ eu.excitementproject common - 1.1.3 + 1.1.4 eu.excitementproject core - 1.1.3 + 1.1.4 eu.excitementproject lap - 1.1.2 + 1.1.4 args4j @@ -46,10 +46,39 @@ eu.excitementproject biutee - 1.1.3 + 1.1.4 + + + + + + + + FBK diff --git a/util/src/main/java/eu/excitementproject/eop/util/edaexperimenter/data/DataHandling.java b/util/src/main/java/eu/excitementproject/eop/util/edaexperimenter/data/DataHandling.java index feac357d..269ce9b6 100644 --- a/util/src/main/java/eu/excitementproject/eop/util/edaexperimenter/data/DataHandling.java +++ b/util/src/main/java/eu/excitementproject/eop/util/edaexperimenter/data/DataHandling.java @@ -9,6 +9,7 @@ import java.util.SortedMap; import java.util.TreeMap; +import org.apache.log4j.Level; import org.apache.log4j.Logger; import edu.stanford.nlp.util.StringUtils; @@ -283,8 +284,9 @@ private static HashMap makeMap( // not sure how to do this ... 
it is used (for now) to enforce the training/testing split ratio when clusters are not split (inside) private static SortedMap> enforceRatio( SortedMap> folds, double ratio) { - // TODO Auto-generated method stub + Logger logger = Logger.getLogger("eu.excitementproject.eda-exp.experimenter.ExperimenterFileUtils / enforceRatio"); + logger.setLevel(Level.INFO); for(Integer i: folds.keySet()) { logger.info(" key=" + i + " val=" + folds.get(i)); @@ -316,12 +318,17 @@ public static HashMap>> filterData( * @param cluster the name of the cluster to be balanced */ private static HashMap> balanceCluster(HashMap>> dataRaw, String cluster) { + + Logger logger = Logger.getLogger("eu.excitementproject.eda-exp.experimenter.ExperimenterFileUtils / balanceCluster"); + logger.setLevel(Level.INFO); HashMap> oldCluster = dataRaw.get(cluster); HashMap> balancedCluster = new HashMap>(); + logger.info("Balancing clusters!"); + // adjust how close to the 50/50 ratio we should get with undersampling. I'm allowing here a bit more of the majority class - double ratio = 1.1; + double ratio = 1.0; int min = Integer.MAX_VALUE; for(String cls: oldCluster.keySet()) { @@ -333,7 +340,8 @@ private static HashMap> balanceCluster(HashMap setX = new ArrayList(); setX.addAll(set); Collections.shuffle(setX); @@ -359,6 +372,8 @@ private static Set undersample(Set set, double d) { } } + logger.info("\tnew set size: " + newSet.size()); + return newSet; } } diff --git a/util/src/main/java/eu/excitementproject/eop/util/edaexperimenter/experimenter/Experimenter.java b/util/src/main/java/eu/excitementproject/eop/util/edaexperimenter/experimenter/Experimenter.java index fcaf3bce..5809d2ad 100644 --- a/util/src/main/java/eu/excitementproject/eop/util/edaexperimenter/experimenter/Experimenter.java +++ b/util/src/main/java/eu/excitementproject/eop/util/edaexperimenter/experimenter/Experimenter.java @@ -1,5 +1,6 @@ package eu.excitementproject.eop.util.edaexperimenter.experimenter; +import java.io.File; import 
java.io.IOException; import java.util.HashMap; import java.util.HashSet; @@ -54,6 +55,11 @@ public Experimenter(String[] args) { try{ parser.parseArgument(args); + File f = new File(options.output); + if (! f.exists() || !f.isDirectory()) { + f.mkdir(); + } + tmpTrainFile = options.output + "/" + tmpTrainFile; tmpTestFile = options.output + "/" + tmpTestFile; @@ -141,6 +147,7 @@ private void oneRun() { // make the list of arguments for the EOPRunner String[] args = new String[] {"-config", options.config, "-train", "-trainFile", tmpTrainFile, "-test", "-testFile", tmpTestFile, "-output", options.output, "-score"}; + logger.info("Running the EOP with arguments: " + StringUtils.join(args," ")); // System.out.println("Running the EOP with arguments: " + StringUtils.join(args," ")); diff --git a/util/src/main/java/eu/excitementproject/eop/util/runner/EOPRunner.java b/util/src/main/java/eu/excitementproject/eop/util/runner/EOPRunner.java index 0c9a074a..57f957d5 100644 --- a/util/src/main/java/eu/excitementproject/eop/util/runner/EOPRunner.java +++ b/util/src/main/java/eu/excitementproject/eop/util/runner/EOPRunner.java @@ -1,30 +1,17 @@ package eu.excitementproject.eop.util.runner; -import java.io.BufferedReader; - import java.io.BufferedWriter; import java.io.File; import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; import java.io.OutputStream; import java.io.OutputStreamWriter; -import java.lang.reflect.Constructor; -import java.lang.reflect.InvocationTargetException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; -import java.util.HashMap; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import java.nio.charset.StandardCharsets; import org.apache.commons.io.FileUtils; -import org.apache.commons.io.FilenameUtils; import org.apache.log4j.Logger; -import org.apache.uima.cas.CASException; import org.apache.uima.jcas.JCas; -import org.kohsuke.args4j.CmdLineException; import 
org.kohsuke.args4j.CmdLineParser; import eu.excitementproject.eop.common.EDABasic; @@ -34,16 +21,8 @@ import eu.excitementproject.eop.common.exception.ComponentException; import eu.excitementproject.eop.common.exception.ConfigurationException; import eu.excitementproject.eop.common.utilities.configuration.ImplCommonConfig; -import eu.excitementproject.eop.core.EditDistanceTEDecision; -import eu.excitementproject.eop.lap.LAPAccess; -import eu.excitementproject.eop.lap.LAPException; import eu.excitementproject.eop.lap.PlatformCASProber; -import eu.excitementproject.eop.lap.dkpro.OpenNLPTaggerDE; -import eu.excitementproject.eop.lap.dkpro.OpenNLPTaggerEN; -import eu.excitementproject.eop.lap.dkpro.TreeTaggerEN; -import eu.excitementproject.eop.lap.textpro.TextProTaggerIT; import eu.excitementproject.eop.util.eval.EDAScorer; -import static java.nio.file.StandardCopyOption.*; /** * @@ -55,7 +34,7 @@ * given test/hypothesis pairs. * */ -@SuppressWarnings("unused") + public class EOPRunner { // command line options @@ -70,13 +49,14 @@ public class EOPRunner { private String resultsFile = null; private String xmlResultsFile = null; + @SuppressWarnings("unused") private String language = "EN"; private CommonConfig config; private EDABasic eda = null; - private String configSection = "PlatformConfiguration"; +// private String configSection = "PlatformConfiguration"; private Logger logger; @@ -246,8 +226,11 @@ public void runEOPTest(String testDirStr, String outDir) { File outputDir = new File(outDir); logger.info("Copying configuration file in output directory " + outDir); - FileUtils.copyFileToDirectory(configFile, outputDir); - + try { + FileUtils.copyFileToDirectory(configFile, outputDir); + } catch (IOException e) { + logger.info("Problem copying the configuration file " + configFile.getName() + " to directory " + outputDir.getName()); + } // careful with the copying! The model file may have a relative path which must be first resolved! 
logger.info("Copying model in output directory " + outDir); @@ -255,7 +238,7 @@ public void runEOPTest(String testDirStr, String outDir) { if (modelFile != null && !modelFile.isEmpty()) { FileUtils.copyFileToDirectory(new File(modelFile), outputDir); } else { - logger.info("No model file found"); +// logger.info("No model file found"); } } catch (Exception e) { @@ -356,7 +339,9 @@ public void run() { logger.info("\t training file: " + trainFile + "\n\t training dir: " + trainDir); - lapRunner.runLAPOnFile(trainFile, trainDir); + if (! option.nolap) { + lapRunner.runLAPOnFile(trainFile, trainDir); + } } if (option.train) @@ -368,7 +353,9 @@ public void run() { logger.info("\t testing file: " + testFile + "\n\t testing dir: " + testDir); - lapRunner.runLAPOnFile(testFile, testDir); + if (! option.nolap) { + lapRunner.runLAPOnFile(testFile, testDir); + } } if (option.test) { diff --git a/util/src/main/java/eu/excitementproject/eop/util/runner/EOPRunnerCmdOptions.java b/util/src/main/java/eu/excitementproject/eop/util/runner/EOPRunnerCmdOptions.java index e21de3db..170b4ad3 100644 --- a/util/src/main/java/eu/excitementproject/eop/util/runner/EOPRunnerCmdOptions.java +++ b/util/src/main/java/eu/excitementproject/eop/util/runner/EOPRunnerCmdOptions.java @@ -22,6 +22,9 @@ public class EOPRunnerCmdOptions { @Option(name="-lap", usage="The LAP to be used") public String lap = null; + @Option(name="-nolap", usage="Even if train and test files are given, don't do preprocessing (useful for the Experimenter)") + public boolean nolap = false; + @Option(name="-text", usage="The text part of a text/hypothesis pair") public String text = ""; diff --git a/util/src/main/java/eu/excitementproject/eop/util/runner/LAPRunner.java b/util/src/main/java/eu/excitementproject/eop/util/runner/LAPRunner.java index e31959e9..baede3f0 100644 --- a/util/src/main/java/eu/excitementproject/eop/util/runner/LAPRunner.java +++ b/util/src/main/java/eu/excitementproject/eop/util/runner/LAPRunner.java @@ 
-184,9 +184,14 @@ private String getDefaultLAPClass() { public void runLAPOnFile(String inputFile, String outDir) { logger.info("Running lap on file: " + inputFile + " // writing output to directory " + outDir); + + File dir = new File(outDir); + if (! dir.exists() || !dir.isDirectory()) { + dir.mkdir(); + } try { - lap.processRawInputFormat(new File(inputFile), new File(outDir)); + lap.processRawInputFormat(new File(inputFile), dir); } catch (LAPException e) { System.err.println("Error running the LAP"); e.printStackTrace(); diff --git a/util/src/main/java/eu/excitementproject/eop/util/runner/OutputUtils.java b/util/src/main/java/eu/excitementproject/eop/util/runner/OutputUtils.java index 8f0c8724..c31f251e 100644 --- a/util/src/main/java/eu/excitementproject/eop/util/runner/OutputUtils.java +++ b/util/src/main/java/eu/excitementproject/eop/util/runner/OutputUtils.java @@ -48,7 +48,7 @@ public static HashMap readResults(String file) { m = p.matcher(line); if (m.matches()) { results.put(m.group(1), m.group(2)); - logger.info("Added result: " + m.group(1) + " / " + m.group(2)); +// logger.info("Added result: " + m.group(1) + " / " + m.group(2)); } } reader.close();