diff --git a/adarte/pom.xml b/adarte/pom.xml new file mode 100644 index 00000000..1579bd3d --- /dev/null +++ b/adarte/pom.xml @@ -0,0 +1,51 @@ + + 4.0.0 + + eu.excitementproject + eop + 1.2.1 + + adarte + + + eu.excitementproject + common + 1.2.0 + + + eu.excitementproject + lap + 1.2.0 + + + eu.excitementproject + distsim + 1.2.0 + + + eu.excitementproject + core + 1.2.0 + + + nz.ac.waikato.cms.weka + weka-stable + 3.6.11 + + + fbk + treeditdistance + 1.0 + + + org.apache.uima + uimafit-core + 2.1.0 + + + http://maven.apache.org + adarte + + UTF-8 + + \ No newline at end of file diff --git a/adarte/src/main/java/eu/excitementproject/eop/adarte/AdArte.java b/adarte/src/main/java/eu/excitementproject/eop/adarte/AdArte.java new file mode 100644 index 00000000..715619b5 --- /dev/null +++ b/adarte/src/main/java/eu/excitementproject/eop/adarte/AdArte.java @@ -0,0 +1,898 @@ +package eu.excitementproject.eop.adarte; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.ArrayList; +import java.util.logging.ConsoleHandler; +import java.util.logging.Handler; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.io.File; +import java.lang.reflect.Constructor; + +import org.apache.uima.jcas.JCas; +import org.uimafit.util.JCasUtil; + +import eu.excitementproject.eop.common.DecisionLabel; +import eu.excitementproject.eop.common.EDABasic; +import eu.excitementproject.eop.common.EDAException; +import eu.excitementproject.eop.common.TEDecision; +import eu.excitementproject.eop.common.configuration.CommonConfig; +import eu.excitementproject.eop.common.configuration.NameValueTable; +import eu.excitementproject.eop.common.exception.ComponentException; +import eu.excitementproject.eop.common.exception.ConfigurationException; +import eu.excitement.type.entailment.Pair; +import eu.excitementproject.eop.lap.PlatformCASProber; + + +/** + * The AdArte class implements the EDABasic interface. 
+ * Given a certain configuration, it can be trained over a specific data set in order to optimize its + * performance. + * + * This EDA is based on modeling the Entailment Relations (i.e., Entailment, Not-Entailment) as a + * classification problem. First texts (T) are mapped into hypothesis (H) by sequences of editing operations + * (i.e., insertion, deletion, substitution of text portions) needed to transform T into H, where each edit + * operation has a cost associated with it. Then, and this is different from the algorithms which use these + * operations to calculate a threshold value that best separates the Entailment Relations from the Not-Entailment + * ones, the proposed algorithm uses the calculated operations as a feature set to feed a Supervised Learning + * Classifier System being able to classify the relations between T and H. + * + * + * @author roberto zanoli + * @author silvia colombo + * + * @since January 2015 + * + */ +public class AdArte + implements EDABasic { + + /** + * + * EDA's variables section + * + */ + + /** + * the edit distance component to be used + */ + private FixedWeightTreeEditDistance component; //it calculates tree edit distance among trees + + /** + * the logger + */ + private final static Logger logger = + Logger.getLogger(AdArte.class.getName()); + + /** + * the training data directory + */ + private String trainDIR; + + /** + * the test data directory + */ + private String testDIR; + + /** + * save the training data set into arff format so that one + * can do experiments by using the WEKA Explorer too. 
+ */ + private String saveDataSetInArffFormat; + + /** + * if true the transformations involving matches are considered as features + */ + private boolean match; + + /** + * if true the transformations involving insertions are considered as features + */ + private boolean insertion; + + /** + * if true the transformations involving deletions are considered as features + */ + private boolean deletion; + + /** + * if true the transformations involving replacements are considered as features + */ + private boolean replacement; + + /** + * How the transformations that are used as features have to be + * represented (i.e. LeastSpecificForm | IntermediateForm | GeneralForm) + */ + private String transformationForm; + + /** + * verbosity level + */ + private String verbosityLevel; + + /** + * get the component used by the EDA to calculate the transformations + * + * @return the component + */ + public FixedWeightTreeEditDistance getComponent() { + + return this.component; + + } + + /** + * get the type of component + * + * @return the type of component + */ + protected String getType() { + + return this.getClass().getCanonicalName(); + + } + + /** + * get the training data directory + * + * @return the training directory + */ + public String getTrainDIR() { + + return this.trainDIR; + + } + + /** + * set the training data directory + * + */ + public void setTrainDIR(String trainDIR) { + + this.trainDIR = trainDIR; + + } + + /** + * get the test data directory + * + * @return the test directory + */ + public String getTestDIR() { + + return this.testDIR; + + } + + /** + * set the test data directory + * + */ + public void setTestDIR(String testDIR) { + + this.testDIR = testDIR; + + } + + /* + * if we are training the EDA + */ + boolean trainingOperation; + + + /** + * + * Classifier's variables section; classifier is the + * classifier used by the EDA to classifier T/H pairs + * + */ + + /** + * The actual classifier + */ + //private Classifier classifier; + private 
MyClassifier classifier; + + /** + * The classifier model to be trained during the training phase + * and tested during the testing phase. + */ + private String classifierModel; + + /** + * The feature set used for training and testing + */ + //private Map featuresList; + + /** + * The data set (it is build by using the weka data structures), + * for training and test. + */ + //private Instances inputDataset; + + /** + * The annotation classes, e.g. ENTAILMENT, NONENTAILMENT + */ + //private FastVector classesList; + + /** + * Binary vs weighted features + */ + private boolean binaryFeature; + + /** + * test examples + */ + private DataSet testDataSet; + + /** + * The classifier evaluation; it contains methods for getting + * a number of measures like precision, recall and F1 measure. + */ + //private Evaluation evaluation; + + /** + * number of folds for cross-validation + */ + //private final int numFolds = 10; + + /** + * If an evaluation has to be done on the + * training data set, e.g. 
cross validation + * */ + private boolean crossValidation; + + /** + * Construct an TransformationDriven EDA + */ + public AdArte() { + + this.component = null; + this.classifier = null; + this.crossValidation = false; + this.binaryFeature = true; //weighted feature is not possible in the current implementation + //this.evaluation = null; + this.trainDIR = null; + this.testDIR = null; + this.saveDataSetInArffFormat = null; + //this.classesList = null; + //this.featuresList = null; + this.deletion = true; + this.match = false; + this.replacement = true; + this.insertion = true; + this.trainingOperation = false; + //this.testDataSet = null; + + } + + + @Override + public void initialize(CommonConfig config) + throws ConfigurationException, EDAException, ComponentException { + + try { + + // checking the configuration file + checkConfiguration(config); + + // getting the name value table of the EDA; it contains the methods + // for getting the EDA configuration form the configuration file. 
+ NameValueTable nameValueTable = config.getSection(this.getType()); + + // setting the logger verbosity level: INFO, FINE, FINER + if (this.verbosityLevel == null) { + this.verbosityLevel = nameValueTable.getString("verbosity-level"); + + //logger.setUseParentHandlers(false); + //ConsoleHandler consoleHandler = new ConsoleHandler(); + //consoleHandler.setLevel(Level.parse(this.verbosityLevel)); + //logger.addHandler(consoleHandler); + //logger.setLevel(Level.parse(this.verbosityLevel)); + + replaceConsoleHandler(Logger.getLogger(""), Level.ALL); + logger.setLevel(Level.parse(this.verbosityLevel)); + + } + + // setting the training directory + if (this.trainDIR == null) + this.trainDIR = nameValueTable.getString("trainDir"); + + // setting the test directory + if (this.testDIR == null) + this.testDIR = nameValueTable.getString("testDir"); + + // evaluation on the training data set + if (this.crossValidation == false) + this.crossValidation = Boolean.parseBoolean(nameValueTable.getString("cross-validation")); + + // binary vs weighted features + if (this.binaryFeature == false) + this.binaryFeature = Boolean.parseBoolean(nameValueTable.getString("binary-feature")); + + // if the training data set has to be saved in arff format to be used with the weka explorer tool, + // null for non set + if (this.saveDataSetInArffFormat == null) + this.saveDataSetInArffFormat = nameValueTable.getString("save-arff-format"); + + // decide which type of transformations (i.e. match, insertion, deletion, substitution) has to be + // considered as features. 
True for considering it, false otherwise + String enambledTransforations = nameValueTable.getString("transformations"); + if (enambledTransforations.indexOf(Transformation.MATCH) != -1) + this.match = true; + + if (enambledTransforations.indexOf(Transformation.DELETION) != -1) + this.deletion = true; + + if (enambledTransforations.indexOf(Transformation.INSERTION) != -1) + this.insertion = true; + + if (enambledTransforations.indexOf(Transformation.REPLACE) != -1) + this.replacement = true; + + // the transformation representation to be used for representing the transformation + // i.e. LeastSpecificForm, IntermediateForm, GeneralForm + this.transformationForm = nameValueTable.getString("transformation-form"); + + // classifier initialization + String classifierName = nameValueTable.getString("classifier"); + + // getting the classifier parameters + String[] classifierParameters = nameValueTable.getString("classifier-parameters").split(" "); + + // the classifier model trained during the training phase and to be used during the test phase. 
+ this.classifierModel = nameValueTable.getString("classifier-model"); + + // initialize the classifier + if (this.trainingOperation == true) { + + classifier = new MyClassifier(classifierName, classifierParameters, this.classifierModel); + + } + //load the classifier built during the training phase + else { + + classifier = new MyClassifier(this.classifierModel); + + } + + // calling FixedWeightTreeEditDistance with its default configuration + //component = new FixedWeightTreeEditDistance(); + + // component initialization through the configuration file + String componentName = nameValueTable.getString("components"); + + // componentName = "eu.excitementproject.eop.core.component.distance.FixedWeightTreeEditDistance"; + if (this.component == null) { + + try { + + Class componentClass = Class.forName(componentName); + Constructor componentClassConstructor = componentClass.getConstructor(CommonConfig.class); + this.component = (FixedWeightTreeEditDistance) componentClassConstructor.newInstance(config); + + } catch (Exception e) { + + throw new ComponentException(e.getMessage()); + + } + + } + + logger.info("EDA configuration:" + "\n" + + "training directory:" + this.trainDIR + "\n" + + "testing directory:" + this.testDIR + "\n" + + "match transformation enabled:" + this.match + "\n" + + "deletion transformation enabled:" + this.deletion + "\n" + + "insertion transformation enabled:" + this.insertion + "\n" + + "replacement transformation enabled:" + this.replacement + "\n" + + "transformation representation:" + this.transformationForm + "\n" + + "classifier:" + this.classifier.toString() + "\n" + + "classifier model name:" + this.classifierModel + "\n" + + "cross-validation:" + this.crossValidation + "\n" + + "binary-feature:" + this.binaryFeature + "\n" + + "tree edit distance component:" + componentName + "\n" + ); + + } catch (ConfigurationException e) { + + throw e; + + } catch (Exception e) { + + throw new EDAException("Initialization error:" + e.getMessage()); 
+ + } + + } + + + @Override + public EditDistanceTEDecision process(JCas jcas) + throws EDAException, ComponentException { + + // the predicted class + String annotationClass = null; + // the confidence assigned by the classifier to the class + double confidence = 0.0; + // the classified T/H pair + Pair pair = null; + + try { + + // get the T/H pair + pair = JCasUtil.selectSingle(jcas, Pair.class); + logger.info("processing pair: " + pair.getPairID() + "\n" + + "Text: " + pair.getText().getCoveredText() + "\n" + + "Hypothesis: " + pair.getHypothesis().getCoveredText()); + + /** + * this records the gold standard answer for this pair. If the pair + * represents a training data, this value is the gold standard answer. If + * it is a null value, the pair represents a problem that is yet to be answered. + */ + String goldAnswer = pair.getGoldAnswer(); //get gold annotation + + // get the distance between T and H + double distance = component.calculation(jcas).getDistance(); + + // get the transformations needed to transform T into H + List transformations = component.getTransformations(); + + // binary feature + // HashSet example_i = new HashSet(); + // weighted feature + HashMap example_i = new HashMap(); + + //save the transformations to be printed into the log file + StringBuffer loggerTransformationsBuffer = new StringBuffer(); + loggerTransformationsBuffer.append("number of transformations:" + transformations.size()); + loggerTransformationsBuffer.append("\n"); + + int transformations_counter = 0; + Iterator iteratorTransformation = transformations.iterator(); + while(iteratorTransformation.hasNext()) { + + transformations_counter++; + + Transformation transformation_i = iteratorTransformation.next(); + + String transformation_i_name = + transformation_i.print(this.replacement, this.match, this.deletion, this.insertion, this.transformationForm); + + loggerTransformationsBuffer.append("transformation " + + transformations_counter + ":" + + transformation_i + 
"\n"); + + if (transformation_i_name == null) + continue; + + + if (this.classifier.containsFeature(transformation_i_name)) { + //weighted feature + int weight = 1; + if (example_i.keySet().contains(transformation_i_name)) { + weight = example_i.get(transformation_i_name).intValue() + 1; + } + example_i.put(transformation_i_name, new Integer(weight)); + } + + } + + // data structure for storing gold annotations (e.g. ENTAILMENT) + ArrayList annotation = new ArrayList(); + + // data structure for storing the examples to be used for training + List> examples = new ArrayList>(); + + // adding example_i into the list of the examples + examples.add(example_i); + + // adding the annotation of the example_i + if (goldAnswer != null) + annotation.add(goldAnswer); //the annotation is in the test set + else + annotation.add("?"); //the annotation is not in the test set + + //initialize the data set (i.e. declare attributes and classes) + //Instances testDataSet = initDataSet(); + DataSet testDataSet = new DataSet(classifier.getFeaturesList(), classifier.getClassesList()); + + //fill the data set + testDataSet.addExamples(examples, annotation); + + if (this.saveDataSetInArffFormat != null) { + if (this.testDataSet == null) + this.testDataSet = new DataSet(classifier.getFeaturesList(), classifier.getClassesList()); + this.testDataSet.addExamples(examples, annotation); + } + + //the classifier returns with a confidence level for each of the possible + //classes; following we look for the most probable classes and report + //the confidence assigned to this class by the classifier + double[] score = classifier.testClassifier(testDataSet); + int index = 0; + for (int i = 0; i < score.length; i++) { + if (score[i] >= confidence) { + confidence = score[i]; + index = i; + } + } + + //get the class label (e.g. 
Entailment) + annotationClass = testDataSet.getData().attribute("class").value(index); + //System.err.println("classAttribute:" + testDataSet.getData().classAttribute()); + //System.err.println("firstInstance:" + testDataSet.getData().firstInstance()); + + logger.fine("gold standard class label: " + goldAnswer + "\n" + + "predicted class:" + annotationClass + "\n" + + "calculated distance:" + distance + "\n\n" + + loggerTransformationsBuffer.toString() + "\n"); + + logger.finer("data set format:" + testDataSet); + + } catch (Exception e) { + + throw new EDAException("Annotating error:" + e.getMessage()); + + } + + DecisionLabel decisionLabel = DecisionLabel.getLabelFor(annotationClass); + + return new EditDistanceTEDecision(decisionLabel, pair.getPairID(), confidence); + + } + + + @Override + public void shutdown() { + + logger.info("shutdown ..."); + + if (component != null) { + ((FixedWeightTreeEditDistance)component).shutdown(); + } + this.component = null; + this.classifier = null; + this.crossValidation = false; + this.binaryFeature = true; + //this.evaluation = null; + this.trainDIR = null; + this.testDIR = null; + //this.classesList = null; + //this.featuresList = null; + this.deletion = true; + this.match = false; + this.replacement = true; + this.insertion = true; + + try { + if (this.trainingOperation == false) { + this.testDataSet.saveDataSet(this.saveDataSetInArffFormat + "_test"); + } + } catch (Exception e) { + logger.warning(e.getMessage()); + } + + this.saveDataSetInArffFormat = null; + this.trainingOperation = false; + + logger.info("done."); + + } + + + @Override + public void startTraining(CommonConfig config) throws ConfigurationException, EDAException, ComponentException { + + logger.info("training ..."); + + //this is a training phase + this.trainingOperation = true; + + try { + + //initialize the EDA + initialize(config); + + //if there are no files for training + File f = new File(trainDIR); + if (f.exists() == false) { + throw new 
ConfigurationException(trainDIR + ":" + f.getAbsolutePath() + " not found!"); + } + + //data structure for storing gold annotations (e.g. ENTAILMENT) + //for each of the example in the data set + ArrayList annotation = new ArrayList(); + + //data structure for storing the examples to be used for training + //binary feature + //List> examples = new ArrayList>(); + + //weighted features + List> examples = new ArrayList>(); + + File[] files = f.listFiles(); + //sort files on the bases of their id + Arrays.sort(files); + + //reading the training data set + for (File xmi : files) { + if (!xmi.getName().endsWith(".xmi")) { + continue; + } + + //System.err.println(xmi.getName()); + //logger.finer("file: " + xmi.getName()); + + //fileCounter++; + //if (fileCounter >100) + // break; + + // The annotated pair is added into the CAS. + JCas jcas = PlatformCASProber.probeXmi(xmi, null); + + //the T/H pair + Pair pair = JCasUtil.selectSingle(jcas, Pair.class); + logger.info("processing pair: " + pair.getPairID() + "\n" + + "Text: " + pair.getText().getCoveredText() + "\n" + + "Hypothesis: " + pair.getHypothesis().getCoveredText()); + + //the pair annotation + String goldAnswer = pair.getGoldAnswer(); //get gold annotation + + //get the distance between T and H + double distance = component.calculation(jcas).getDistance(); + + //get the transformations to transform T into H + List transformations = component.getTransformations(); + + //binary feature + //HashSet example_i = new HashSet(); + //weighted feature + HashMap example_i = new HashMap(); + + //save the transformations to be printed into the log file + StringBuffer loggerTransformationsBuffer = new StringBuffer(); + loggerTransformationsBuffer.append("number of transformations:" + transformations.size()); + loggerTransformationsBuffer.append("\n"); + + int transformation_counter = 0; + + Iterator iteratorTransformation = transformations.iterator(); + while(iteratorTransformation.hasNext()) { + Transformation 
transformation_i = iteratorTransformation.next(); + transformation_counter++; + + String transformation_i_name = + transformation_i.print(this.replacement, this.match, this.deletion, this.insertion, this.transformationForm); + + loggerTransformationsBuffer.append("transformation " + transformation_counter + ":" + transformation_i); + loggerTransformationsBuffer.append("\n"); + + if (transformation_i_name == null) + continue; + + if (classifier.addFeature(transformation_i_name)); + + //weighted feature + int weight = 1; + if (example_i.keySet().contains(transformation_i_name)) { + weight = example_i.get(transformation_i_name).intValue() + 1; + } + example_i.put(transformation_i_name, new Integer(weight)); + + //binary feature + //example_i.add(transformation_i_name); + + } + + //commentare per rimuovere feature distanza + //example_i.add("distance:" + (Math.floor(distance * 100.0) / 100.0)); + + //creating the classes index starting from 0 + classifier.addClass(goldAnswer); + + //add the example_i into the list of the examples + examples.add(example_i); + //add the annotation of the example_i + annotation.add(goldAnswer); + + logger.fine("gold standard class label: " + goldAnswer + "\n" + + "calculated distance:" + distance + "\n\n" + + loggerTransformationsBuffer.toString() + "\n"); + + } + + //init the data set (i.e. 
attribute and classes declaration) for + //training the classifier + //Instances trainingDataSet = initDataSet(); + DataSet trainingDataSet = new DataSet(classifier.getFeaturesList(), classifier.getClassesList()); + + //fill the data set for training the classifier + //fillDataSet(trainingDataSet, examples, annotation); + trainingDataSet.addExamples(examples, annotation); + + logger.finer("data set:" + trainingDataSet); + + logger.info("number of examples:" + examples.size() + "\n" + + "number of features:" + (classifier.getFeaturesSetSize()-1) + "\n" + //-1 due to the fake_attribute + "number of classes:" + (classifier.getClassesListSize()-1) + "\n" //-1 due to the fake class + //logger.info("input data set:\n" + inputDataset);//the data set on arff format + ); + + //save the data set into arff format + if (this.saveDataSetInArffFormat != null) + trainingDataSet.saveDataSet(this.saveDataSetInArffFormat + "_training"); + //this.saveDataSet(trainingDataSet); + + //train the classifier + classifier.trainClassifier(trainingDataSet); + + //cross-validation + if (this.crossValidation == true) { + classifier.evaluateModel(trainingDataSet); + } + + } catch (Exception e) { + + throw new EDAException("Training error:" + e.getMessage()); + + } + + logger.info("done."); + + } + + + /** + * Checks the configuration and raise exceptions if the provided + * configuration is not compatible with this class + * + * param config the configuration + * + * @throws ConfigurationException + */ + private void checkConfiguration(CommonConfig config) + throws ConfigurationException { + + //if (config == null) + //throw new ConfigurationException("Configuration file not found."); + + } + + + /** + * Get a summary description of the classifier evaluation + * + * @return the summary + */ + /* + public String toSummaryString() { + + return evaluation.toSummaryString(); + + } + */ + + + /** + * calculate the estimated error rate + * + * @return the estimated error rate + */ + /* + public double 
errorRate() { + + return evaluation.errorRate(); + + } + */ + + + /** + * Replaces the ConsoleHandler for a specific Logger with one that will log + * all messages. + * + * @param logger the logger to update. + * @param newLevel the new level to log. + */ + private static void replaceConsoleHandler(Logger logger, Level newLevel) { + + // Handler for console (reuse it if it already exists) + Handler consoleHandler = null; + // see if there is already a console handler + for (Handler handler : logger.getHandlers()) { + if (handler instanceof ConsoleHandler) { + consoleHandler = handler; + break; + } + } + + if (consoleHandler == null) { + // there was no console handler found, create a new one + consoleHandler = new ConsoleHandler(); + logger.addHandler(consoleHandler); + } + // set the console handler to fine: + consoleHandler.setLevel(newLevel); + } + + + /* + public static void main(String args[]) { + + TransformationDrivenEDA tdEDA; + + try { + + tdEDA = new TransformationDrivenEDA(); + + File configFile = new File("./src/main/resources/configuration-file/TransformationDrivenEDA_EN.xml"); + + CommonConfig config = new ImplCommonConfig(configFile); + + //tdEDA.test(); + LAPAccess lap = new MaltParserEN(); + // process TE data format, and produce XMI files. + // Let's process English RTE3 data (formatted as RTE5+) as an example. 
+ + + + + File input = new File("/hardmnt/norris0/zanoli/TBMLEDA/dataset/SICK_train.xml"); + + File outputDir = new File("/tmp/training"); + + try { + + lap.processRawInputFormat(input, outputDir); // outputDir will have those XMIs + + System.out.println(input); + + lap.processRawInputFormat(input, outputDir); // outputDir will have those XMIs + } catch (Exception e) + { + System.err.println(e.getMessage()); + } + + tdEDA.startTraining(config); + + tdEDA.shutdown(); + System.exit(0); + + System.exit(0); + + tdEDA.initialize(config); + + File f = new File("/home/scolombo/tbmleda/tmpfiles/"); + + //build up the dataset from training data + for (File xmi : f.listFiles()) { + + if (!xmi.getName().endsWith(".xmi")) { + continue; + } + + // The annotated pair is added into the CAS. + JCas jcas = PlatformCASProber.probeXmi(xmi, null); + EditDistanceTEDecision edtedecision = tdEDA.process(jcas); + System.err.println(edtedecision.getPairID() + "\t" + + edtedecision.getDecision() + " " + + edtedecision.getConfidence()); + + } + + } catch (Exception e) { + System.err.println(e.getMessage()); + } + + } + */ + +} diff --git a/adarte/src/main/java/eu/excitementproject/eop/adarte/Alignments.java b/adarte/src/main/java/eu/excitementproject/eop/adarte/Alignments.java new file mode 100644 index 00000000..c6e6ac02 --- /dev/null +++ b/adarte/src/main/java/eu/excitementproject/eop/adarte/Alignments.java @@ -0,0 +1,164 @@ +package eu.excitementproject.eop.adarte; + +import java.util.HashMap; +import java.util.Map; + +import org.apache.uima.jcas.JCas; +import org.uimafit.util.JCasUtil; + +import eu.excitementproject.eop.core.component.alignment.lexicallink.LexicalAligner; +import eu.excitementproject.eop.lap.implbase.LAP_ImplBase; +import eu.excitement.type.alignment.GroupLabelInferenceLevel; +import eu.excitement.type.alignment.Link; + +/** + * + * This class contains the alignments produced by the lexical aligner between T and H. 
+ * + * 3 different types of alignments have been defined: + * LOCAL_ENTAILMENT (e.g. WORDNET__SYNONYM) + * LOCAL_CONTRADICTION (e.g. WORDNET__ANTONYM) + * LOCAL_SIMILARITY (e.g. WORDNET__SIMILAR_TO) + * + * Given two words, the method getAlignmentType of the class returns the type of alignment that + * exists between the two tokens. + * + * @author roberto zanoli + * @author silvia colombo + * + * @since January 2015 +*/ + +// The used aligner component has been deprecated and it will be replaced in the next code release. +@SuppressWarnings("deprecation") +public class Alignments { + + // Entailment relations type + public static String LOCAL_ENTAILMENT = GroupLabelInferenceLevel.LOCAL_ENTAILMENT.toString(); + public static String LOCAL_CONTRADICTION = GroupLabelInferenceLevel.LOCAL_CONTRADICTION.toString(); + public static String LOCAL_SIMILARITY = GroupLabelInferenceLevel.LOCAL_SIMILARITY.toString(); + + // Entailment relations direction + public static String DIRECTION_HtoT = Link.Direction.HtoT.toString(); + public static String DIRECTION_TtoH = Link.Direction.TtoH.toString(); + public static String DIRECTION_Bidirection = Link.Direction.Bidirection.toString(); + + // It contains the words that have been aligned with their links + // The key is like: token1__token2, where `__` separates the 2 tokens, e.g. assassin__killer + // while the value is the link produced by the aligner component. + private Map alignments; + + // The lexical aligner component to be used to create the alignments + private LexicalAligner aligner; + + /** + * The constructor + */ + public Alignments() { + + alignments = new HashMap(); + + } + + /** + * The constructor + * + * @param aligner The lexical aligner to be used for creating the alignments + * @param jcas the CAS containing T and H to be aligned. 
+ * + */ + public Alignments(LexicalAligner aligner, JCas jcas) throws Exception { + + this(); + + this.aligner = aligner; + + try { + + // Call the aligner component to get the alignments between T and H + if (this.aligner != null) { + //logger.finer("\ngetting the alignments ..."); + this.aligner.annotate(jcas); + //logger.finer("done."); + } + + //get the HYPOTHESIS view + JCas hypoView = jcas.getView(LAP_ImplBase.HYPOTHESISVIEW); + + //cycle through the alignments + for (Link link : JCasUtil.select(hypoView, Link.class)) { + + String key = link.getTSideTarget().getCoveredText().replaceAll(" ", "_|_") + + "__" + + link.getHSideTarget().getCoveredText().replaceAll(" ", "_|_"); + + //for a couple of tokens it can save a type of alignment only (the first) in the + //order as provided by the aligner component. + if (!alignments.containsKey(key)) + alignments.put(key, link); + + } + + } catch (Exception e) { + + throw new Exception(e.getMessage()); + + } + + } + + + /** + * Given 2 tokens, token1 and token2 it says if there is a local LOCAL_ENTAILMENT, LOCAL_CONTRADICTION or LOCAL_SIMILARITY + * between the 2 tokens. + * + * @param token1 + * @param token2 + * + * @return an array of 3 values: + * + * 1) The type of alignment: LOCAL_ENTAILMENT | LOCAL_CONTRADICTION | LOCAL_SIMILARITY + * 2) The direction of the entailment: TtoH | HtoT | Bidirectional + * 3) The alignment relation rule: // HYPERNYM | SYNONYM | ... + * + */ + protected String[] getAlignment(FToken token1, FToken token2, String wordMatch) { + + String[] result = new String[3]; + + String alignmentType = null;; + + //if there is a match between the two tokens we have LOCAL_ENTAILMENT + if (token1.match(token2, wordMatch)) { + alignmentType = LOCAL_ENTAILMENT; + result[0] = alignmentType; + result[1] = DIRECTION_Bidirection; + //result[2] = null; //default value + } + // possible ALIGNMENTS only when the dprel relation (see dependency parsing relations) of the 2 tokens + // is the same. 
else if (token1.getDprel().equals(token2.getDprel())) { + + //the alignment between token1 and token2 + Link alignment = alignments.get(token1.getForm() + "__" + token2.getForm()); + + // LOCAL_ENTAILMENT | LOCAL_CONTRADICTION | LOCAL_SIMILARITY + if (alignment != null && alignment.getGroupLabelsInferenceLevel().size() != 0) { + + //take the first valid alignment; this could be an issue in case of contrasting multiple alignments. + alignmentType = (alignment.getGroupLabelsInferenceLevel().iterator().next().toString()); + + result[0] = alignmentType; //LOCAL_ENTAILMENT | LOCAL_CONTRADICTION | LOCAL_SIMILARITY + result[1] = alignment.getDirectionString(); // TtoH | HtoT | Bidirectional + result[2] = alignment.getLinkInfo(); // e.g. HYPERNYM | SYNONYM in WordNet based aligner + + } + + } + + return result; + + } + + +} diff --git a/adarte/src/main/java/eu/excitementproject/eop/adarte/DataSet.java b/adarte/src/main/java/eu/excitementproject/eop/adarte/DataSet.java new file mode 100644 index 00000000..97dc3a41 --- /dev/null +++ b/adarte/src/main/java/eu/excitementproject/eop/adarte/DataSet.java @@ -0,0 +1,264 @@ +package eu.excitementproject.eop.adarte; + +import java.io.BufferedWriter; +import java.io.FileOutputStream; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.SortedSet; +import java.util.TreeSet; + +import weka.core.Attribute; +import weka.core.FastVector; +import weka.core.Instance; +import weka.core.Instances; +import weka.core.SparseInstance; + + +/** + * + * The data set to train and test the classifier + * + * @author roberto zanoli + * @author silvia colombo + * + * @since January 2015 + * + */ +public class DataSet { + + /** + * The data set (it is built by using the weka data structures) + */ + private Instances data; + //the features + private Map 
featuresSet; + //the list of class labels + private FastVector classLabels; + + + /** + * Get the data set + */ + protected Instances getData() { + + return this.data; + + } + + /** + * The constructor that initializes the data set declaring its attributes (features and classes) + * + * @param featuresSet the features + * @param classLabels the classes + * + */ + public DataSet(Map featuresSet, FastVector classLabels) throws Exception { + + this.featuresSet = featuresSet; + this.classLabels = classLabels; + + try { + + //1) defining the attributes; basically each of the + //extracted features is a new attribute, e.g. + //@attribute attr6 numeric + //@attribute attr7 numeric + //@attribute attr8 numeric + FastVector attributes = new FastVector(); + + for (Entry entry : entriesSortedByValues(featuresSet)) { + String featureName = entry.getKey(); + + //commentare per rimuovere feature distanza + //if (featureName.indexOf("distance:") != -1) + //featureName = featureName.split(":")[0]; + + //each of the extracted features is a new attribute + Attribute attribute_i = new Attribute(featureName); + //adding the attribute_i into the list of the attributes + //System.err.println(attribute_i + "\t" + featuresList.get(featureName)); + attributes.addElement(attribute_i); + //logger.info("adding attribute_i:" + attributes.size()); + } + + // 2) defining the class attribute, e.g. + //@attribute class {null,ENTAILMENT,NONENTAILMENT} + + Attribute attribute_class = new Attribute("class", classLabels); + //logger.info("adding class attribute:" + attribute_class); + attributes.addElement(attribute_class); + + //create the data set named 'dataset', e.g. 
+ //@relation dataset + data = new Instances("dataset", attributes, 0); + + //the last attribute is the class + //inputDataset.setClassIndex(featuresList.size()); + data.setClassIndex(data.numAttributes() - 1); + //logger.info("data set:\n" + inputDataset); + + } catch (Exception e) { + + throw new Exception("Data Set initialization error:" + e.getMessage()); + + } + + } + + + /** + * Adding data (i.e. examples) into the defined data set + * + * @param examples the examples to be added + * @param annotation the class labels of the examples + * @param + * + */ + //binary features + //private void fillDataSet(List> examples, List annotation) + //weighted features + protected void addExamples(List> examples, List annotation) throws Exception { + + try { + + //creating an instance for each of the examples + for (int i = 0; i < examples.size(); i++) { + + //getting the example_i + //bninary feature + //HashSet example_i = examples.get(i); + + //weighted features + HashMap example_i = examples.get(i); + + //logger.info("example_i:" + example_i); + //an array of size(featuresList)+1 values + double[] initValues = new double[featuresSet.size() + 1]; + //creating a SPARSE instance i and initialize it so that + //its values are set to 0 + Instance instance_i = new SparseInstance(1.0, initValues);//1.0 is the instance weight + + //binary feature + //Iterator iterator_j = example_i.iterator(); + //weighted feature + Iterator iterator_j = example_i.keySet().iterator(); + + while(iterator_j.hasNext()) { + + String feature_j = iterator_j.next(); + //logger.finer("feature j:" + feature_j); + + + //coommnetare per rimuovere feature distanza + /* + if (feature_j.indexOf("distance:") != -1) { + String new_feature_j = feature_j.split(":")[0]; + //System.err.println(feature_j); + int featureIndex = featuresList.get(new_feature_j); + //System.err.println(feature_j + "---"); + double weight = Double.parseDouble(feature_j.split(":")[1]); + //System.err.println(weight); + 
instance_i.setValue(featureIndex, weight);//1.0 is the feature weight + } + + //commentare per rimuovere feature distanza + else + */ + + if (featuresSet.containsKey(feature_j)) { + + //System.err.println(feature_j + "\t" + featuresList.get(feature_j)); + int featureIndex = featuresSet.get(feature_j); + //only the features with weight different from 0 are set + + //binary feature + //instance_i.setValue(featureIndex, 1.0);//1.0 is the feature weight + //weighted feature + + double weight = 1.0; + //if (this.binaryFeature == false) + // weight = example_i.get(feature_j).doubleValue(); + + instance_i.setValue(featureIndex, weight);//1.0 is the feature weight + //System.err.println("feature:" + feature_j + " " + featureIndex + " weight:" + weight); + + } + + } + + if (instance_i.numValues() == 0) { + int featureIndex; + featureIndex = featuresSet.get("fake_attribute"); + instance_i.setValue(featureIndex, 1.0);//1.0 is the feature weight + } + //the last value is that of the annotation class + + //System.err.println(classesList.size()); + //System.err.println("+++++++++++++:" + featuresList.size() + " " + classesList.indexOf(annotation.get(i)) + " " + annotation.get(i)); + + instance_i.setValue(featuresSet.size(), classLabels.indexOf(annotation.get(i))); + //adding the instance into the data set + data.add(instance_i); + + } + + } catch (Exception e) { + + throw new Exception("Creating data set error:" + e.getMessage()); + + } + + } + + + + /** + * Save the data set in arff format to be used with the WEKA Explorer + */ + protected void saveDataSet(String dataSetName) throws Exception { + + try { + + BufferedWriter writer = null; + writer = new BufferedWriter(new OutputStreamWriter( + new FileOutputStream(dataSetName, false), "UTF-8")); + + PrintWriter printout = new PrintWriter(writer); + printout.print(data); + printout.close(); + writer.close(); + + } catch (Exception e) { + + throw new Exception("Saving data set error:" + e.getMessage()); + + } + + } + + /** + * 
sort + */ + static > SortedSet> entriesSortedByValues(Map map) { + SortedSet> sortedEntries = new TreeSet>( + new Comparator>() { + @Override public int compare(Map.Entry e1, Map.Entry e2) { + int res = e1.getValue().compareTo(e2.getValue()); + return res != 0 ? res : 1; // Special fix to preserve items with equal values + } + } + ); + sortedEntries.addAll(map.entrySet()); + return sortedEntries; + } + + + + +} diff --git a/adarte/src/main/java/eu/excitementproject/eop/adarte/DependencyTreeUtils.java b/adarte/src/main/java/eu/excitementproject/eop/adarte/DependencyTreeUtils.java new file mode 100644 index 00000000..2af511d9 --- /dev/null +++ b/adarte/src/main/java/eu/excitementproject/eop/adarte/DependencyTreeUtils.java @@ -0,0 +1,443 @@ +package eu.excitementproject.eop.adarte; + +import java.util.HashMap; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; +import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; +import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency; + +import org.apache.uima.jcas.JCas; +import static org.apache.uima.fit.util.JCasUtil.select; +import static org.apache.uima.fit.util.JCasUtil.selectCovered; + +/** + * + * This class contains same utility methods for managing the dependency trees and the + * text containing them. + * + * @author roberto zanoli + * @author silvia colombo + * + * @since January 2015 + * +*/ +public class DependencyTreeUtils { + + + /* + * check if the text contains multiple sentences. + * + * @param text the text + * + * @return true in case of multiple sentences + */ + protected static boolean checkMultiSentences(String text){ + + String[] trees = text.split("\n\n"); + + return trees.length>1; + + } + + + /* + * check if the text contains multiple trees. 
+ * + * @param text the text + * + * @return true in case of multiple trees + */ + protected static boolean checkMultiTree(String text){ + + Pattern p = Pattern.compile("\t_\t_\t_\t_\t_\t_\n"); + Matcher m = p.matcher(text); + int count = 0; + while (m.find()){ + count +=1; + } + + if (count > 1) + return true; + + return false; + + } + + + /* + * check if the text contains PhrasalVerbs + * + * @param text the text + * + * @return true in case of PhrasalVerbs + */ + protected static boolean checkPhrasalVerbs(String text) { + + String[] splitLines = text.split("\n"); + for (int i = 0; i < splitLines.length; i++) { + String[] fields = splitLines[i].split("\t"); + if (fields[7].equalsIgnoreCase("prt")) + return true; + } + + return false; + + } + + + /* + * check if the text contains some punctuation + * + * @param text the text + * + * @return true in case of punctuation + */ + protected static boolean checkPunctuation(String text) { + + String[] splitLines = text.split("\n"); + for (int i = 0; i < splitLines.length; i++) { + String[] fields = splitLines[i].split("\t"); + if (fields[7].equalsIgnoreCase("punct") || fields[7].equalsIgnoreCase("PUNC")) + return true; + } + + return false; + + } + + + /* + * It creates a new tree where the provided trees have been attached in. 
+ * + * @param trees + * + * @return a new tree + */ + protected static String createFakeTree(String multiTree) { + + StringBuffer newTree = new StringBuffer(); + + String newNode = "1\t_\t_\t_\t_\t_\t_\t_\t_\t_\n"; + newTree.append(newNode); + + String[] splitLines = multiTree.split("\n"); + for (int i = 0; i < splitLines.length; i++) { + + String[] tokens = splitLines[i].split("\t"); + + for (int j = 0; j < tokens.length; j++) { + + //token id, or link id + if (j == 0 || j == 6) { + + if (j == 6 && tokens[j].equals("_")) + tokens[j] = "1"; + else { + int num = Integer.parseInt(tokens[j]); + num++; + tokens[j] = Integer.toString(num); + } + } + newTree.append(tokens[j]); + if (j < tokens.length - 1) + newTree.append("\t"); + + } + newTree.append("\n"); + + } + + newTree.append("\n"); + return newTree.toString(); + + } + + + /* + * It merges the provided trees + * + * @param trees + * + * @return a new tree + */ + protected static String mergeTrees(String multiTree) { + + String[] trees = multiTree.split("\n\n"); + String newTree = ""; + //add new node + newTree+="1\t_\t_\t_\t_\t_\t_\t_\t_\t_\n"; + int prevtreelenght = 1; + + for (int i = 0; i < trees.length; i++){ + String tree = trees[i]; + String[] lines = tree.split("\n"); + for(int j = 0; j dependentMap = new HashMap(); + // Map of governor token address and its token position + Map dependencyMap = new HashMap(); + // Map of governor token address and its dependency function value + Map dependencyTypeMap = new HashMap(); + + for (Dependency dependecny : selectCovered(Dependency.class, sentence)) { + dependentMap.put(dependecny.getDependent() + .getAddress(), dependecny.getGovernor().getAddress()); + } + + int i = 1; + for (Token token : selectCovered(Token.class, sentence)) { + dependencyMap.put(token.getAddress(), i); + i++; + } + + for (Dependency dependecny : selectCovered(Dependency.class, sentence)) { + dependencyTypeMap.put(dependecny.getDependent().getAddress(), + dependecny.getDependencyType()); + } + 
+ int j = 1; + + for (Token token : selectCovered(Token.class, sentence)) { + String lemma = token.getLemma() == null ? "_" : token.getLemma().getValue(); + String pos = token.getPos() == null ? "_" : token.getPos().getPosValue(); + String dependent = "_"; + + if (dependentMap.get(token.getAddress()) != null) { + if (dependencyMap.get(dependentMap.get(token.getAddress())) != null) { + dependent = "" + dependencyMap.get(dependentMap.get(token.getAddress())); + } + } + String type = dependencyTypeMap.get(token.getAddress()) == null ? "_" + : dependencyTypeMap.get(token.getAddress()); + + if (dependentMap.get(token.getAddress()) != null + && dependencyMap.get(dependentMap.get(token.getAddress())) != null + && j == dependencyMap.get(dependentMap.get(token.getAddress()))) { + // IOUtils.write(j + "\t" + token.getCoveredText() + "\t" + lemma + "\t" + pos + // + "\t_\t_\t" + 0 + "\t" + type + "\t_\t_\n", aOs, aEncoding); + result.append(j + "\t" + token.getCoveredText() + "\t" + lemma + "\t" + pos + + "\t_\t_\t" + 0 + "\t" + type + "\t_\t_\n"); + } + else { + //IOUtils.write(j + "\t" + token.getCoveredText() + "\t" + lemma + "\t" + pos + // + "\t_\t_\t" + dependent + "\t" + type + "\t_\t_\n", aOs, aEncoding); + result.append(j + "\t" + token.getCoveredText() + "\t" + lemma + "\t" + pos + + "\t_\t_\t" + dependent + "\t" + type + "\t_\t_\n"); + } + j++; + } + + //IOUtils.write("\n", aOs, aEncoding); + //System.out.print("\n"); + result.append("\n"); + + } + + } catch (Exception e) { + + throw new Exception(e.getMessage()); + + } + + return result.toString(); + + } + + + /** + * Given a dependency tree it removes the punctuation (the punct marker in the + * CoNLL-X file is used to recognize the punctuation). 
+ * + * @param dependencyTree the tree + * + * @return the tree in input without the punctuation + * + */ + protected static String removePunctuation(String dependencyTree){ + + String cleaned_tree = ""; + + Boolean hasChild = false; + String[] lines = dependencyTree.split("\n"); + + for (int i = 0; i < lines.length; i++) { + if(!lines[i].isEmpty()){ + String[] fields = lines[i].split("\\s"); + int tokenId = Integer.parseInt(fields[0]); + //if(fields[7].equals("punct")){ + if(fields[7].equalsIgnoreCase("punct") || fields[7].equalsIgnoreCase("PUNC")){ //added by roberto for German language + //checking for children + for (int j = 0; j < lines.length; j++){ + if(!lines[j].isEmpty()){ + String[] fieldsj = lines[j].split("\\s"); + if(fieldsj[6].equals(tokenId+"")){ + hasChild = true; + } + } + } + //update stage + if (!hasChild) { + lines[i]=""; + for (int j = 0; j < lines.length; j++){ + if(!lines[j].isEmpty()) { + String[] fieldsj = lines[j].split("\\s"); + //updating the IDs for the deletion + if(Integer.parseInt(fieldsj[0]) >= tokenId) { + fieldsj[0] = (Integer.parseInt(fieldsj[0])-1)+""; + } + //updating the heads. 
I assume that the root cannot be a punctuation mark + if(!fieldsj[6].equals("_") && Integer.parseInt(fieldsj[6]) > tokenId) { + fieldsj[6] = (Integer.parseInt(fieldsj[6])-1)+""; + } + String line = ""; + for (String field:fieldsj) { + line+= field + "\t"; + } + lines[j]=line; + } + } + } + } + } + } + for (int i = 0; i < lines.length; i++){ + if(!lines[i].isEmpty()) + cleaned_tree+=lines[i]+"\n"; + } + + return cleaned_tree+"\n"; + + } + + /** + * Given a dependency tree it merges the nodes that are part of a same phrasal verb + * + * @param dependencyTree the tree + * + * @return the tree in input rith the nodes merged + * + */ + protected static String mergePhrasalVerbs(String dependencyTree){ + + String cleaned_tree = ""; + + Boolean hasChild = false; + String[] lines = dependencyTree.split("\n"); + + for (int i = 0; i < lines.length; i++) { + if(!lines[i].isEmpty()){ + String[] fields = lines[i].split("\\s"); + int tokenId = Integer.parseInt(fields[0]); + //if(fields[7].equals("punct")){ + if(fields[7].equalsIgnoreCase("prt")) { + String prepositionToken = fields[1]; + String prepositionLemma = fields[2]; + int prepositionHead = Integer.parseInt(fields[6]); + //checking for children + for (int j = 0; j < lines.length; j++){ + if(!lines[j].isEmpty()){ + String[] fieldsj = lines[j].split("\\s"); + if(fieldsj[6].equals(tokenId+"")) { + hasChild = true; + } + } + } + //update stage + if (!hasChild) { + lines[i]=""; + for (int j = 0; j < lines.length; j++){ + if(!lines[j].isEmpty()) { + String[] fieldsj = lines[j].split("\\s"); + + if ( (j + 1) == prepositionHead ) { + + fieldsj[1] = fieldsj[1] + "_|_" + prepositionToken; + fieldsj[2] = fieldsj[2] + "_|_" + prepositionLemma; + + } + + //updating the IDs for the deletion + if(Integer.parseInt(fieldsj[0]) >= tokenId) { + fieldsj[0] = (Integer.parseInt(fieldsj[0])-1)+""; + } + //updating the heads. 
I assume that the root cannot be a punctuation mark + if(!fieldsj[6].equals("_") && Integer.parseInt(fieldsj[6]) > tokenId) { + fieldsj[6] = (Integer.parseInt(fieldsj[6])-1)+""; + } + String line = ""; + for (String field:fieldsj) { + line+= field + "\t"; + } + lines[j]=line; + } + } + } + } + } + } + for (int i = 0; i < lines.length; i++){ + if(!lines[i].isEmpty()) + cleaned_tree+=lines[i]+"\n"; + } + + return cleaned_tree+"\n"; + + } + +} diff --git a/adarte/src/main/java/eu/excitementproject/eop/adarte/EditDistanceTEDecision.java b/adarte/src/main/java/eu/excitementproject/eop/adarte/EditDistanceTEDecision.java new file mode 100644 index 00000000..21115021 --- /dev/null +++ b/adarte/src/main/java/eu/excitementproject/eop/adarte/EditDistanceTEDecision.java @@ -0,0 +1,87 @@ +package eu.excitementproject.eop.adarte; + +import eu.excitementproject.eop.common.TEDecision; +import eu.excitementproject.eop.common.DecisionLabel; + +/** + * The EditDistanceTEDecision class implements the + * TEDecision interface. + * + * It supports two kinds of constructors, with confidence value or + * without. 
+ * + * @author Roberto Zanoli + * + */ +public class EditDistanceTEDecision implements TEDecision { + + /** + * the decision label + */ + private DecisionLabel decisionLabel; + + /** + * the ID of the T-H pair + */ + private String pairId; + + /** + * the confidence value + */ + private double confidence; + + + /** + * the constructor + * @param decisionLabel the decision label + * @param pairId the ID of the t-h pair + */ + public EditDistanceTEDecision (DecisionLabel decisionLabel, String pairId) { + + this.decisionLabel = decisionLabel; + this.pairId = pairId; + this.confidence = TEDecision.CONFIDENCE_NOT_AVAILABLE; + + } + + + /** + * the constructor + * @param decisionLabel the decision label + * @param confidence the confidence value + * @param pairId the ID of the t-h pair + */ + public EditDistanceTEDecision (DecisionLabel decisionLabel, String pairId, double confidence) { + + this.decisionLabel = decisionLabel; + this.pairId = pairId; + this.confidence = confidence; + + } + + + @Override + public DecisionLabel getDecision() { + + return decisionLabel; + + } + + + @Override + public double getConfidence() { + + return this.confidence; + + } + + + @Override + public String getPairID() { + + return pairId; + + } + + +} diff --git a/adarte/src/main/java/eu/excitementproject/eop/adarte/FToken.java b/adarte/src/main/java/eu/excitementproject/eop/adarte/FToken.java new file mode 100644 index 00000000..b171e866 --- /dev/null +++ b/adarte/src/main/java/eu/excitementproject/eop/adarte/FToken.java @@ -0,0 +1,219 @@ +package eu.excitementproject.eop.adarte; + +/** + * This class represents a token with the information that is available in the + * CoNLL-X file produced by the parser, e.g. token id, lemma, form, pos, head, dprel + * + * @author roberto zanoli + * @author silvia colombo + * + * @since January 2015 + */ +public class FToken { + + //ID Token counter, starting at 1 for each new sentence. + private int id; + //FORM Word form or punctuation symbol. 
private String form;
//LEMMA Lemma or stem (depending on particular data set) of word form, or an underscore if not available.
private String lemma;
//STEM; not in CoNLL
//private String stem;
//PoS
private String pos;
//HEAD Head of the current token, which is either a value of ID or zero ('0').
//Note that depending on the original treebank annotation, there may be multiple tokens with an ID of zero.
private int head;
//DPREL Dependency relation to the HEAD. The set of dependency relations depends on the particular language.
//Note that depending on the original treebank annotation, the dependency relation may be meaningful or simply 'ROOT'.
private String dprel;
//dprel relations from the current token to the root
private String dprelRelations;

/**
 * constructor
 *
 * @param id the token id
 * @param form the token form
 * @param lemma the token lemma
 * @param pos the token pos
 * @param head the token head
 * @param dprel the token dependency relation to the HEAD
 *
 */
public FToken(int id, String form, String lemma, String pos, int head, String dprel) {

    this.id = id;
    this.form = form;
    this.lemma= lemma;
    this.pos = pos;
    //this.stem = stem;
    this.head = head;
    this.dprel = dprel;
    this.dprelRelations = null;
    // NOTE(review): hard-coded negation hack — any token whose lemma is
    // exactly "no" gets its dependency relation forced to "neg", overriding
    // the parser output. Looks English-specific, and a null lemma would
    // throw an NPE here — TODO confirm intent.
    if (this.lemma.equals("no"))
        this.dprel = "neg";

}


/**
 *
 * Get the token id
 *
 * @return the token id
 */
public int getId() {

    return this.id;

}


/**
 *
 * Get the form of the token
 *
 * @return the form
 */
public String getForm() {

    return this.form;

}


/**
 *
 * Get the lemma of the token
 *
 * @return the lemma
 */
public String getLemma() {

    return this.lemma;

}


/**
 * Get the stem
 */
/*
public String getStem() {

    return this.stem;

}
*/


/**
 *
 * Get the POS of the token
 *
 * @return the pos
 */
public String getPOS() {

    return this.pos;

}


/**
 *
 * Get the head of the 
token + * + * @return the head + */ + public int getHead() { + + return this.head; + + } + + + /** + * + * Get the dprel relation + * + * @return the dprel relation + */ + public String getDprel() { + + return this.dprel; + + } + + + /** + * + * set the dprel relations + * + * @param dprelRelations the dprel relations + */ + public void setDprelRelations(String dprelRelations) { + + this.dprelRelations = dprelRelations; + + } + + + /** + * + * Get the dprel relations + * + * @return the dprel relations + */ + public String getDprelRelations() { + + //System.err.println("=======================" + this.deprelRelations); + return this.dprelRelations; + + } + + + /** + * + * Return true when two tokens match; matches can be done considering + * both the lemma and dprel of the tokens or the dprel or lemma only. + * + * @param token2 the token to be matched with the current one + * @param matchType the type of match: lemma-dprel, dprel or lemma + * + * @return true when the tokens match; false otherwise + */ + public boolean match(FToken token2, String matchType) { + + if (matchType != null && matchType.equals("lemma-dprel")) + return (this.lemma.equalsIgnoreCase(token2.getLemma()) && + this.dprel.equals(token2.getDprel())); + if (matchType != null && matchType.equals("dprel")) + return (this.dprel.equals(token2.getDprel())); + else //matchType.equals("lemma") + return (this.lemma.equalsIgnoreCase(token2.getLemma())); + + } + + + /** + * Get a description of the token + * + * return the description of the token + */ + public String toString() { + + return this.id + "__" + + this.form + "__" + + this.lemma + "__" + + this.pos + "__" + + //this.stem + ":" + + this.head + "__" + + this.dprel + "__" + + this.dprelRelations; + + } + + } + \ No newline at end of file diff --git a/adarte/src/main/java/eu/excitementproject/eop/adarte/FixedWeightTreeEditDistance.java b/adarte/src/main/java/eu/excitementproject/eop/adarte/FixedWeightTreeEditDistance.java new file mode 100644 
index 00000000..a362046e --- /dev/null +++ b/adarte/src/main/java/eu/excitementproject/eop/adarte/FixedWeightTreeEditDistance.java @@ -0,0 +1,803 @@ +package eu.excitementproject.eop.adarte; + +import java.io.File; +import java.lang.reflect.Constructor; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Vector; +import java.util.logging.ConsoleHandler; +import java.util.logging.Handler; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.apache.uima.jcas.JCas; + +import treedist.EditScore; +import treedist.Mapping; +import treedist.TreeEditDistance; + +import eu.excitementproject.eop.common.component.distance.DistanceCalculation; +import eu.excitementproject.eop.common.component.distance.DistanceComponentException; +import eu.excitementproject.eop.common.component.distance.DistanceValue; +import eu.excitementproject.eop.common.component.scoring.ScoringComponentException; +import eu.excitementproject.eop.common.configuration.CommonConfig; +import eu.excitementproject.eop.common.configuration.NameValueTable; +import eu.excitementproject.eop.common.exception.ComponentException; +import eu.excitementproject.eop.common.exception.ConfigurationException; +import eu.excitementproject.eop.common.utilities.configuration.ImplCommonConfig; +import eu.excitementproject.eop.core.component.alignment.lexicallink.LexicalAligner; +import eu.excitementproject.eop.lap.implbase.LAP_ImplBase; + + +/** + * The FixedWeightedTreeEditDistance class implements the DistanceCalculation interface. + * Given a pair of T-H, each of them represented as a sequences of tokens, the edit distance between + * T and H is the minimum number of operations required to convert T to H. + * FixedWeightedTreeEditDistance implements the simplest form of weighted edit distance that simply uses a + * constant cost for each of the edit operations: match, substitute, insert, delete. 
+ * + * The component uses an implementation of Zhang and Shasha's algorithm [Zhang89] for calculating tree edit distance + * that is kindle make available by Yuya Unno from this site: https://github.com/unnonouno/tree-edit-distance + * + * @author roberto zanoli + * @author silvia colombo + * + * @since January 2015 + * + */ +@SuppressWarnings("deprecation") +public class FixedWeightTreeEditDistance implements DistanceCalculation { + + /** + * The alignment component that finds the alignments between the tokens in T and those in H + */ + private LexicalAligner aligner; + /** + * The transformations obtained transforming T into H + */ + private List transformations; + /** + * weight for match + */ + private final double mMatchWeight = 0; + /** + * weight for delete + */ + private final double mDeleteWeight = 1; + /** + * weight for insert + */ + private final double mInsertWeight = 1; + /** + * weight for substitute + */ + private final double mSubstituteWeight = 1; + /** + * the activated instances + */ + private String instances; + /** + * words matching considering: lemma-dprel | dprel | lemma + */ + private String wordMatch; + + /** + * the alignments produced by the alignment components + */ + private Alignments alignments; + /** + * if the punctuation has to be removed from the trees + */ + private boolean punctuationRemoval; + /** + * verbosity level + */ + private String verbosityLevel; + + /** + * the logger + */ + private final static Logger logger = Logger.getLogger(FixedWeightTreeEditDistance.class.getName()); + + + /** + * Construct a fixed weight edit distance + */ + public FixedWeightTreeEditDistance() throws ComponentException { + + logger.info("creating an instance of " + this.getComponentName() + " ..."); + + // the produced transformations + this.transformations = null; + // the activated instances + this.instances = null; + // the alignment component + this.aligner = null; + // the alignments produced by the alginer components + this.alignments 
= null; + // matches among words, considering: lemma-dprel | dprel | lemma + this.wordMatch = "lemma-dprel"; + + //setting the logger verbosity level + this.verbosityLevel = "INFO"; + //logger.setUseParentHandlers(false); + //ConsoleHandler consoleHandler = new ConsoleHandler(); + //consoleHandler.setLevel(Level.parse(this.verbosityLevel)); + //logger.addHandler(consoleHandler); + //logger.setLevel(Level.parse(this.verbosityLevel)); + + replaceConsoleHandler(Logger.getLogger(""), Level.ALL); + logger.setLevel(Level.parse(this.verbosityLevel)); + + logger.fine("matching words based on:" + this.wordMatch); + + // if the stop words have to be removed + this.punctuationRemoval = true; + logger.fine("punctuation removal enabled:" + punctuationRemoval); + + // default lexical alignment component + String componentName = + "eu.excitementproject.eop.core.component.alignment.lexicallink.LexicalAligner"; + + logger.fine("using:" + componentName + " for creating the alignments"); + + } + + + /** + * Constructor used to create this object. 
+ * + * @param config the configuration + * + */ + public FixedWeightTreeEditDistance(CommonConfig config) throws ConfigurationException, ComponentException { + + // the produced transformations + this.transformations = null; + // the activated instances + this.instances = null; + // the alignment component + this.aligner = null; + // the alignments produce by the aligner + this.alignments = null; + //matches among words, considering: lemma-dprel | dprel | lemma + this.wordMatch = null; + // if the stop words have to be removed + this.punctuationRemoval = false; + + logger.info("creating an instance of " + this.getComponentName() + " ..."); + + try { + + //get the component configuration + NameValueTable componentNameValueTable = + config.getSection(this.getClass().getCanonicalName()); + + //get the selected instance + this.instances = componentNameValueTable.getString("instance"); + + //get the math type to be used to compare words + this.wordMatch = componentNameValueTable.getString("node-match"); + + //get the instance configuration + NameValueTable instanceNameValueTable = + config.getSubSection(this.getClass().getCanonicalName(), instances); + + //setting the logger verbosity level + this.verbosityLevel = instanceNameValueTable.getString("verbosity-level"); + //logger.setUseParentHandlers(false); + //ConsoleHandler consoleHandler = new ConsoleHandler(); + //consoleHandler.setLevel(Level.parse(this.verbosityLevel)); + //logger.addHandler(consoleHandler); + //logger.setLevel(Level.parse(this.verbosityLevel)); + + replaceConsoleHandler(Logger.getLogger(""), Level.ALL); + logger.setLevel(Level.parse(this.verbosityLevel)); + + //get the configuration file of the alignment component + String configurationFile = + instanceNameValueTable.getString("configuration-file"); + + //get the alignment component configuration + String componentName = + instanceNameValueTable.getString("alignment-component"); + + //if the punctuation has to be removed from dependencies trees + 
this.punctuationRemoval = Boolean.parseBoolean(instanceNameValueTable.getString("punctuation-removal")); + + //create an instance of the alignment component + if (componentName != null && !componentName.equals("")) { + + try { + + Class componentClass = Class.forName(componentName); + Constructor componentClassConstructor = componentClass.getConstructor(CommonConfig.class); + File configFile = new File(configurationFile); + ImplCommonConfig commonConfig = new ImplCommonConfig(configFile); + this.aligner = (LexicalAligner) componentClassConstructor.newInstance(commonConfig); + + } catch (Exception e) { + + throw new ComponentException(e.getMessage()); + + } + + } + + logger.fine("word matching:" + this.wordMatch + "\n" + + "punctuation removal enabled:" + punctuationRemoval + "\n" + + "aligner component:" + componentName); + + + } catch (ConfigurationException e) { + + throw new ComponentException(e.getMessage()); + + } catch (Exception e) { + + throw new ComponentException(e.getMessage()); + + } + + logger.info("done."); + + } + + + @Override + public String getComponentName() { + + return "FixedWeightTreeEditDistance"; + + } + + + @Override + public String getInstanceName() { + + return instances; + + } + + + /** + * Get the transformations used to transform T into H + * + * @return the transformations + * + */ + public List getTransformations() { + + return this.transformations; + + } + + + /** + * shutdown the component and the used resources + */ + public void shutdown() { + + logger.info("shutdown ..."); + + try { + if (this.aligner != null) + this.aligner.close(); + } catch (Exception e) { + logger.warning(e.getMessage()); + } + + this.transformations = null; + this.instances = null; + this.alignments = null; + this.punctuationRemoval = false; + + logger.info("done."); + + } + + + @Override + public DistanceValue calculation(JCas jcas) throws DistanceComponentException { + + DistanceValue distanceValue = null; + + try { + + //get the alignments between T and 
H produced by the alignment component + alignments = new Alignments(aligner, jcas); + + //get the Text + JCas tView = jcas.getView(LAP_ImplBase.TEXTVIEW); + //get the dependency tree of Text + String t_tree = DependencyTreeUtils.cas2CoNLLX(tView); + logger.finer("\nThe Tree of Text:\n" + t_tree); + + // check if T contains multiple sentences + if(DependencyTreeUtils.checkMultiSentences(t_tree)) { + logger.finer("This fragment contains multiple sentences. They will be merged!"); + t_tree = DependencyTreeUtils.mergeTrees(t_tree); + logger.finer("\nThe Merged Tree of Text:\n" + t_tree); + } + + // check if T contains multiple trees + if(DependencyTreeUtils.checkMultiTree(t_tree)) { + logger.finer("This fragment contains multiple trees. They will be merged!"); + t_tree = DependencyTreeUtils.createFakeTree(t_tree); + logger.finer("\nThe Merged Tree of Text:\n" + t_tree); + } + + // check if T contains phrasal verbs + if(DependencyTreeUtils.checkPhrasalVerbs(t_tree)) { + logger.finer("This fragment contains phrasal verbs whose nodes will be merged!"); + t_tree = DependencyTreeUtils.mergePhrasalVerbs(t_tree); + logger.finer("\nThe Tree after merging:\n" + t_tree); + } + + // remove punctuation + if (this.punctuationRemoval && DependencyTreeUtils.checkPunctuation(t_tree)) { + logger.finer("The fragment contains some punctuation that will be removed!"); + t_tree = DependencyTreeUtils.removePunctuation(t_tree); + logger.finer("\nThe Tree of Text after removing punctuation:\n" + t_tree); + } + + //create the Text fragment + Fragment t_fragment = new Fragment(t_tree); + //get the Hypothesis + JCas hView = jcas.getView(LAP_ImplBase.HYPOTHESISVIEW); + //the dependency tree of Hypothesis + String h_tree = DependencyTreeUtils.cas2CoNLLX(hView); + logger.finer("\nThe Tree of Hypothesis:\n" + h_tree); + + // check if H contains multiple sentences + if(DependencyTreeUtils.checkMultiSentences(h_tree)){ + logger.finer("This fragment contains multiple sentences. 
They will be removed!"); + h_tree = DependencyTreeUtils.mergeTrees(h_tree); + logger.finer("\nThe Merged Tree of Hypothesis:\n" + h_tree); + } + + // check if H contains multiple sentences + if(DependencyTreeUtils.checkMultiTree(h_tree) ){ + logger.finer("This fragment contains multiple trees. They will be merged!"); + h_tree = DependencyTreeUtils.createFakeTree(h_tree); + logger.finer("\nThe Merged Tree of Hypothesis:\n" + h_tree); + } + + // check if H contains phrasal verbs + if(DependencyTreeUtils.checkPhrasalVerbs(h_tree)){ + logger.finer("This fragment contains phrasal verbs whose nodes will be merged!"); + h_tree = DependencyTreeUtils.mergePhrasalVerbs(h_tree); + logger.finer("\nThe Tree after merging:\n" + h_tree); + } + + // remove punctuation + if (this.punctuationRemoval && DependencyTreeUtils.checkPunctuation(h_tree)) { + logger.finer("The fragment contains some punctuationthat will be removed"); + h_tree = DependencyTreeUtils.removePunctuation(h_tree); + logger.finer("\nThe Tree of Hypothesis after removing punctuation:\n" + h_tree); + } + + //create the Hypothesis fragment + Fragment h_fragment = new Fragment(h_tree); + //calculate the distance between T and H by using the matches + //provided by the alignment component. 
+ distanceValue = distance(t_fragment, h_fragment, alignments); + + } catch (Exception e) { + + throw new DistanceComponentException(e.getMessage()); + + } + + return distanceValue; + + } + + + @Override + public Vector calculateScores(JCas jcas) throws ScoringComponentException { + + DistanceValue distanceValue = null; + Vector v = new Vector(); + + try { + + //get the alignments between T and H produced by the alignment component + alignments = new Alignments(aligner, jcas); + // get Text + JCas tView = jcas.getView(LAP_ImplBase.TEXTVIEW); + //get the dependency tree of Text + String t_tree = DependencyTreeUtils.cas2CoNLLX(tView); + logger.finer("Text:\n" + t_tree); + + if(DependencyTreeUtils.checkMultiSentences(t_tree)){ + t_tree = DependencyTreeUtils.mergeTrees(t_tree); + logger.finer("Merged text:\n" + t_tree); + } + + if(DependencyTreeUtils.checkPhrasalVerbs(t_tree)){ + logger.finer("Warning: phrasal verbs!"); + t_tree = DependencyTreeUtils.mergePhrasalVerbs(t_tree); + logger.finer("\nThe Merged Tree of Text:\n" + t_tree); + } + + //remove punctuation + if (this.punctuationRemoval && DependencyTreeUtils.checkPunctuation(t_tree)) { + t_tree = DependencyTreeUtils.removePunctuation(t_tree); + logger.finer("\nThe Cleaned Tree of Text:\n" + t_tree); + } + + + //create the Text fragment + Fragment t_fragment = new Fragment(t_tree); + //get Hypothesis + JCas hView = jcas.getView(LAP_ImplBase.HYPOTHESISVIEW); + //the dependency tree of Hypothesis + String h_tree = DependencyTreeUtils.cas2CoNLLX(hView); + + logger.finer("Hypothesis:\n" + h_tree); + + if(DependencyTreeUtils.checkMultiSentences(h_tree)){ + h_tree = DependencyTreeUtils.mergeTrees(h_tree); + logger.finer("Merged hypothesis:\n" + h_tree); + } + + if(DependencyTreeUtils.checkPhrasalVerbs(h_tree)){ + logger.finer("Warning: phrasal verbs!"); + h_tree = DependencyTreeUtils.mergePhrasalVerbs(h_tree); + logger.finer("\nThe Merged Tree of Hypothesis::\n" + h_tree); + } + + //remove punctuation + if 
(this.punctuationRemoval && DependencyTreeUtils.checkPunctuation(h_tree)) { + h_tree = DependencyTreeUtils.removePunctuation(h_tree); + logger.finer("\nThe Cleaned Tree of Hypothesis:\n" + h_tree); + } + + //create the Hypothesis fragment + Fragment h_fragment = new Fragment(h_tree); + + //calculate the distance between T and H by using the matches + //provided by the alignment component. + distanceValue = distance(t_fragment, h_fragment, alignments); + + } catch (Exception e) { + + throw new ScoringComponentException(e.getMessage()); + + } + + v.add(distanceValue.getDistance()); + v.add(distanceValue.getUnnormalizedValue()); + + return v; + + } + + + /** + * Returns the tree edit distance between T and H. During this + * phase the transformations producing H from T are calculated too. + * + * @param t the text fragment + * @param h the hypothesis fragment + * + * @return The edit distance between the sequences of tokens + * + * @throws ArithmeticException + * + */ + public DistanceValue distance(Fragment t, Fragment h, Alignments alignments) throws Exception { + + //here we need to call the library for calculating tree edit distance + double distance = 0.0; + double normalizedDistanceValue = 0.0; + double norm = 1.0; + + try { + + //Creating the Tree of Text + LabeledTree t_tree = createTree(t); + //logger.info("T:" + t_tree); + + //Creating the Tree of Hypothesis + LabeledTree h_tree = createTree(h); + //logger.info("H:" + h_tree); + + //creating an instance of scoreImpl containing the definition of the + //the edit distance operations. + ScoreImpl scoreImpl = new ScoreImpl(t_tree, h_tree); + + //Create an instance of TreeEditDistance + TreeEditDistance dist = new TreeEditDistance(scoreImpl); + + //This is used for storing the sequence of edit distance operations + Mapping map = new Mapping(t_tree, h_tree); + + //Distance calculation + distance = dist.calc(t_tree, h_tree, map); + + //cycle through the list of the edit distance operations (i.e. 
            // replace -rep, insertion -ins, deletion -del);
            // operations are in the format: rep:2,3 rep:1,1 rep:0,0 ins:2 rep:3,4 rep:4,5
            // e.g. rep:2,3 means replacing node id_2 with node id_3

            // calculate the transformations required to transform T into H
            this.transformations = computeTransformations(t_tree, h_tree, map);

            // norm is the distance equivalent to the cost of inserting all the nodes in H and
            // deleting all the nodes in T. This value is used to normalize distance values.
            norm = (double)(t_tree.size() * this.mDeleteWeight + h_tree.size() * this.mInsertWeight);

            normalizedDistanceValue = distance/norm;

        } catch (Exception e) {

            // NOTE(review): only e.getMessage() is propagated; the original cause is lost
            throw new Exception(e.getMessage());

        }

        return new EditDistanceValue(normalizedDistanceValue, false, distance);

    }


    /**
     * Create a labeled tree from a fragment: one node per token, labeled with
     * the token id and carrying the token itself.
     *
     * @param f the fragment whose tokens become the tree nodes
     * @return the labeled tree
     * @throws Exception on any failure while building the tree
     */
    private LabeledTree createTree(Fragment f) throws Exception {

        LabeledTree lTree;

        try {

            // the parents of the nodes
            int[] parents = new int[f.size()];
            // the ids of the nodes (the token ids as assigned by the dependency parser;
            // the tree edit distance library requires ids starting from 0 instead of 1 —
            // the -1 shift is applied when the Fragment is parsed, not here)
            int[] ids = new int[f.size()];
            // the tokens themselves
            FToken[] tokens = new FToken[f.size()];

            // filling the data structures
            // NOTE(review): generic type parameters (e.g. Iterator<FToken>) appear to have
            // been stripped by the patch rendering — restore them when applying.
            Iterator iterator = f.getIterator();
            int i = 0;
            while (iterator.hasNext()) {
                FToken token_i = iterator.next();
                parents[i] = token_i.getHead();
                ids[i] = token_i.getId();
                tokens[i] = token_i;
                i++;
            }

            lTree = new LabeledTree (
                    // the parents of the nodes
                    parents,
                    // the ids of the tokens
                    ids,
                    // the tokens with all their information
                    tokens);

        } catch (Exception e) {

            throw new Exception(e.getMessage());

        }

        return lTree;

    }

    /**
     * Accepts in input 2 labeled trees (LabeledTree) and the edit operations on the
     * trees needed to transform t_tree into h_tree (map), and returns the list of
     * transformations.
     *
     * @param t_tree the tree of T
     * @param h_tree the tree of H
     * @param map the edit operation sequence computed by the tree edit distance library
     * @return the list of transformations
     * @throws Exception on any failure while decoding the operations
     */
    private List computeTransformations(LabeledTree t_tree,
            LabeledTree h_tree, Mapping map) throws Exception {

        List transformations = new ArrayList();

        try {

            // cycle through the list of the edit distance operations (i.e. replace -rep,
            // insertion -ins, deletion -del);
            // operations are in the format: rep:2,3 rep:1,1 rep:0,0 ins:2 rep:3,4 rep:4,5
            // e.g. rep:2,3 means replacing node id_2 with node id_3
            List operationSequence = map.getSequence();

            for (int i = 0; i < operationSequence.size(); i++) {

                String operation_i = (String)operationSequence.get(i);
                String transformationType = operation_i.split(":")[0];
                String nodes = operation_i.split(":")[1];
                Transformation trans = null;

                // case of replace operations; the library we use for tree edit distance
                // doesn't tell us if it was a replace or match operation.
                // Distinguishing between replace and match is done in this way:
                //
                // match:
                //   -- match between tokens
                //   -- positive alignments
                //
                // replace:
                //   -- no matches between tokens
                //   -- negative alignments or no alignments
                //
                if (transformationType.contains(Transformation.REPLACE)) {

                    int node1 = Integer.parseInt(nodes.split(",")[0]);
                    int node2 = Integer.parseInt(nodes.split(",")[1]);
                    FToken t_token = t_tree.getToken(node1);
                    FToken h_token = h_tree.getToken(node2);

                    String[] alignment = alignments.getAlignment(t_token, h_token, wordMatch);

                    // i.e. LOCAL_ENTAILMENT, LOCAL_CONTRADICTION, LOCAL_SIMILARITY
                    String alignmentType = alignment[0];
                    // e.g. direction:TtoH
                    String alignmentDirection = alignment[1];
                    // e.g. hypernym
                    String alignmentInfo = alignment[2];

                    // NO ALIGNMENTS --> REPLACE TRANSFORMATION
                    if (alignmentType == null) {
                        trans = new Transformation(Transformation.REPLACE, null, t_token, h_token);
                    }
                    // entailment in the H-to-T direction only --> REPLACE TRANSFORMATION
                    else if (alignmentType.equals(Alignments.LOCAL_ENTAILMENT)
                            && alignmentDirection.equals(Alignments.DIRECTION_HtoT)) {
                        trans = new Transformation(Transformation.REPLACE, alignmentInfo, t_token, h_token);
                    }
                    // CONTRADICTION --> REPLACE TRANSFORMATION
                    else if (alignmentType.equals(Alignments.LOCAL_CONTRADICTION)) {
                        trans = new Transformation(Transformation.REPLACE, alignmentInfo, t_token, h_token);
                    }
                    // ENTAILMENT (T-to-H) or SIMILARITY --> MATCH TRANSFORMATION
                    else if (alignmentType.equals(Alignments.LOCAL_ENTAILMENT) ||
                            alignmentType.equals(Alignments.LOCAL_SIMILARITY)) {
                        trans = new Transformation(Transformation.MATCH, alignmentInfo, t_token, h_token);
                    }
                    // any other alignment type --> REPLACE TRANSFORMATION
                    else {
                        trans = new Transformation(Transformation.REPLACE,alignmentInfo , t_token, h_token);
                    }
                    transformations.add(trans);
                }
                // case of insertion transformation: the node comes from the tree of H
                else if (transformationType.contains(Transformation.INSERTION)){
                    int node = Integer.parseInt(nodes);
                    FToken token = h_tree.getToken(node);
                    trans = new Transformation(transformationType, token);
                    transformations.add(trans);
                }
                // case of deletion transformation: the node comes from the tree of T
                else {
                    int node = Integer.parseInt(nodes);
                    FToken token = t_tree.getToken(node);
                    trans = new Transformation(transformationType, token);
                    transformations.add(trans);
                }

            }

        } catch (Exception e) {

            // NOTE(review): only e.getMessage() is propagated; the original cause is lost
            throw new Exception(e.getMessage());

        }

        return transformations;

    }


    /**
     * Replaces the ConsoleHandler for a specific Logger with one that will log
     * all messages. This method could be adapted to replace other types of
     * loggers if desired.
     *
     * @param logger the logger to update
     * @param newLevel the new level to log
     */
    private static void replaceConsoleHandler(Logger logger, Level newLevel) {

        // handler for console (reuse it if it already exists)
        Handler consoleHandler = null;
        // see if there is already a console handler
        for (Handler handler : logger.getHandlers()) {
            if (handler instanceof ConsoleHandler) {
                // found the console handler
                consoleHandler = handler;
                break;
            }
        }

        if (consoleHandler == null) {
            // there was no console handler found, create a new one
            consoleHandler = new ConsoleHandler();
            logger.addHandler(consoleHandler);
        }
        // set the console handler to the requested level
        consoleHandler.setLevel(newLevel);

    }


    /**
     * The EditDistanceValue class extends the DistanceValue
     * to hold the distance calculation result.
     */
    private class EditDistanceValue extends DistanceValue {

        public EditDistanceValue(double distance, boolean simBased, double rawValue)
        {
            super(distance, simBased, rawValue);
        }

    }


    /**
     * The class ScoreImpl defines the method for the tree edit distance operations
     * with their weights and basic logic.
     */
    class ScoreImpl implements EditScore {

        private final LabeledTree tree1, tree2;

        public ScoreImpl(LabeledTree tree1, LabeledTree tree2) {

            this.tree1 = tree1;
            this.tree2 = tree2;

        }

        /**
         * Cost of replacing node1 (in T) with node2 (in H): match weight when the
         * aligner reports local entailment, half the substitution weight when the
         * two tokens at least share the dependency relation, full weight otherwise.
         */
        @Override
        public double replace(int node1, int node2) {

            FToken token_t = tree1.getToken(tree1.getLabel(node1));
            FToken token_h = tree2.getToken(tree2.getLabel(node2));
            // LOCAL_ENTAILMENT, LOCAL_CONTRADICTION, LOCAL_SIMILARITY (or null)
            String alignment = alignments.getAlignment(token_t, token_h, wordMatch)[0];
            if (alignment != null && alignment.equals(Alignments.LOCAL_ENTAILMENT)) {
                return mMatchWeight;
            } else if (token_t.getDprel().equals(token_h.getDprel()))
            {
                return mSubstituteWeight/2;
            }
            else { //replace
                return mSubstituteWeight;
            }
        }

        /** Cost of inserting a node of H. */
        @Override
        public double insert(int node2) {
            return mInsertWeight;
        }

        /** Cost of deleting a node of T. */
        @Override
        public double delete(int node1) {
            return mDeleteWeight;
        }

    }

}
\ No newline at end of file
diff --git a/adarte/src/main/java/eu/excitementproject/eop/adarte/Fragment.java b/adarte/src/main/java/eu/excitementproject/eop/adarte/Fragment.java
new file mode 100644
index 00000000..07853a6d
--- /dev/null
+++ b/adarte/src/main/java/eu/excitementproject/eop/adarte/Fragment.java
@@ -0,0 +1,159 @@
package eu.excitementproject.eop.adarte;

import java.util.ArrayList;
import java.util.Iterator;

/**
 *
 * This class represents a fragment of text, i.e. the tokens contained in the hypothesis H or text T.
 *
 * @author roberto zanoli
 * @author silvia colombo
 *
 * @since January 2015
*/
public class Fragment {

    // the ordered list of the tokens of the fragment
    // NOTE(review): generic type parameters (ArrayList<FToken>) appear stripped by the patch rendering
    private ArrayList tokens;


    /**
     * Creates an empty fragment.
     */
    protected Fragment() {

        this.tokens= new ArrayList();

    }


    /**
     * Creates a fragment wrapping the given token list (not copied).
     */
    protected Fragment(ArrayList tokens) {

        this.tokens=tokens;

    }


    /**
     * This method accepts in input a tree (it has been produced by cas2CoNLLX
     * and it is in CoNLL-X format) and returns a fragment containing all the tokens in the tree.
     * Ids and heads are shifted by -1 so that they are 0-based, as required by the
     * tree edit distance library; a head of "_" becomes -1 (root).
     *
     * @param dependencyTree the dependency tree in CoNLL-X format
     *
     * @throws Exception on any parse failure
     */
    public Fragment(String dependencyTree) throws Exception {

        this();

        // parse the CoNLL-X lines; each line becomes one FToken of the fragment
        try {

            String[] lines = dependencyTree.split("\n");

            for (int i = 0; i < lines.length; i++) {
                String[] fields = lines[i].split("\\s");
                // 0-based token id (CoNLL-X ids are 1-based)
                int tokenId = Integer.parseInt(fields[0]) - 1;
                String form = fields[1];
                String lemma = fields[2];
                String pos = fields[3];

                // head is 0-based too; "_" (no head) maps to -1
                int head;
                if (fields[6].equals("_")) {
                    head = -1;
                }
                else
                    head = Integer.parseInt(fields[6]) - 1;

                String deprel = fields[7];
                FToken token_i = new FToken(tokenId, form, lemma, pos, head, deprel);
                addToken(token_i);
            }

        } catch (Exception e) {

            throw new Exception(e.getMessage());

        }

    }


    /**
     * Get the token with id tokenId.
     *
     * NOTE(review): the stored token ids are already 0-based (see the constructor
     * above), yet this accessor indexes tokens.get(tokenId-1) — so getToken(0)
     * would throw. Confirm that callers pass 1-based ids here.
     *
     * @param tokenId the token id
     *
     * @return the token
     */
    protected FToken getToken(int tokenId) {

        return tokens.get(tokenId-1);

    }


    /**
     * Get the number of tokens.
     *
     * @return the number of tokens in the fragment
     */
    protected int size() {

        return tokens.size();

    }


    /**
     *
     * Add a new token into the fragment
     *
     * @param token the
     * token to be added
     */
    public void addToken (FToken token) {

        this.tokens.add(token);

    }


    /**
     * Get an iterator over the list of tokens in the fragment
     *
     * @return the iterator
     */
    public Iterator getIterator() {

        return tokens.iterator();

    }


    /**
     * Print the list of the tokens in the fragment, one per line.
     *
     * @return the list of the tokens
     */
    public String toString() {

        String frg = "";
        for(FToken token:tokens){
            frg = frg + "\n" + token.toString();
        }

        return frg;

    }


}

diff --git a/adarte/src/main/java/eu/excitementproject/eop/adarte/LabeledTree.java b/adarte/src/main/java/eu/excitementproject/eop/adarte/LabeledTree.java
new file mode 100644
index 00000000..3527c072
--- /dev/null
+++ b/adarte/src/main/java/eu/excitementproject/eop/adarte/LabeledTree.java
@@ -0,0 +1,123 @@
package eu.excitementproject.eop.adarte;

import treedist.TreeImpl;

/**
 *
 * This class extends TreeImpl written by Yuya Unno (https://github.com/unnonouno/tree-edit-distance/tree/master/tree-edit-distance) that
 * is an implementation of Zhang and Shasha's algorithm [Zhang89] for calculating tree edit distance.
 *
 * @author roberto zanoli
 * @author silvia colombo
 *
 * @since January 2015
 *
 */
public class LabeledTree extends TreeImpl {

    // the node labels; here a label is the id of the node's token
    private int[] labels;
    // the tokens of the tree, indexed by node id
    // NOTE(review): stays null when the two-argument constructor is used,
    // making getToken/getTokens/getDeprelRelationsFromNodeToRoot unusable — confirm callers
    private FToken[] tokens;


    /**
     * Creates a labeled tree without token payloads.
     *
     * @throws NullPointerException if parents or labels is null
     * @throws IllegalArgumentException if parents and labels differ in length
     */
    public LabeledTree(int[] parents, int[] labels) {
        super(parents);

        if (parents == null || labels == null)
            throw new NullPointerException();
        if (parents.length != labels.length)
            throw new IllegalArgumentException();

        this.labels = labels;

    }


    /**
     * Creates a labeled tree carrying the tokens; also precomputes, for each
     * token, the chain of dependency relations up to the root.
     *
     * @throws NullPointerException if any argument is null
     * @throws IllegalArgumentException if the arrays differ in length
     */
    public LabeledTree(int[] parents, int[] labels, FToken[] tokens) {
        super(parents);

        if (parents == null || labels == null || tokens == null)
            throw new NullPointerException();
        if (parents.length != labels.length || parents.length != tokens.length)
            throw new IllegalArgumentException();

        this.labels = labels;
        this.tokens = tokens;

        getDeprelRelationsFromNodeToRoot();

    }


    /**
     * Get the label of the node in the tree (i.e. the token id)
     *
     * @return the label of the token
     */
    protected int getLabel(int nodeId) {

        return labels[nodeId];

    }


    /**
     * Get the token of the specified node in the tree
     *
     * @return the token of the specified node
     */
    protected FToken getToken(int nodeId) {

        return tokens[nodeId];

    }


    /**
     * Get the list of the tokens in the tree
     *
     * @return the list of the tokens
     */
    protected FToken[] getTokens() {

        return this.tokens;

    }


    /**
     * It calculates the path (i.e. sequence of dprel relations, '#'-separated) from
     * each node to the root of the tree and saves this information as part of the
     * token itself (via FToken.setDprelRelations).
     */
    private void getDeprelRelationsFromNodeToRoot() {

        for (int z = 0; z < this.tokens.length; z++) {
            FToken token_z = this.tokens[z];
            String relations = "";
            int nodeId = token_z.getId();
            // walk up the parent chain until the root (parent == -1)
            while (nodeId != -1) {
                String deprel = this.tokens[nodeId].getDprel();
                if (relations.length() == 0)
                    relations = deprel;
                else
                    relations = relations + "#" + deprel;
                nodeId = this.getParent(nodeId);
            }
            token_z.setDprelRelations(relations);
        }

    }

}
\ No newline at end of file
diff --git a/adarte/src/main/java/eu/excitementproject/eop/adarte/MyClassifier.java b/adarte/src/main/java/eu/excitementproject/eop/adarte/MyClassifier.java
new file mode 100644
index 00000000..bbd44f74
--- /dev/null
+++ b/adarte/src/main/java/eu/excitementproject/eop/adarte/MyClassifier.java
@@ -0,0 +1,559 @@
package eu.excitementproject.eop.adarte;

import java.io.*;
import java.lang.reflect.Constructor;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Random;

import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.core.FastVector;
import weka.core.Instance;


/**
 *
 * This class implements the classifier to be used for training and annotating
 * and the current implementation is based on weka.
 *
 * @author roberto zanoli
 * @author silvia colombo
 *
 * @since January 2015
 */
public class MyClassifier {

    // the classifier
    private Classifier classifier = null;
    // number of folds used for cross validation
    private int numFolds = 10;
    // the feature set: feature name -> feature index
    private Map featuresSet;
    // the class labels
    private FastVector classesList;
    // the path of the file holding the model of the classifier
    private String classifierModel;


    /**
     * Build a new classifier
     *
     * @param classifierName the classifier name, e.g.
weka.classifiers.trees.RandomForest + * @param classifierParameters the classifier parameters, e.g. -I 1000 -K 0 -S 1 + * @param classifierModel the path where the classifier model has to be stored + * + */ + public MyClassifier (String classifierName, String[] classifierParameters, String classifierModel) throws Exception { + + this.classifierModel = classifierModel; + + try { + + Class classifierClass = Class.forName(classifierName); + Constructor classifierClassConstructor = classifierClass.getConstructor(); + this.classifier = (Classifier) classifierClassConstructor.newInstance(); + if (classifierParameters != null && !classifierParameters.equals("")) + this.classifier.setOptions(classifierParameters); + String[] options = this.classifier.getOptions(); + StringBuffer optionsString = new StringBuffer(); + for (int i = 0; i < options.length; i++) { + optionsString.append(options[i]); + optionsString.append(""); + } + + } catch (Exception e) { + + throw new Exception(e.getMessage()); + + } + + //initialize the feature set + initFeaturesList(); + //initialize the structure containing the class labels + initClassesList(); + + } + + /** + * Load a pre-trained classifier + * + * @param classifierModel the classifier model + * + */ + public MyClassifier (String classifierModel) throws Exception { + + this.classifierModel = classifierModel; + + try { + + if (this.classifier == null) { + this.classifier = (Classifier) weka.core.SerializationHelper.read(classifierModel); + } + + //load the feature set used for training the classifier + loadFeaturesSet(); + //load the class set used for training the classifier + loadClasses(); + + } catch (Exception e) { + + throw new Exception(e.getMessage()); + + } + + } + + + /** + * Get the list of the features used for building the classifier + * + * @return the list of features + * + */ + protected Map getFeaturesList() { + + return this.featuresSet; + + } + + + /** + * Get the list of the classes used for building the classifier + * 
+ * @return the list of classes + * + */ + protected FastVector getClassesList() { + + return this.classesList; + + } + + + /** + * Evaluate the created model + * + * @throws Exception + * + */ + protected String evaluateModel(DataSet dataSet) throws Exception { + + StringBuffer result = new StringBuffer(); + + try { + + Evaluation evaluation = new Evaluation(dataSet.getData()); + evaluation.crossValidateModel(classifier, dataSet.getData(), this.numFolds, new Random(1)); + result.append("evaluation summary:"); + result.append("\n"); + result.append(evaluation.toSummaryString()); + result.append("detailed accuracy:"); + result.append("\n"); + result.append(evaluation.toClassDetailsString()); + + } catch (Exception e) { + + throw new Exception("Evaluation model error:" + e.getMessage()); + + } + + return result.toString(); + + } + + + /** + * Initialize the classes structure + */ + protected void initClassesList() { + + this.classesList = new FastVector(); + this.classesList.addElement("fake_class"); + + } + + + /** + * Initialize the features set + */ + protected void initFeaturesList() { + + this.featuresSet = new HashMap(); + this.featuresSet.put("fake_attribute", 0); + //comment to not consider the distance + //this.featuresList.put("distance", 1); + + } + + + /** + * Check if the features set contains the feature + * + * @param feature the feature + * + * @return true if the feature exists, false otherwise + */ + protected boolean containsFeature(String feature) { + + return this.featuresSet.containsKey(feature); + + } + + + /** + * Add a feature in the features set + * + * @param feature the feature + * + * @return true if the feature has been added, false otherwise + * + */ + protected boolean addFeature(String feature) { + + if (!this.featuresSet.containsKey(feature)) { + this.featuresSet.put(feature, getFeaturesSetSize()); + return true; + } + + return false; + + } + + + /** + * Check if the classes set contains the class + * + * @param className the class + 
 *
 * @return true if the class exists, false otherwise
 */
protected boolean containsClass(String className) {

    return this.classesList.contains(className);

}


/**
 * Add a class in the classes set
 *
 * @param className the class
 *
 * @return true if the class has been added, false otherwise
 *
 */
protected boolean addClass(String className) {

    if (!this.classesList.contains(className)) {
        this.classesList.addElement(className);
        return true;
    }

    return false;

}


/**
 * Get the features set size
 *
 * @return the features set size
 *
 */
protected int getFeaturesSetSize() {

    return featuresSet.size();

}


/**
 * Get the classes set size
 *
 * @return the classes set size
 *
 */
protected int getClassesListSize() {

    return this.classesList.size();

}


/**
 * Save the feature set in a file named <classifierModel>.feature_set.txt:
 * first line is the number of features, then one "name TAB index" pair per line.
 */
protected void saveFeaturesSet() throws Exception {

    BufferedWriter writer = null;
    StringBuffer stringBuffer = new StringBuffer();

    //print the number of features
    stringBuffer.append(this.featuresSet.size());
    stringBuffer.append("\n");

    try {

        Iterator iterator = this.featuresSet.keySet().iterator();
        while (iterator.hasNext()) {
            String feature_i = iterator.next();
            Integer feature_id = this.featuresSet.get(feature_i);
            stringBuffer.append(feature_i);
            stringBuffer.append("\t");
            stringBuffer.append(feature_id);
            stringBuffer.append("\n");
        }

        writer = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(this.classifierModel + ".feature_set.txt", false), "UTF-8"));

        PrintWriter printout = new PrintWriter(writer);
        printout.print(stringBuffer);
        printout.close();

        writer.close();

    } catch (Exception e) {

        // NOTE(review): writer is not closed on failure (no finally/try-with-resources)
        throw new Exception("Saving the feature set error:" + e.getMessage());

    }


}

/**
 *
 * Load the feature set written by saveFeaturesSet (first line = count,
 * then "name TAB index" pairs).
 *
 */
protected void loadFeaturesSet() throws Exception {

    try {

        File fileDir = new File(this.classifierModel + ".feature_set.txt");

        BufferedReader in = new BufferedReader(
                new InputStreamReader(
                        new FileInputStream(fileDir), "UTF8"));

        String str;
        int lineCounter = 0;

        while ((str = in.readLine()) != null) {
            if (lineCounter == 0) {
                // first line holds the number of features, used to presize the map
                int featuresNumber = Integer.parseInt(str);
                this.featuresSet = new HashMap(featuresNumber);
            }
            else {
                String[] splitLine = str.split("\t");
                String feature_i = splitLine[0];
                String featureId = splitLine[1];
                this.featuresSet.put(feature_i, new Integer(featureId));
            }
            lineCounter++;
        }

        in.close();

    } catch (UnsupportedEncodingException e) {
        throw new Exception("Getting features list Unsupported Encoding Exception:" + e.getMessage());
    } catch (IOException e) {
        throw new Exception("Getting features list IOError:" + e.getMessage());
    } catch (Exception e) {
        throw new Exception("Getting features list error:" + e.getMessage());
    }

}


/**
 * Save the class labels list in <classifierModel>.classes.txt:
 * first line is the number of classes, then one label per line.
 */
protected void saveClasses() throws Exception {

    BufferedWriter writer = null;
    StringBuffer stringBuffer = new StringBuffer();

    //print the number of classes
    stringBuffer.append(this.classesList.size());
    stringBuffer.append("\n");

    try {

        for (int i = 0; i < this.classesList.size(); i++) {

            String classLabel_i = (String)this.classesList.elementAt(i);
            stringBuffer.append(classLabel_i);
            stringBuffer.append("\n");

        }

        writer = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(this.classifierModel + ".classes.txt", false), "UTF-8"));

        PrintWriter printout = new PrintWriter(writer);
        printout.print(stringBuffer);
        printout.close();

        writer.close();

    } catch (Exception e) {

        throw new Exception("Saving the classes error:" + e.getMessage());

    }

}


/**
 * Load the class labels written by saveClasses (first line = count,
 * then one label per line).
 */
protected void loadClasses() throws Exception {

    try {

        File fileDir = new File(this.classifierModel + ".classes.txt");

        BufferedReader in = new BufferedReader(
                new InputStreamReader(
                        new FileInputStream(fileDir), "UTF8"));

        String str;
        int lineCounter = 0;

        while ((str = in.readLine()) != null) {
            if (lineCounter == 0) {
                // first line holds the number of classes, used to presize the vector
                int classesNumber = Integer.parseInt(str);
                this.classesList = new FastVector(classesNumber);
            }
            else {
                String classLabel_i = str;
                this.classesList.addElement(classLabel_i);
            }
            lineCounter++;
        }

        in.close();

    } catch (UnsupportedEncodingException e) {
        throw new Exception("Unsupported Encoding Exception:" + e.getMessage());
    } catch (IOException e) {
        throw new Exception("IOError:" + e.getMessage());
    } catch (Exception e) {
        throw new Exception("Error while loading the classes:" + e.getMessage());
    }

}


/**
 * Train the classifier on the given data set, then persist the model,
 * the feature set and the class labels for use at test time.
 */
protected void trainClassifier(DataSet dataSet) throws Exception {

    try {

        //building the classifier
        this.classifier.buildClassifier(dataSet.getData());
        //storing the trained classifier to a file for future use
        weka.core.SerializationHelper.write(this.classifierModel, this.classifier);

        //save the list of the features with their index in a file to be used
        //during the test phase (see the process method)
        saveFeaturesSet();

        //save the list of the classes and their
index in a file to be used + //during the test phase (see the process method) + saveClasses(); + + + } catch (Exception ex) { + + throw new Exception("Training classifier error:" + ex.getMessage()); + + } + + } + + + /** + * Test the classifier + */ + protected double[] testClassifier(DataSet dataSet) throws Exception { + + //it contains a confidence value of each of the predicted classes + double[] score = null; + + try { + + for (int i = 0; i < dataSet.getData().numInstances(); i++) { + + Instance instance_i = dataSet.getData().instance(i); + + //the predicted class, e.g. 1 + //double entailment_class = classifier.classifyInstance(instance_i); + //the class label, e.g. ENTAILMENT + //logger.info("predicted class:" + inputDataset.attribute("class").value((int)entailment_class)); + + //the confidence values + score = this.classifier.distributionForInstance(instance_i); + + } + + } catch (Exception ex) { + + throw new Exception("Testing classifier error:" + ex.getMessage()); + + } + + return score; + + } + + + /** + * Print the classifier type + */ + public String toString() { + + return this.classifier.toString(); + + } + +} diff --git a/adarte/src/main/java/eu/excitementproject/eop/adarte/Transformation.java b/adarte/src/main/java/eu/excitementproject/eop/adarte/Transformation.java new file mode 100644 index 00000000..85a5ff47 --- /dev/null +++ b/adarte/src/main/java/eu/excitementproject/eop/adarte/Transformation.java @@ -0,0 +1,276 @@ +package eu.excitementproject.eop.adarte; + +/** + * + * Transformations are elementary edit operations (i.e., deleting, replacing and inserting pieces + * of text) needed to transform a text into another. In the context of Textual Entailment, the + * transformations of a T-H pair are the edit operations needed to transform T into H. + * + * + * With tree edit distance three different type of transformations can be defined: + * + * -Inserting: insert a node N from the tree of H into the tree of T. 
+ * -Deleting: delete a node N from the tree of T. + * -Replacing: change the label of a node N1 in the source tree (the tree of T) into a label of a + * node N2 of the target tree (the tree of H). + * + * There is a fourth transformation, called Matching, but that does not count as an edit operation; basically + * it is applied on two nodes that are equal. + * + * + * The transformations are used by the classifier as features and they can be represented at different levels + * of specificity/genericity: + * + * -General Form: it includes the transformation type while the nodes in the transformation are represented + * with the lemma and PoS in addition to the whole chain of the dependency relations (dprel) from the current node + * to the root of the tree. + * + * -Least Specific Form: it includes the transformation type as in the General Form but the nodes in the + * transformation are here only represented by the dependency relations (dprel) that they have with their direct + * parent nodes, i.e., + * + * + * @author roberto zanoli + * @author silvia colombo + * + * @since January 2015 + */ +public class Transformation { + + /* + * Possible type of transformations + */ + //replace transformation + public final static String REPLACE = "rep"; + //match transformation + public final static String MATCH = "match"; + //insertion transformation + public final static String INSERTION = "ins"; + //deletion transformation + public final static String DELETION = "del"; + + //transformation type, i.e. replace, match, deletion, insertion + private String type; + + //the rule type such as "synonym" or "hypernym" for WordNet, "redirect" for Wikipedia + //that is used to produce the transformation (e.g. 
HYPERNYM of WordNet) + private String resource; + + //token in the Text involved in the transformation + private FToken token_T; + + //token in the Hypothesis involved in the transformation + private FToken token_H; + + + /** + * constructor + * + * @param type the transformation type: deletion, substitution, insertion and matching + * @param resource the rule type such as "synonym" or "hypernym" for WordNet + * @param token_T the token T involved in the transformation + * @param token_H the token H involved in the transformation + * + */ + public Transformation(String type, String resource, FToken token_T, FToken token_H){ + + this.type = type; + this.resource = resource; + this.token_T = token_T; + this.token_H = token_H; + + } + + + /** + * constructor + * + * @param type the transformation type: deletion, substitution, insertion and matching + * @param token the token involved in the transformation + * + */ + public Transformation(String type, FToken token) { + + this.type = type; + if (type.equals(INSERTION)){ + this.token_H = token; + } + else + this.token_T = token; + + } + + + /** + * Get the type of transformation + * + * @return the type + * + */ + protected String getType() { + + return this.type; + + } + + + /** + * Set the type of transformation + * + * @param type the transformation type: deletion, substitution, insertion and matching + * + */ + protected void setType(String type) { + + this.type = type; + + } + + + /** + * Get the the rule type such as "synonym" or "hypernym" for WordNet + * + * @return the resource + * + */ + protected String getResource() { + + return this.resource; + + } + + + /** + * Get the token T + * + * @return the token T + * + */ + protected FToken getToken_T() { + + return token_T; + + } + + + /** + * Get the token H + * + * @return the token H + * + */ + protected FToken getToken_H() { + + return token_H; + + } + + + /** + * Print the transformation: its type, the rule type such as "synonym" or "hypernym" for WordNet used 
to produce + * this transformation, with the tokens involved in the transformation. + * + * @return the transformation + * + */ + public String toString() { + + switch (this.type) { + + case REPLACE: + + return ("Type: " + this.type + " Resource: " + this.resource + " token_T: " + this.token_T + " token_H: " + this.token_H); + + case MATCH: + return ("Type: " + this.type + " Resource: " + this.resource + " token_T: " + this.token_T + " token_H: " + this.token_H); + + case INSERTION: + return ("Type: " + this.type + " token_H:: " + this.token_H ); + + default: + return ("Type: " + this.type + " token_T: " + this.token_T ); + + } + + } + + + /** + * Print the transformation considering different representations at different levels + * of specificity/genericity: + * + * LeastSpecificForm, IntermediateForm, GeneralForm + * + * @param replace true for consider the replace transformations; false otherwise + * @param match true for consider the match transformations; false otherwise + * @param deletion true for consider the deletion transformations; false otherwise + * @param insertion true for consider the insertion transformations; false otherwise + * @param form how to represent the transformations: LeastSpecificForm | IntermediateForm | GeneralForm + * + * @return the transformation + */ + public String print(boolean replace, boolean match, boolean deletion, boolean insertion, String form) { + + if (type.equals(REPLACE) && replace == true) { + + if (form.equals("LeastSpecificForm")) + return ("Type:" + this.type + "#" + "Resource:" + this.resource + "#" + "T_DPrel:" + this.token_T.getDprel() + "#" + "H_DPrel:" + this.token_H.getDprel()); + else if (form.equals("IntermediateForm")) + return ("Type:" + this.type + "#" + "Resource:" + this.resource + "#" + "T_DPrel:" + this.token_T.getDprelRelations() + "#" + "H_DPrel:" + this.token_H.getDprelRelations()); + else // GeneralForm + return ("Type:" + this.type + "#" + "Resource:" + this.resource + "#" + "T_DPrel:" + 
this.token_T.getDprelRelations() + "#" + "T_POS:" + this.token_T.getPOS() + "#" + "T_Token:" + this.token_T.getLemma() + "#" + "H_DPrel:" + this.token_H.getDprelRelations() + "#" + "H_POS:" + this.token_H.getPOS() + "#" + "H_Token:" + this.token_H.getLemma()); + + //you can represent the representations in other ways + //return ("Type:" + this.type + "#" + "Info:" + this.info + "#" + "T_DPrel:" + this.token_T.getDprelRelations() + "#" + "H_DPrel:" + this.token_H.getDprelRelations()); + //return ("Type:" + this.type + "#" + "Info:" + this.info + "#" + "T_DPrel:" + this.token_T.getDprel() + "#" + "T_POS:" + this.token_T.getPOS() + "#" + "H_DPrel:" + this.token_H.getDprel() + "#" + "H_POS:" + this.token_H.getPOS()); + //return ("Type:" + this.type + "#" + "Info:" + this.info + "#" + "T_DPrel:" + this.token_T.getDprel() + "#" + "T_POS:" + this.token_T.getPOS() + "#" + "T_Token:" + this.token_T.getLemma() + "#" + "H_DPrel:" + this.token_H.getDprel() + "#" + "H_POS:" + this.token_H.getPOS() + "#" + "H_Token:" + this.token_H.getLemma()); + } + else if (type.equals(MATCH) && match == true) { + + if (form.equals("LeastSpecificForm")) + return ("Type:" + this.type + "#" + "Resource:" + this.resource + "#" + "T_DPrel:" + this.token_T.getDprel() + "#" + "H_DPrel:" + this.token_H.getDprel()); + else if (form.equals("IntermediateForm")) + return ("Type:" + this.type + "#" + "Resource:" + this.resource + "#" + "T_DPrel:" + this.token_T.getDprelRelations() + "#" + "H_DPrel:" + this.token_H.getDprelRelations()); + else // GeneralForm + return ("Type:" + this.type + "#" + "Resource:" + this.resource + "#" + "T_DPrel:" + this.token_T.getDprelRelations() + "#" + "T_POS:" + this.token_T.getPOS() + "#" + "T_Token:" + this.token_T.getLemma() + "#" + "H_DPrel:" + this.token_H.getDprelRelations() + "#" + "H_POS:" + this.token_H.getPOS() + "#" + "H_Token:" + this.token_H.getLemma()); + + //you can represent the representations in other ways + //return ("Type:" + this.type + "#" + "Info:" + 
this.info + "#" + "T_DPrel:" + this.token_T.getDprelRelations() + "#" + "H_DPrel:" + this.token_H.getDeprelRelations()); + //return ("Type:" + this.type + "#" + "Info:" + this.info + "#" + "T_DPrel:" + this.token_T.getDprel() + "#" + "T_POS:" + this.token_T.getPOS() + "#" + "H_DPrel:" + this.token_H.getDeprel() + "#" + "H_POS:" + this.token_H.getPOS()); + //return ("Type:" + this.type + "#" + "Info:" + this.info + "#" + "T_DPrel:" + this.token_T.getDprel() + "#" + "T_POS:" + this.token_T.getPOS() + "#" + "T_Token:" + this.token_T.getLemma() + "#" + "H_DPrel:" + this.token_H.getDeprel() + "#" + "H_POS:" + this.token_H.getPOS() + "#" + "H_Token:" + this.token_H.getLemma()); + } + else if (type.equals(INSERTION) && insertion == true) { + + if (form.equals("LeastSpecificForm")) + return ("Type:" + this.type + "#" + "H_DPrel:" + this.token_H.getDprel() ); + else if (form.equals("IntermediateForm")) + return ("Type:" + this.type + "#" + "H_DPrel:" + this.token_H.getDprelRelations()); + else // GeneralForm + return ("Type:" + this.type + "#" + "H_DPrel:" + this.token_H.getDprelRelations() + "#" + "H_POS:" + this.token_H.getPOS() + "#" + "H_Token:" + this.token_H.getLemma()); + + //you can represent the representations in other ways + //return ("Type:" + this.type + "#" + "H_DPrel:" + this.token_H.getDeprelRelations() ); + //return ("Type:" + this.type + "#" + "H_DPrel:" + this.token_H.getDprel() + "#" + "H_POS:" + this.token_H.getPOS()); + //return ("Type:" + this.type + "#" + "H_DPrel:" + this.token_H.getDprel() + "#" + "H_POS:" + this.token_H.getPOS() + "#" + "H_Token:" + this.token_H.getLemma()); + } + else if (type.equals(DELETION) && deletion == true) { + + if (form.equals("LeastSpecificForm")) + return ("Type:" + this.type + "#" + "T_DPrel:" + this.token_T.getDprel() ); + else if (form.equals("IntermediateForm")) + return ("Type:" + this.type + "#" + "T_DPrel:" + this.token_T.getDprelRelations()); + else // GeneralForm + return ("Type:" + this.type + "#" + 
"T_DPrel:" + this.token_T.getDprelRelations() + "#" + "T_POS:" + this.token_T.getPOS() + "#" + "T_Token:" + this.token_T.getLemma()); + + //you can represent the representations in other ways + //return ("Type:" + this.type + "#" + "T_DPrel:" + this.token_T.getDprelRelations() ); + //return ("Type:" + this.type + "#" + "T_DPrel:" + this.token_T.getDprel() + "#" + "T_POS:" + this.token_T.getPOS()); + //return ("Type:" + this.type + "#" + "T_DPrel:" + this.token_T.getDprel() + "#" + "T_POS:" + this.token_T.getPOS() + "#" + "T_Token:" + this.token_T.getLemma()); + } + + return null; + + } + + +} diff --git a/adarte/src/main/resources/configuration-file/AdArte_EN.xml b/adarte/src/main/resources/configuration-file/AdArte_EN.xml new file mode 100644 index 00000000..c01de629 --- /dev/null +++ b/adarte/src/main/resources/configuration-file/AdArte_EN.xml @@ -0,0 +1,239 @@ + + + + + + + + + + +
+ + eu.excitementproject.eop.adarte.AdArte + + EN + + eu.excitementproject.eop.lap.dkpro.MaltParserEN +
+ + + + + +
+ + + basic + + + + + + + + + + + true + + + + lemma-dprel + + + + + INFO + + + + + + + + + + + eu.excitementproject.eop.core.component.alignment.lexicallink.LexicalAligner + + ./src/main/resources/configuration-file/LexicalAligner_EN.xml + + + + true + + + + lemma-dprel + + + + + INFO + + + + + + +
+ + + +
+ + + + + + /hardmnt/norris0/home/zanoli/SICK_DATA_SET_ESP_LRESOURCESE/SICK/tmpTrain/ + + /hardmnt/norris0/home/zanoli/SICK_DATA_SET_ESP_LRESOURCESE/SICK/tmpTest/ + + + + + + + LeastSpecificForm + + + + + + + + + + + + + + weka.classifiers.trees.RandomForest + -I 1000 -K 0 -S 1 + /hardmnt/norris0/home/zanoli/SICK_DATA_SET_ESP_LRESOURCESE/RandomForest.model + + + + + false + + + + true + + + /hardmnt/norris0/home/zanoli/SICK_DATA_SET_ESP_LRESOURCESE/SICK/tmp/data.arff + + + + INFO + + + + + + eu.excitementproject.eop.adarte.FixedWeightTreeEditDistance + + + match,replacement,insertion,deletion + +
+ +
diff --git a/adarte/src/main/resources/configuration-file/LexicalAligner_EN.xml b/adarte/src/main/resources/configuration-file/LexicalAligner_EN.xml new file mode 100644 index 00000000..200fc118 --- /dev/null +++ b/adarte/src/main/resources/configuration-file/LexicalAligner_EN.xml @@ -0,0 +1,113 @@ + + + +
+ 1 + ../src/test/resources/model/GATE-3.1/plugins/Tools/resources/morph/default.rul +
+ +
+ + + eu.excitementproject.eop.core.component.lexicalknowledge.wordnet.WordnetLexicalResource + + + eu.excitementproject.eop.core.component.lexicalknowledge.verb_ocean.VerbOceanLexicalResource + + + eu.excitementproject.eop.core.component.lexicalknowledge.catvar.CatvarLexicalResource + + + + + + + + +
+ + + +
+ true + 3.0 + + + /hardmnt/norris0/home/zanoli/EOP/Excitement-Open-Platform-1.1.3/target/EOP-1.1.3/eop-resources-1.1.3/BIUTEE_Environment/data/WordNet/3.0/dict.wn.orig + + + + false + false + ANTONYM,SYNONYM,DERIVATIONALLY_RELATED,HYPERNYM,INSTANCE_HYPERNYM,MEMBER_HOLONYM,ENTAILMENT + + + 10 + +
+ + +
+ true + unrefined.2004-05-20 + 1 + /hardmnt/norris0/home/zanoli/EOP/Excitement-Open-Platform-1.1.3/target/EOP-1.1.3/eop-resources-1.1.3/VerbOcean/verbocean.unrefined.2004-05-20.txt + SIMILAR,OPPOSITE_OF,STRONGER_THAN + +
+ + +
+ true + 2.1 + /hardmnt/norris0/home/zanoli/EOP/Excitement-Open-Platform-1.1.3/target/EOP-1.1.3/eop-resources-1.1.3/BIUTEE_Environment/data/CatVar/catvar21 +
+ + +
+ /hardmnt/norris0/home/zanoli/EOP/Excitement-Open-Platform-1.1.3/target/EOP-1.1.3/eop-resources-1.1.3/BIUTEE_Environment/workdir/redis/ + false + 1.0 + wiki + /hardmnt/norris0/home/zanoli/EOP/Excitement-Open-Platform-1.1.3/target/EOP-1.1.3/eop-resources-1.1.3/redis/wiki-l2r.rdb + /hardmnt/norris0/home/zanoli/EOP/Excitement-Open-Platform-1.1.3/target/EOP-1.1.3/eop-resources-1.1.3/redis/wiki-r2l.rdb + 0.001 + 10 + eu.excitementproject.eop.lexicalminer.definition.classifier.syntacticpatterns.offlineClassifiers.syntacticpatternsLocationsSquare + SyntacticOfflinePosRelationLocationSquareClassifier +
+ + +
+ /hardmnt/norris0/home/zanoli/EOP/Excitement-Open-Platform-1.1.3/target/EOP-1.1.3/eop-resources-1.1.3/BIUTEE_Environment/workdir/redis/ + + false + 1.0 + distsim-bap + 20 + /home/zanoli/EOP/Excitement-Open-Platform-1.1.3/target/EOP-1.1.3/eop-resources-1.1.3/redis/ukwac/bap/similarity-l2r.rdb + /home/zanoli/EOP/Excitement-Open-Platform-1.1.3/target/EOP-1.1.3/eop-resources-1.1.3/redis/ukwac/bap/similarity-r2r.rdb +
+ + +
+ /hardmnt/norris0/zanoli/EOP/Excitement-Open-Platform-1.1.3/target/EOP-1.1.3/eop-resources-1.1.3/BIUTEE_Environment/workdir/redis/ + + false + 1.0 + distsim-lin-proximity + eu.excitementproject.eop.distsim.items.LemmaPosBasedElement + 20 + /hardmnt/norris0/home/zanoli/EOP/Excitement-Open-Platform-1.1.3/target/EOP-1.1.3/eop-resources-1.1.3/redis/reuters/lin/proximity/similarity-l2r.top100.rdb + /hardmnt/norris0/home/zanoli/EOP/Excitement-Open-Platform-1.1.3/target/EOP-1.1.3/eop-resources-1.1.3/redis/reuters/lin/proximity/similarity-r2r.top100.rdb +
+ +
\ No newline at end of file diff --git a/adarte/src/test/java/eu/excitementproject/eop/adarte/AdArteTest.java b/adarte/src/test/java/eu/excitementproject/eop/adarte/AdArteTest.java new file mode 100644 index 00000000..0b5b22cb --- /dev/null +++ b/adarte/src/test/java/eu/excitementproject/eop/adarte/AdArteTest.java @@ -0,0 +1,186 @@ +package eu.excitementproject.eop.adarte; + +import org.apache.uima.jcas.JCas; + +import java.io.*; + +import eu.excitement.type.entailment.Pair; +import eu.excitementproject.eop.adarte.EditDistanceTEDecision; +import eu.excitementproject.eop.adarte.AdArte; +import eu.excitementproject.eop.common.configuration.CommonConfig; +import eu.excitementproject.eop.common.utilities.configuration.ImplCommonConfig; +import eu.excitementproject.eop.lap.LAPAccess; +import eu.excitementproject.eop.lap.LAPException; +import eu.excitementproject.eop.lap.PlatformCASProber; +import eu.excitementproject.eop.lap.dkpro.MaltParserEN; + +import java.util.logging.Logger; + +import org.junit.*; +import org.apache.uima.cas.FSIterator; +import org.apache.uima.jcas.cas.TOP; +import org.apache.commons.lang.exception.ExceptionUtils; + +import static org.junit.Assert.*; + + +/** This class tests Edit Distance EDA training and testing it + * on a small portion of the RTE-3 data set for English, German and Italian language. 
+ */ +public class AdArteTest { + + static Logger logger = Logger.getLogger(AdArteTest.class + .getName()); + + + @Test + public void test() { + + logger.info("testing AdArte ..."); + testEnglish(); + + } + + /** + * test on the English data set + * + * @return + */ + public void testEnglish() { + + AdArte tdEDA; + + CommonConfig config = null; + + try { + + // the configuration file + File configFile = new File("./src/test/resources/configuration-file/AdArte_EN.xml"); + + // creating an instance of AdArte + tdEDA = new AdArte(); + + // loading the configuration file + config = new ImplCommonConfig(configFile); + + // the LAP + LAPAccess lap = null; + + // data set for training and test + File input = new File("./src/test/resources/dataset/SICK_EN_EXAMPLE.xml"); + + // tmp directory for storing the pre-processed files + File outputDir = new File("/tmp/AdArte_Test/"); + //build it if it does not exist + if (outputDir.exists() == false) + outputDir.mkdir(); + + // the LAP based on MaltParser using TreeTagger + lap = new MaltParserEN(); + // pre-processing the data set + lap.processRawInputFormat(input, outputDir); + + // initialization and start training + tdEDA.startTraining(config); + + // shutdown + tdEDA.shutdown(); + + // initialization for testing + tdEDA.initialize(config); + + double correct = 0; + double examples = 0; + //double example_CONTRADICTION = 0; + //double example_ENTAILMENT = 0; + //double example_UNKNOWN = 0; + //double correct_CONTRADICTION = 0; + //double correct_ENTAILMENT = 0; + //double correct_UNKNOWN = 0; + + // cycle on the files to be annotated + for (File xmi : outputDir.listFiles()) { + + if (!xmi.getName().endsWith(".xmi")) { + continue; + } + + // the cas containing the T/H pair to be annotated + JCas jcas = PlatformCASProber.probeXmi(xmi, null); + // annotate + EditDistanceTEDecision edtedecision = tdEDA.process(jcas); + + examples++; + if (getGoldLabel(jcas).equalsIgnoreCase(edtedecision.getDecision().toString())) + correct++; + + 
/* + if (getGoldLabel(jcas).equals("UNKNOWN")) { + example_UNKNOWN++; + if (getGoldLabel(jcas).equalsIgnoreCase(edtedecision.getDecision().toString())) + correct_UNKNOWN++; + } + + else if (getGoldLabel(jcas).equals("CONTRADICTION")) { + example_CONTRADICTION++; + if (getGoldLabel(jcas).equalsIgnoreCase(edtedecision.getDecision().toString())) + correct_CONTRADICTION++; + } + + else if (getGoldLabel(jcas).equals("ENTAILMENT")) { + example_ENTAILMENT++; + if (getGoldLabel(jcas).equalsIgnoreCase(edtedecision.getDecision().toString())) + correct_ENTAILMENT++; + } + */ + + } + + // shutdown + tdEDA.shutdown(); + + // the accuracy has to be 1.0 + assertTrue(correct/examples == 1.0); + + //logger.info("accuracy:" + correct/examples); + //System.err.println("accuracy UNKNOWN:" + correct_UNKNOWN/example_UNKNOWN); + //System.err.println("accuracy CONTRADICTION:" + correct_ENTAILMENT/example_ENTAILMENT); + //System.err.println("accuracy ENTAILMENT:" + correct_CONTRADICTION/example_CONTRADICTION); + //System.err.println("examples:" + examples); + + } catch(LAPException e) { + // check if this is due to missing TreeTagger binary and model. + // In such a case, we just skip this test. + // (see /lap/src/scripts/treetagger/README.txt to how to install TreeTagger) + if (ExceptionUtils.getRootCause(e) instanceof java.io.IOException) { + logger.info("Skipping the test: TreeTagger binary and/or models missing. \n To run this testcase, TreeTagger installation is needed. (see /lap/src/scripts/treetagger/README.txt)"); + Assume.assumeTrue(false); // we won't test this test case any longer. 
+ } + } catch (Exception e) { + // if this is some other exception, the test will fail + fail(e.getMessage()); + } + + } + + /** + * @param aCas + * the JCas object + * @return if the T-H pair contains the gold answer, return it; otherwise, + * return null + */ + private String getGoldLabel(JCas aCas) { + FSIterator pairIter = aCas.getJFSIndexRepository() + .getAllIndexedFS(Pair.type); + Pair p = (Pair) pairIter.next(); + if (null == p.getGoldAnswer() || p.getGoldAnswer().equals("") + || p.getGoldAnswer().equals("ABSTAIN")) { + return null; + } else { + return p.getGoldAnswer(); + } + } + +} + + \ No newline at end of file diff --git a/adarte/src/test/resources/configuration-file/AdArte_EN.xml b/adarte/src/test/resources/configuration-file/AdArte_EN.xml new file mode 100644 index 00000000..25159398 --- /dev/null +++ b/adarte/src/test/resources/configuration-file/AdArte_EN.xml @@ -0,0 +1,239 @@ + + + + + + + + + + +
+ + eu.excitementproject.eop.adarte.AdArte + + EN + + eu.excitementproject.eop.lap.dkpro.MaltParserEN +
+ + + + + +
+ + + basic + + + + + + + + + + + true + + + + lemma-dprel + + + + + INFO + + + + + + + + + + + eu.excitementproject.eop.core.component.alignment.lexicallink.LexicalAligner + + ./src/test/resources/configuration-file/LexicalAligner_EN.xml + + + + true + + + + lemma-dprel + + + + + INFO + + + + + + +
+ + + +
+ + + + + + /tmp/AdArte_Test/ + + /tmp/AdArte_Test/ + + + + + + + LeastSpecificForm + + + + + + + + + + + + + + weka.classifiers.trees.RandomForest + -I 1000 -K 0 -S 1 + /tmp/AdArte_Test/RandomForest.model + + + + + false + + + + true + + + /tmp/AdArte_Test/data.arff + + + + INFO + + + + + + eu.excitementproject.eop.adarte.FixedWeightTreeEditDistance + + + match,replacement,insertion,deletion + +
+ +
diff --git a/adarte/src/test/resources/configuration-file/LexicalAligner_EN.xml b/adarte/src/test/resources/configuration-file/LexicalAligner_EN.xml new file mode 100644 index 00000000..f3bf9f7f --- /dev/null +++ b/adarte/src/test/resources/configuration-file/LexicalAligner_EN.xml @@ -0,0 +1,113 @@ + + + +
+ 1 + ../src/test/resources/model/GATE-3.1/plugins/Tools/resources/morph/default.rul +
+ +
+ + + eu.excitementproject.eop.core.component.lexicalknowledge.wordnet.WordnetLexicalResource + + + eu.excitementproject.eop.core.component.lexicalknowledge.verb_ocean.VerbOceanLexicalResource + + + eu.excitementproject.eop.core.component.lexicalknowledge.catvar.CatvarLexicalResource + + + + + + + +
+ + + +
+ true + 3.0 + + + /hardmnt/norris0/home/zanoli/EOP/Excitement-Open-Platform-1.1.3/target/EOP-1.1.3/eop-resources-1.1.3/BIUTEE_Environment/data/WordNet/3.0/dict.wn.orig + + + + false + false + ANTONYM,SYNONYM,DERIVATIONALLY_RELATED,HYPERNYM,INSTANCE_HYPERNYM,MEMBER_HOLONYM,ENTAILMENT + + + 10 + +
+ + +
+ true + unrefined.2004-05-20 + 1 + /hardmnt/norris0/home/zanoli/EOP/Excitement-Open-Platform-1.1.3/target/EOP-1.1.3/eop-resources-1.1.3/VerbOcean/verbocean.unrefined.2004-05-20.txt + SIMILAR,OPPOSITE_OF,STRONGER_THAN + +
+ + +
+ true + 2.1 + /hardmnt/norris0/home/zanoli/EOP/Excitement-Open-Platform-1.1.3/target/EOP-1.1.3/eop-resources-1.1.3/BIUTEE_Environment/data/CatVar/catvar21 +
+ + +
+ /hardmnt/norris0/home/zanoli/EOP/Excitement-Open-Platform-1.1.3/target/EOP-1.1.3/eop-resources-1.1.3/BIUTEE_Environment/workdir/redis/ + false + 1.0 + wiki + /hardmnt/norris0/home/zanoli/EOP/Excitement-Open-Platform-1.1.3/target/EOP-1.1.3/eop-resources-1.1.3/redis/wiki-l2r.rdb + /hardmnt/norris0/home/zanoli/EOP/Excitement-Open-Platform-1.1.3/target/EOP-1.1.3/eop-resources-1.1.3/redis/wiki-r2l.rdb + 0.001 + 10 + eu.excitementproject.eop.lexicalminer.definition.classifier.syntacticpatterns.offlineClassifiers.syntacticpatternsLocationsSquare + SyntacticOfflinePosRelationLocationSquareClassifier +
+ + + +
+ /hardmnt/norris0/home/zanoli/EOP/Excitement-Open-Platform-1.1.3/target/EOP-1.1.3/eop-resources-1.1.3/BIUTEE_Environment/workdir/redis/ + + false + 1.0 + distsim-bap + 20 + /home/zanoli/EOP/Excitement-Open-Platform-1.1.3/target/EOP-1.1.3/eop-resources-1.1.3/redis/ukwac/bap/similarity-l2r.rdb + /home/zanoli/EOP/Excitement-Open-Platform-1.1.3/target/EOP-1.1.3/eop-resources-1.1.3/redis/ukwac/bap/similarity-r2r.rdb +
+ + +
+ /hardmnt/norris0/zanoli/EOP/Excitement-Open-Platform-1.1.3/target/EOP-1.1.3/eop-resources-1.1.3/BIUTEE_Environment/workdir/redis/ + + false + 1.0 + distsim-lin-proximity + eu.excitementproject.eop.distsim.items.LemmaPosBasedElement + 20 + /hardmnt/norris0/home/zanoli/EOP/Excitement-Open-Platform-1.1.3/target/EOP-1.1.3/eop-resources-1.1.3/redis/reuters/lin/proximity/similarity-l2r.top100.rdb + /hardmnt/norris0/home/zanoli/EOP/Excitement-Open-Platform-1.1.3/target/EOP-1.1.3/eop-resources-1.1.3/redis/reuters/lin/proximity/similarity-r2r.top100.rdb +
+ +
\ No newline at end of file diff --git a/adarte/src/test/resources/dataset/SICK_EN_EXAMPLE.xml b/adarte/src/test/resources/dataset/SICK_EN_EXAMPLE.xml new file mode 100644 index 00000000..737cd5da --- /dev/null +++ b/adarte/src/test/resources/dataset/SICK_EN_EXAMPLE.xml @@ -0,0 +1,16 @@ + + + +Two people are kickboxing and spectators are not watching +Two people are kickboxing and spectators are watching + + +Two young women are sparring in a kickboxing fight +Two women are sparring in a kickboxing match + + +Two people are kickboxing and spectators are watching +Two young women are not sparring in a kickboxing fight + + + diff --git a/alignmentedas/pom.xml b/alignmentedas/pom.xml index aae11626..604f511f 100644 --- a/alignmentedas/pom.xml +++ b/alignmentedas/pom.xml @@ -4,7 +4,7 @@ eu.excitementproject eop - 1.2.0 + 1.2.1 alignmentedas alignmentedas @@ -17,12 +17,12 @@ eu.excitementproject core - 1.2.0 + 1.2.1 eu.excitementproject lap - 1.2.0 + 1.2.1 diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/visualization/P1EdaVisualizer.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/visualization/P1EdaVisualizer.java new file mode 100644 index 00000000..69a33ce4 --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/visualization/P1EdaVisualizer.java @@ -0,0 +1,872 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.visualization; + +/** + * An implementation if the {@link eu.excitementproject.eop.alignmentedas.p1eda.visualization.Visualizer} interface, based on {@link Brat visualizer http://brat.nlplab.org/embed.html} + + * Visualizes POS and dependency relation annotations and alignments. + * Provides GUI for filtering annotations and alignments. 
+ * + * @author Eden Erez + * @since Jan 6, 2015 + * + */ + + +import java.awt.Desktop; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileOutputStream; +import java.io.OutputStreamWriter; +import java.net.URL; +import java.util.Collection; +import java.util.HashMap; +import java.util.Vector; + +import org.apache.uima.cas.CASException; +import org.apache.uima.cas.Type; +import org.apache.uima.cas.text.AnnotationFS; +import org.apache.uima.jcas.JCas; +import org.uimafit.util.CasUtil; +import org.uimafit.util.JCasUtil; + +import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS; +import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; +import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency; + +import eu.excitement.type.alignment.Link; +import eu.excitementproject.eop.alignmentedas.p1eda.TEDecisionWithAlignment; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.FeatureValue; +import eu.excitementproject.eop.alignmentedas.p1eda.subs.ValueException; +import eu.excitementproject.eop.common.DecisionLabel; +import eu.excitementproject.eop.common.utilities.uima.UimaUtils; +import eu.excitementproject.eop.common.utilities.uima.UimaUtilsException; + +public class P1EdaVisualizer implements Visualizer { + + protected HashMap hashPOS; + protected HashMap> hashRel; + protected HashMap hashTEEntities; + + protected String strDocText; + protected String strDocData; + protected String strDocEntities; + + protected String docAlignmentData; + protected String strRelationEntities; + protected HashMap hashAlignmentData; + protected HashMap hashAlignmentEntities; + protected String strRelationData; + protected StringBuilder strHtml; + + public P1EdaVisualizer() { + init(); + } + + /* (non-Javadoc) + * @see eu.excitementproject.eop.alignmentedas.p1eda.visualization.Visualizer#generateHTML(eu.excitementproject.eop.alignmentedas.p1eda.TEDecisionWithAlignment) + * + * Note: There might be problems with the generated html in 
case the strings in the given JCas (i.e., the text and the hypothesis) contain empty spaces. + * It is highly recommended to trim these spaces while generating the provided JCas. + + */ + public String generateHTML(TEDecisionWithAlignment decision) throws VisualizerGenerationException + { + init(); + JCas jCas = decision.getJCasWithAlignment(); + Vector featureValues = decision.getFeatureVector(); + DecisionLabel label = decision.getDecision(); + Double confidence = decision.getConfidence(); + + try { + return generateHTML(jCas, label.toString(), confidence.toString(), featureValues); + } catch (ValueException e) { + throw new VisualizerGenerationException(e); + } + } + + /* (non-Javadoc) + * @see eu.excitementproject.eop.alignmentedas.p1eda.visualization.Visualizer#generateHTML(org.apache.uima.jcas.JCas) + * + * + * Note: There might be problems with the generated html, in case the strings in the given JCas (i.e., the text and the hypothesis) contain empty spaces. + * It is highly recommended to trim these spaces while generating the provided JCas. + */ + public String generateHTML(JCas jCas) throws VisualizerGenerationException + { + try { + return generateHTML(jCas,null, null, null); + } catch (ValueException e) { + throw new VisualizerGenerationException(e); + } + } + + + /** + * Generates an html string, which visualizes the various annotations and alignments defined in the JCas (with filtering options), and some details on the entailment decision + * + * @param jCas JCas object, composed of text, hypothesis and their annotations (e.g., part-of-speech, dependency relations, alignments) + * @param strDecisionLabel A description of the entailment decision + * @param confidence The confidence of the entailment decision + * @param featureValues A list of the features and their values, used for the entailment decision. + * @return an html string, which visualizes the various annotations and alignments defined in the JCas, the features, and the entailment decision. 
+ * @throws ValueException + */ + protected String generateHTML(JCas jCas,String strDecisionLabel, String confidence , Vector featureValues ) throws ValueException + { + + init(); + + // define the colors of the entity annotations and their relations + String alignmentEntityColor = "#88ccFf"; + String entityPOSColor = "#7fffa2"; + String relationAlignColor = "blue"; + String relationDEPColor = "green"; + + + // basic containers for the entities, the relations and the alignments + HashMap entities = new HashMap(); + HashMap relationEntities = new HashMap(); + HashMap alignmentEntities = new HashMap(); + + // basic java-script variables + strDocEntities = "var collData = { entity_types: [ \r\n"; + strDocText = "var docData = { \r\n"; + strDocData = "docData['entities'] = [ \r\n"; + strRelationEntities = "collData['relation_types'] = [ \r\n"; + strRelationData = "docData['relations'] = [ \r\n"; + strHtml = new StringBuilder(); + + int countInstances = 0; + int countRelation = 0; + try { + + // Get the text and the hypothesis data from the JCas + JCas jCasText = jCas.getView("TextView"); + JCas jCasHypothesis = jCas.getView("HypothesisView"); + + String txText = jCasText.getDocumentText().replaceAll("'", "`"); + String hpText = jCasHypothesis.getDocumentText().replaceAll("'", "`"); + + strDocText += " text : '"+txText+"\\r\\n"+hpText+"'\r\n"; + + int TextSize = txText.length()+2; + Collection col = CasUtil.selectAll(jCasText.getCas()); + Collection colH = CasUtil.selectAll(jCasHypothesis.getCas()); + + checkAllTypes(col); + checkAllTypes(colH); + + hashPOS = new HashMap(); + hashRel = new HashMap>(); + //check if there is Dependency + boolean hasDependency = getIfThereIsDependency(col); + + if(hasDependency) + { + + + //for text sentence + updateEntitiesAndRelations(col, 0); + //for hypothesis sentence + updateEntitiesAndRelations(colH, TextSize); + + // adding the POS collection and data + for (String entity : hashPOS.keySet()) { + String strVal = hashPOS.get(entity); 
+ if(!entities.containsKey(strVal)) + { + if(entities.keySet().size()!=0) + strDocEntities+=", \r\n"; + + strDocEntities+=" { \r\n"; + strDocEntities+=" type : '"+strVal+"', \r\n"; + strDocEntities+=" labels : ['"+strVal+"'], \r\n"; + strDocEntities+=" bgColor: '"+entityPOSColor+"', \r\n"; + strDocEntities+=" borderColor: 'darken' \r\n"; + strDocEntities+="} \r\n"; + + entities.put(strVal, true); + } + int indexOfS = entity.indexOf("S"); + String begin = entity.substring(1,indexOfS); + String end = entity.substring(indexOfS+1); + strDocData += " ['"+entity+"', '"+strVal+"', [["+begin+", "+end+"]]], \r\n"; + } + + // adding the relations collection and data + for (String fromRelation : hashRel.keySet()) { + HashMap hashTo = hashRel.get(fromRelation); + for (String toRelation : hashTo.keySet()) { + String type = hashTo.get(toRelation); + + if(!relationEntities.containsKey(type)) + { + if(relationEntities.keySet().size()!=0) + strRelationEntities+=", \r\n"; + + strRelationEntities += " { \r\n"; + strRelationEntities += " type : '"+type+"', \r\n"; + strRelationEntities += " labels : ['"+type+"'], \r\n"; + strRelationEntities += " dashArray: '3,3', \r\n"; + strRelationEntities += " color : '"+relationDEPColor+"', \r\n"; + strRelationEntities += " args : [ \r\n"; + strRelationEntities += " {role: 'From'},\r\n"; + strRelationEntities += " {role: 'To'}\r\n"; + strRelationEntities += " ] \r\n"; + strRelationEntities += " } \r\n"; + relationEntities.put(type, true); + } + + strRelationData += " ['R"+(++countRelation)+"', '"+type+"', [['From', '"+fromRelation+"'], ['To', '"+toRelation+"']]], \r\n"; + } + } + + + } + else + { + //for text sentence + for (AnnotationFS annotationFS : col) { + Type type = annotationFS.getType(); + //String typeShortName = type.getShortName(); + int begin = annotationFS.getBegin(); + int end = annotationFS.getEnd(); + String strVal = ""; + /*if(typeShortName.equals("Lemma")) + { + strVal=((Lemma)annotationFS).getValue(); + if(strVal.contains("'")) 
+ strVal=strVal.replaceAll("'", "\\\\'"); + + if(!entities.containsKey(strVal)) + { + if(entities.keySet().size()!=0) + strDocEntities+=", "; + + strDocEntities+=" { "; + strDocEntities+=" type : '"+strVal+"', "; + strDocEntities+=" labels : ['"+strVal+"'], "; + strDocEntities+=" bgColor: '#7fa2ff', "; + strDocEntities+=" borderColor: 'darken' "; + strDocEntities+="} "; + entities.put(strVal, true); + } + strDocData += " ['T"+(++countInstances)+"', '"+strVal+"', [["+begin+", "+end+"]]], "; + }*/ + if(type.toString().startsWith("de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos")) + { + strVal=((POS)annotationFS).getPosValue(); + if(!entities.containsKey(strVal)) + { + if(entities.keySet().size()!=0) + strDocEntities+=", \r\n"; + + strDocEntities+=" { \r\n"; + strDocEntities+=" type : '"+strVal+"', \r\n"; + strDocEntities+=" labels : ['"+strVal+"'], \r\n"; + strDocEntities+=" bgColor: '"+entityPOSColor+"', \r\n"; + strDocEntities+=" borderColor: 'darken' \r\n"; + strDocEntities+="} \r\n"; + + entities.put(strVal, true); + } + strDocData += " ['T"+(++countInstances)+"', '"+strVal+"', [["+begin+", "+end+"]]], \r\n"; + } + } + + //for hypothesis sentence + for (AnnotationFS annotationFS : colH) { + int begin = annotationFS.getBegin()+TextSize; + int end = annotationFS.getEnd()+TextSize; + String strVal = ""; + Type type = annotationFS.getType(); + //String typeShortName = type.getShortName(); + if(type.toString().startsWith("de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos")) + { + strVal=((POS)annotationFS).getPosValue(); + if(!entities.containsKey(strVal)) + { + if(entities.keySet().size()!=0) + strDocEntities+=", \r\n"; + strDocEntities+=" { \r\n"; + strDocEntities+=" type : '"+strVal+"', \r\n"; + strDocEntities+=" labels : ['"+strVal+"'], \r\n"; + strDocEntities+=" bgColor: '"+entityPOSColor+"', \r\n"; + strDocEntities+=" borderColor: 'darken' \r\n"; + strDocEntities+="} \r\n"; + entities.put(strVal, true); + } + strDocData += " ['T"+(++countInstances)+"', 
'"+strVal+"', [["+begin+", "+end+"]]], \r\n"; + + } + + } + } + + + + // get "Link" type annotations back.. + for (Link l : JCasUtil.select(jCasHypothesis, Link.class)) + { + + // you can access Link, as normal, annotation. Of course. + int tBegin = l.getTSideTarget().getBegin(); + int hBegin = l.getHSideTarget().getBegin()+TextSize; + int tEnd = l.getTSideTarget().getEnd(); + int hEnd = l.getHSideTarget().getEnd()+TextSize; + String tText = l.getTSideTarget().getCoveredText(); + String hText = l.getHSideTarget().getCoveredText(); + + if(tText.contains("'")) + tText=tText.replaceAll("'", "`"); + if(hText.contains("'")) + hText=hText.replaceAll("'", "`"); + + if(!entities.containsKey(tText+"Sred")) + { + if(entities.keySet().size()!=0) + strDocEntities+=", \r\n"; + + strDocEntities+=" { \r\n"; + strDocEntities+=" type : '"+tText+"Sred"+"', \r\n"; + strDocEntities+=" labels : ['"+tText+"'], \r\n"; + strDocEntities+=" bgColor: '"+alignmentEntityColor+"', \r\n"; + strDocEntities+=" borderColor: 'darken' \r\n"; + strDocEntities+="} \r\n"; + + entities.put(tText, true); + } + if(!alignmentEntities.keySet().contains(tText+tBegin+"S"+tEnd)) + { + String key = "TE"+(++countInstances); + String value = " ['"+key+"', '"+tText+"Sred"+"', [["+tBegin+", "+tEnd+"]]], \r\n"; + if(!hashTEEntities.keySet().contains(key)) + hashTEEntities.put(key, value); + docAlignmentData += value; + alignmentEntities.put(tText+tBegin+"S"+tEnd, true); + } + + + if(!entities.containsKey(hText+"Sred")) + { + if(entities.keySet().size()!=0) + strDocEntities+=", \r\n"; + + strDocEntities+=" { \r\n"; + strDocEntities+=" type : '"+hText+"Sred"+"', \r\n"; + strDocEntities+=" labels : ['"+hText+"'], \r\n"; + strDocEntities+=" bgColor: '"+alignmentEntityColor+"', \r\n"; + strDocEntities+=" borderColor: 'darken' \r\n"; + strDocEntities+="} \r\n"; + + entities.put(hText, true); + } + if(!alignmentEntities.keySet().contains(hText+hBegin+"S"+hEnd)) + { + String key = "TE"+(++countInstances); + String value = " 
['"+key+"', '"+hText+"Sred"+"', [["+hBegin+", "+hEnd+"]]], \r\n"; + if(!hashTEEntities.keySet().contains(key)) + hashTEEntities.put(key, value); + docAlignmentData += value; + alignmentEntities.put(hText+hBegin+"S"+hEnd, true); + } + + String relation = l.getID() + " (" + l.getStrength() + ")"; + String []strSplit = l.getID().split("_"); + if(!relationEntities.containsKey(relation)) + { + if(relationEntities.keySet().size()!=0) + strRelationEntities+=", \r\n"; + + strRelationEntities += " { \r\n"; + strRelationEntities += " type : '"+relation+"', \r\n"; + strRelationEntities += " labels : ['"+relation+"'], \r\n"; + strRelationEntities += " dashArray: '3,3', \r\n"; + strRelationEntities += " color : '"+relationAlignColor+"', \r\n"; + strRelationEntities += " args : [ \r\n"; + strRelationEntities += " {role: 'From'},\r\n"; + strRelationEntities += " {role: 'To'}\r\n"; + strRelationEntities += " ] \r\n"; + strRelationEntities += " } \r\n"; + relationEntities.put(relation, true); + } + + if(!hashAlignmentData.keySet().contains(strSplit[0])) + hashAlignmentData.put(strSplit[0], "docData['relations_"+strSplit[0]+"'] = [ "); + + if(!hashAlignmentEntities.keySet().contains(strSplit[0])) + hashAlignmentEntities.put(strSplit[0], "docData['alignment_entity_"+strSplit[0]+"'] = [ "); + + + + + hashAlignmentEntities.put(strSplit[0], hashAlignmentEntities.get(strSplit[0]) + hashTEEntities.get("TE"+(countInstances-1))); + hashAlignmentEntities.put(strSplit[0], hashAlignmentEntities.get(strSplit[0]) + hashTEEntities.get("TE"+(countInstances))); + + String strRelationInstance = " ['RE"+(++countRelation)+"', '"+relation+"', [['From', 'TE"+(countInstances-1)+"'], ['To', 'TE"+countInstances+"']]], \r\n"; + hashAlignmentData.put(strSplit[0], hashAlignmentData.get(strSplit[0]) + strRelationInstance); + } + + + strDocEntities += " ] \r\n"; + strDocEntities += " }; \r\n"; + strDocData += " ]; \r\n"; + strDocText += " }; \r\n"; + + + + docAlignmentData += " ]; \r\n"; + strRelationEntities 
+= " ]; \r\n"; + strRelationData += " ]; \r\n"; + + + + + for (String strBlock : hashAlignmentData.keySet()) { + hashAlignmentData.put(strBlock, hashAlignmentData.get(strBlock) + " ]; "); + } + + for (String strBlock : hashAlignmentEntities.keySet()) { + hashAlignmentEntities.put(strBlock, hashAlignmentEntities.get(strBlock) + " ]; "); + } + + + + // Generate the html string + + strHtml.append("\r\n"); + strHtml.append(" \r\n"); + strHtml.append(" \r\n"); + strHtml.append(" \r\n"); + strHtml.append(" \r\n"); + strHtml.append(" EOP Visualizar\r\n"); + + + strHtml.append("\r\n"); + //strHtml.append(" \r\n"; + strHtml.append(" \r\n"); + strHtml.append(" \r\n"); + strHtml.append(" \r\n"); + + strHtml.append("
\r\n"); + + strHtml.append("

Entailment Visualization

"); + if (strDecisionLabel != null) { + strHtml.append("

Decision: " + strDecisionLabel); + if (confidence != null) { + strHtml.append(", Confidence: " + confidence); + } + strHtml.append("

"); + } + + strHtml.append("
\r\n");
+				strHtml.append(" head.ready(function() {\r\n");
+				strHtml.append("     Util.embed(\r\n");
+				strHtml.append("         '${DIV_ID}',\r\n");
+				strHtml.append(" collData,\r\n");
+				strHtml.append(" docData,\r\n");
+				strHtml.append(" webFontURLs\r\n");
+				strHtml.append("     );\r\n");
+				strHtml.append(" });\r\n");
+				strHtml.append(" 
\r\n"); + strHtml.append("
 \r\n");
+				strHtml.append( strDocEntities );
+				strHtml.append(" 
\r\n"); + strHtml.append("
 \r\n");
+				strHtml.append( strDocText );
+				strHtml.append(" 
\r\n"); + + strHtml.append("
\r\n"); 
+				strHtml.append("  docData['entities'] = [ ];\r\n");
+				strHtml.append("  docData['relations'] = [ ];\r\n");
+				strHtml.append(" 
\r\n"); + + strHtml.append("
 \r\n");
+				strHtml.append( strDocData );
+				strHtml.append(" 
\r\n"); + strHtml.append("
 \r\n");
+				strHtml.append(docAlignmentData);
+				strHtml.append(" 
\r\n"); + + strHtml.append("
 \r\n");
+				strHtml.append( strRelationEntities );
+				strHtml.append(" 
\r\n"); + strHtml.append("
 \r\n");
+				strHtml.append(strRelationData);
+				strHtml.append(" 
\r\n"); + + + + for (String strBlock : hashAlignmentData.keySet()) { + strHtml.append("
 \r\n");
+					
+					strHtml.append( hashAlignmentEntities.get(strBlock) + "\r\n");
+					strHtml.append( hashAlignmentData.get(strBlock));
+					strHtml.append(" 
\r\n"); + ///////XXXXXXXXXXX + //hashAlignmentData.put(strBlock, hashAlignmentData.get(strBlock) + " ]; "); + } + + //strHtml.append("
\r\n"); + strHtml.append("
\r\n"); + strHtml.append("
\r\n"); + strHtml.append("
"); + strHtml.append(" \r\n"); + if (hasDependency) + strHtml.append("\r\n"); + else + strHtml.append("\r\n"); + strHtml.append("\r\n"); + strHtml.append("\r\n"); + strHtml.append("\r\n"); + + if (!hashAlignmentData.keySet().isEmpty()) { + strHtml.append("\r\n"); + for (String strBlock : hashAlignmentData.keySet()) + strHtml.append(" \r\n"); + } else + strHtml.append("\r\n"); + strHtml.append("
Annotations:
Annotations: DependencyPOS
Alignments:"+strBlock+"
Alignments:


\r\n"); + strHtml.append("
"); + + if(featureValues!=null) + { + strHtml.append("

Extracted Features

"); + + strHtml.append(""); + + strHtml.append("
"); + strHtml.append("
"); + strHtml.append(""); + + + boolean bRow = true; + for (FeatureValue featureValue : featureValues) { + if(bRow) + strHtml.append(""); + else + strHtml.append(""); + + strHtml.append(""); + bRow = !bRow; + + } + strHtml.append("
FeatureValue
"+featureValue.getFeatureName()+""+featureValue.getDoubleValue()+"


"); + + + } + + strHtml.append("
\r\n"); + strHtml.append("

\r\n"); + strHtml.append("

\r\n"); + strHtml.append(" \r\n"); + + + strHtml.append("
\r\n"); + + + strHtml.append("
\r\n"); + + strHtml.append(" \r\n"); + strHtml.append("
\r\n"); + + + strHtml.append("\r\n"); + + + } catch (CASException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + + + return strHtml.toString(); + } + + + private static String GetCss() { + String ret = ""; + ret += "@font-face {font-family: Liberation Sans;src: local(\"Liberation Sans\"), local(\"Liberation-Sans\"), url(static/fonts/Liberation_Sans-Regular.ttf) format(\"truetype\");font-weight: normal;font-style: normal;}@font-face {font-family: PT Sans Caption;src: local(\"PT Sans Caption\"), local(\"PTSans-Caption\"), url(static/fonts/PT_Sans-Caption-Web-Regular.ttf) format(\"truetype\");font-weight: normal;font-style: normal;}#svg {margin: 34px auto 100px; padding-top: 15px;}.center_wrapper {left: 0px; top: 0px; width: 100%; height: 100%; display: table; position: absolute;}.center_wrapper > div {vertical-align: middle; display: table-cell;}.center_wrapper > div > div {width: 30em; color: rgb(46, 110, 158); font-family: \"Liberation Sans\", Verdana, Arial, Helvetica, sans-serif; font-size: 12px; margin-right: auto; margin-left: auto;}.center_wrapper > div > div h1 {text-align: center; font-size: 14px;}#no_svg_wrapper {display: none;}svg {border: 1px solid rgb(127, 162, 255); border-image: none; width: 100%; height: 1px; font-size: 15px;}svg.reselect {border: 1px solid rgb(255, 51, 51); border-image: none;}text {font-family: \"Liberation Sans\", Verdana, Arial, Helvetica, sans-serif; font-size: 13px;}path {pointer-events: none;}.span text {font-family: \"PT Sans Caption\", sans-serif; font-size: 10px; pointer-events: none; text-anchor: middle;}.span_type_label {font-family: \"PT Sans Caption\", sans-serif; font-size: 11px;}.arc_type_label {font-family: \"PT Sans Caption\", sans-serif; font-size: 11px;}.attribute_type_label .ui-button-text {font-family: \"PT Sans Caption\", sans-serif; font-size: 11px;}.span rect {stroke-width: 0.75;}.glyph {font-family: sans-serif; font-weight: bold; fill: #444444;}.attribute_warning {stroke: red;}.span 
rect.False_positive {stroke: #ff4141; stroke-width: 2;}.shadow_True_positive {fill: #00ff00;}.shadow_False_positive {fill: #ff4141;}.comment_False_positive#commentpopup {background-color: rgb(255, 65, 65);}.span rect.False_negative {fill: #ffffff; stroke: #c20000; stroke-width: 2;}.shadow_False_negative {fill: #c20000;}.comment_False_negative#commentpopup {background-color: rgb(194, 0, 0);}.span rect.AnnotationError {stroke-width: 1;}.shadow_AnnotationError {fill: #ff0000;}.comment_AnnotationError#commentpopup {background-color: rgb(255, 119, 119);}.span rect.AnnotationWarning {stroke-width: 1;}.shadow_AnnotationWarning {fill: #ff8800;}.comment_AnnotationWarning#commentpopup {background-color: rgb(255, 153, 0);}.shadow_AnnotatorNotes {fill: #3ab7ee;}.comment_AnnotatorNotes#commentpopup {background-color: rgb(215, 231, 238);}.shadow_Normalized {fill: #3aee37;}.comment_Normalized#commentpopup {background-color: rgb(215, 238, 231);}rect.Normalized {stroke-width: 1.5;}.shadow_AnnotationIncomplete {fill: #aaaaaa;}.span rect.AnnotationIncomplete {fill: #ffffff; stroke: #002200; stroke-width: 0.5;}.comment_AnnotationIncomplete#commentpopup {background-color: rgb(255, 255, 119);}.shadow_AnnotationUnconfirmed {fill: #eeeeee;}.span rect.AnnotationUnconfirmed {opacity: 0.5; stroke: #002200; stroke-width: 0.5;}.comment_AnnotationUnconfirmed#commentpopup {background-color: rgb(221, 221, 255);}.span rect.True_positive {}rect.shadow_EditHighlight {fill: #ffff99;}.shadow_EditHighlight_arc {stroke: #ffff99;}.span path {fill: none;}.span path.curly {stroke-width: 0.5;}.span path.boxcross {opacity: 0.5; stroke: black;}.arcs path {fill: none; stroke: #989898; stroke-width: 1;}.arcs .highlight path {opacity: 1; stroke: #000000; stroke-width: 1.5;}.arcs .highlight text {fill: black; stroke: black; stroke-width: 0.5;}.highlight.span rect {stroke-width: 2px;}.span rect.reselect {stroke-width: 2px;}.span rect.reselectTarget {stroke-width: 2px;}.arcs .reselect path {stroke: #ff0000 
!important; stroke-width: 2px;}.arcs .reselect text {fill: #ff0000 !important;}.span rect.badTarget {stroke: #f00;}.arcs text {font-family: \"PT Sans Caption\", sans-serif; font-size: 9px; cursor: default; text-anchor: middle;}.background0 {fill: #ffffff; stroke: none;}.background1 {fill: #eeeeee; stroke: none;}.backgroundHighlight {fill: #ffff99; stroke: none;}.sentnum text {fill: #999999; text-anchor: end;}.sentnum path {stroke: #999999; stroke-width: 1px;}.span_cue {fill: #eeeeee !important;}.drag_stroke {stroke: black;}.drag_fill {fill: black;}.dialog {display: none;}#span_free_div {float: left;}#arc_free_div {float: left;}fieldset {border-radius: 5px; border: 1px solid rgb(166, 201, 226); border-image: none; margin-top: 5px; -webkit-border-radius: 5px; -moz-border-radius: 5px;}fieldset legend {border-radius: 3px; color: white; padding-right: 0.5em; padding-left: 0.5em; font-size: 90%; font-weight: bold; background-color: rgb(112, 168, 210); -webkit-border-radius: 3px; -moz-border-radius: 3px;}.label-like {color: rgb(46, 110, 158); font-family: monospace; font-size: 90%; font-weight: bold;}.accesskey {text-decoration: underline;}.shadow {box-shadow: 5px 5px 5px #444444; -moz-box-shadow: 5px 5px 5px #444444; -webkit-box-shadow: 5px 5px 5px #444444;}#span_selected {font-weight: bold;}#arc_origin {font-weight: bold;}#arc_target {font-weight: bold;}#commentpopup {padding: 10px; border-radius: 3px; border: 1px outset rgb(0, 0, 0); border-image: none; left: 0px; top: 0px; color: rgb(0, 0, 0); font-family: \"Liberation Sans\", Verdana, Arial, Helvetica, sans-serif; display: none; position: fixed; z-index: 20; max-width: 80%; opacity: 0.95; box-shadow: 5px 5px 5px #aaaaaa; background-color: rgb(245, 245, 249); -moz-box-shadow: 5px 5px 5px #aaaaaa; -webkit-box-shadow: 5px 5px 5px #aaaaaa; -webkit-border-radius: 3px; -moz-border-radius: 3px;}#more_info_readme {height: 350px;}#readme_container {position: relative;}#more_readme_button {padding: 2px 5px; top: -2px; right: 
-2px; position: absolute;}.comment_id {color: rgb(51, 51, 51); font-family: monospace; font-size: 75%; vertical-align: top; float: right;}.comment_type {}.comment_text {font-weight: bold;}.comment_type_id_wrapper {padding-right: 2em;}.norm_info_label {font-size: 80%; font-weight: bold;}.norm_info_value {font-size: 80%;}.norm_info_img {margin-left: 1em; float: right;}#search_form select {width: 100%;}.scroll_fset {height: 200px;}.scroll_fset fieldset {height: 100%; -ms-overflow-x: hidden; -ms-overflow-y: hidden;}.scroll_fset {margin-bottom: 2.5em;}.scroll_fset fieldset {padding-bottom: 2em;}.scroll_fset div.scroller {width: 100%; height: 100%; overflow: auto;}#span_highlight_link {float: right;}#arc_highlight_link {float: right;}#viewspan_highlight_link {float: right;}.unselectable {cursor: default; -moz-user-select: -moz-none; -khtml-user-select: none; -webkit-user-select: none; -o-user-select: none; user-select: none;}* {-webkit-tap-highlight-color: rgba(0, 0, 0, 0); -webkit-text-size-adjust: none; select: none;}.span rect.AddedAnnotation {stroke: #ff4141; stroke-width: 2;}.shadow_AddedAnnotation {fill: #ff4141;}.comment_AddedAnnotation#commentpopup {background-color: rgb(255, 204, 204);}.span rect.MissingAnnotation {stroke: #ffffff; stroke-width: 2;}.shadow_MissingAnnotation {opacity: 0.3; fill: #ff4141;}.comment_MissingAnnotation#commentpopup {background-color: rgb(255, 204, 204);}.span rect.MissingAnnotation + text {opacity: 0.5;}.span rect.ChangedAnnotation {stroke: #ffff99; stroke-width: 2;}.shadow_ChangedAnnotation {fill: #ff4141;}.comment_ChangedAnnotation#commentpopup {background-color: rgb(255, 204, 204);}"; + + return ret; + } + + private void updateEntitiesAndRelations(Collection col, int Identication) { + for (AnnotationFS annotationFS : col) { + Type type = annotationFS.getType(); + String typeShortName = type.getShortName(); + + if(typeShortName.equals("Dependency")) + { + + Token governor = ((Dependency)annotationFS).getGovernor(); + Token dependent 
= ((Dependency)annotationFS).getDependent(); + String dependencyType = ((Dependency)annotationFS).getDependencyType(); + + //tmp + //System.out.println(governor.getLemma().getValue() + "--" + dependencyType + "--> " + dependent.getLemma().getValue()); + + String strGovernorId="T"+(governor.getBegin()+Identication)+"S"+(governor.getEnd()+Identication); + String strDependentId="T"+(dependent.getBegin()+Identication)+"S"+(dependent.getEnd()+Identication); + + hashPOS.put(strGovernorId, governor.getPos().getPosValue()); + hashPOS.put(strDependentId, dependent.getPos().getPosValue()); + if(!hashRel.keySet().contains(strGovernorId)) + hashRel.put(strGovernorId, new HashMap()); + + hashRel.get(strGovernorId).put(strDependentId, dependencyType); + + } + } + + } + + public static HashMap hashTypes = new HashMap(); + private static void checkAllTypes(Collection col) + { + for (AnnotationFS annotationFS : col) { + Type type = annotationFS.getType(); + if(!type.toString().startsWith("de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos")) + { + String typeShortName = type.getShortName(); + if(!hashTypes.keySet().contains(typeShortName)) + hashTypes.put(typeShortName, true); + } + + } + + } + + + private static boolean getIfThereIsDependency(Collection col) { + for (AnnotationFS annotationFS : col) { + Type type = annotationFS.getType(); + String typeShortName = type.getShortName(); + if(typeShortName.equals("Dependency")) + return true; + } + return false; + } + + public static void main(String[] args) throws VisualizerGenerationException { + + JCas jCas; + try { + jCas = UimaUtils.loadXmi(new File(args[0])); + + + Vector featureVector = new Vector(); + featureVector.add(new FeatureValue("feature1",0.1)); + featureVector.add(new FeatureValue("feature2",0.3)); + featureVector.add(new FeatureValue("feature3",0.7)); + TEDecisionWithAlignment decision = new + TEDecisionWithAlignment(DecisionLabel.Entailment, 0.5, "", jCas, featureVector); + + //Set drawnAnnotations = new Set(); + 
Visualizer vis = new P1EdaVisualizer(); + String str = vis.generateHTML(decision); + //String str = vis.generateHTML(jCas); + + try { + BufferedWriter bw = new BufferedWriter(new OutputStreamWriter( + new FileOutputStream("temp.html"))); + bw.write(str); + bw.close(); + } catch (Exception e) { + e.printStackTrace(); + } + + File file = new File("temp.html"); + + Desktop desktop = Desktop.isDesktopSupported() ? Desktop.getDesktop() : null; + if (desktop != null && desktop.isSupported(Desktop.Action.BROWSE)) { + try { + desktop.browse(new URL("file:\\\\\\" + file.getAbsolutePath()).toURI()); + } catch (Exception e) { + e.printStackTrace(); + } + } + + } catch (UimaUtilsException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + } + + protected void init() { + strDocText = null; + strDocData = null; + strDocEntities = null; + + hashPOS = new HashMap(); + hashRel = new HashMap>(); + hashTEEntities = new HashMap(); + + strDocEntities = "var collData = { entity_types: [ \r\n"; + + docAlignmentData = "docData['alignment_entity'] = [ \r\n"; + strRelationEntities = "collData['relation_types'] = [ \r\n"; + hashAlignmentData = new HashMap(); + hashAlignmentEntities = new HashMap(); + strRelationData = "docData['relations'] = [ \r\n"; + strHtml = null; + } + + } diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/visualization/Visualizer.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/visualization/Visualizer.java new file mode 100644 index 00000000..dc2bcb9b --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/visualization/Visualizer.java @@ -0,0 +1,38 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.visualization; + +import org.apache.uima.jcas.JCas; + +import eu.excitementproject.eop.alignmentedas.p1eda.TEDecisionWithAlignment; + +/** + * This interface defines the basic functionality of the visualizer: generation of a stand-alone html 
given a JCas. + * The generated html visualizes the various annotations and alignments defined in the JCas (with filtering options). + * + * + * @author Meni Adler + * @since Jan 6, 2015 + * + */ + + +public interface Visualizer { + + /** + * Generates an html string, which visualizes the various annotations and alignments defined in the JCas (with filtering options). + * + * @param jcas JCas object, composed of text, hypothesis and their annotations (e.g., part-of-speech, dependency relations, alignments) + * @return an html string, which visualizes the various annotations and alignments defined in the JCas. + * @throws VisualizerGenerationException + */ + String generateHTML(JCas jcas) throws VisualizerGenerationException; + + /** + * Generates an html string, which visualizes the various annotations and alignments defined in the JCas (with filtering options), and some details on the entailment decision + * + * @param decision TEDecisionWithAlignment object, composed of JCas, feature vector and entailment decision + * @return an html string, which visualizes the various annotations and alignments defined in the JCas, the features, and the entailment decision. 
+ * @throws VisualizerGenerationException + */ + String generateHTML(TEDecisionWithAlignment decision) throws VisualizerGenerationException; +} + diff --git a/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/visualization/VisualizerGenerationException.java b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/visualization/VisualizerGenerationException.java new file mode 100644 index 00000000..d8ed3dfb --- /dev/null +++ b/alignmentedas/src/main/java/eu/excitementproject/eop/alignmentedas/p1eda/visualization/VisualizerGenerationException.java @@ -0,0 +1,25 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.visualization; + +/** + * @author Meni Adler + * @since Jan 6, 2015 + * + */ + +public class VisualizerGenerationException extends Exception { + + private static final long serialVersionUID = 1L; + + public VisualizerGenerationException() { + super(); + } + + public VisualizerGenerationException(Throwable e) { + super(e); + } + + public VisualizerGenerationException(String msg) { + super(msg); + } + +} diff --git a/alignmentedas/src/test/java/eu/excitementproject/eop/alignmentedas/p1eda/visualization/VisualizerTest.java b/alignmentedas/src/test/java/eu/excitementproject/eop/alignmentedas/p1eda/visualization/VisualizerTest.java new file mode 100644 index 00000000..17a94bd7 --- /dev/null +++ b/alignmentedas/src/test/java/eu/excitementproject/eop/alignmentedas/p1eda/visualization/VisualizerTest.java @@ -0,0 +1,104 @@ +package eu.excitementproject.eop.alignmentedas.p1eda.visualization; + +import static org.junit.Assert.*; + + +import java.io.File; + +import org.apache.commons.lang.exception.ExceptionUtils; +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.uima.jcas.JCas; +import org.junit.Assume; +import org.junit.Test; + +import eu.excitementproject.eop.alignmentedas.p1eda.P1EDATemplate; +import 
eu.excitementproject.eop.alignmentedas.p1eda.TEDecisionWithAlignment; +import eu.excitementproject.eop.alignmentedas.p1eda.instances.MinimalP1EDA; +import eu.excitementproject.eop.common.EDAException; +import eu.excitementproject.eop.lap.LAPException; +import eu.excitementproject.eop.lap.PlatformCASProber; +import eu.excitementproject.eop.lap.dkpro.TreeTaggerEN; + +public class VisualizerTest { + + @Test + public void test() { + + // Set Log4J for the test + BasicConfigurator.resetConfiguration(); + BasicConfigurator.configure(); + Logger.getRootLogger().setLevel(Level.DEBUG); // set INFO to hide Debug + testlogger = Logger.getLogger(getClass().getName()); + + // prepare a lemmatizer + TreeTaggerEN lap = null; + + try + { + lap = new TreeTaggerEN(); + lap.generateSingleTHPairCAS("this is a test.", "TreeTagger in sight?"); + } + catch (Exception e) + { + // check if this is due to missing TreeTagger binary and model. + // In such a case, we just skip this test. + // (see /lap/src/scripts/treetagger/README.txt to how to install TreeTagger) + if (ExceptionUtils.getRootCause(e) instanceof java.io.IOException) + { + testlogger.info("Skipping the test: TreeTagger binary and/or models missing. \n To run this testcase, TreeTagger installation is needed. (see /lap/src/scripts/treetagger/README.txt)"); + Assume.assumeTrue(false); // we won't test this test case any longer. + } + } + + try { + doVisualizerTest(lap); + } + catch (Exception e) + { + e.printStackTrace(); + fail(e.getMessage()); + } + + } + + public void doVisualizerTest(TreeTaggerEN lap) throws EDAException, LAPException, VisualizerGenerationException + { + + // get an instance of the EDA + P1EDATemplate eda = new MinimalP1EDA(); + //new SimpleWordCoverageEN( + // "../core/src/main/resources/ontologies/EnglishWordNet-dict", + //"../core/src/main/resources/VerbOcean/verbocean.unrefined.2004-05-20.txt"); + + // Make the "very simple", "minimal" two training data. 
+ JCas cas1 = lap.generateSingleTHPairCAS("The train was uncomfortable", "the train was comfortable", "NONENTAILMENT"); + JCas cas2 = lap.generateSingleTHPairCAS("The person is hired as a postdoc.","The person is hired as a postdoc.", "ENTAILMENT"); + + File xmiDir = new File("target/xmis/"); + if (!xmiDir.exists()) + { + xmiDir.mkdirs(); + } + File modelBaseName = new File("target/simple"); + + PlatformCASProber.storeJCasAsXMI(cas1, new File("target/xmis/train1.xmi")); + PlatformCASProber.storeJCasAsXMI(cas2, new File("target/xmis/train2.xmi")); + + // Okay. Start Training + eda.startTraining(xmiDir, modelBaseName); + + // ask something? + JCas eopJCas = lap.generateSingleTHPairCAS("This is a very simple configuration.", "This is in fact a complex configuration."); + TEDecisionWithAlignment decision = eda.process(eopJCas); + + //Test the visualizer + Visualizer vis = new P1EdaVisualizer(); + String html = vis.generateHTML(decision); + testlogger.info(html); + } + + public static Logger testlogger; + +} diff --git a/assembly.xml b/assembly.xml index 483c0c5a..e30ad42d 100644 --- a/assembly.xml +++ b/assembly.xml @@ -19,6 +19,10 @@ eu.excitementproject:lexicalminer eu.excitementproject:distsim eu.excitementproject:globalgraphoptimizer + eu.excitementproject:tracer + eu.excitementproject:alignmentedas + eu.excitementproject:redis + eu.excitementproject:adarte / diff --git a/biutee/pom.xml b/biutee/pom.xml index aa3dd9c1..8cc99c39 100644 --- a/biutee/pom.xml +++ b/biutee/pom.xml @@ -4,7 +4,7 @@ eu.excitementproject eop - 1.2.0 + 1.2.1 biutee biutee @@ -36,7 +36,7 @@ eu.excitementproject transformations - 1.2.0 + 1.2.1 diff --git a/common/pom.xml b/common/pom.xml index a30bf093..9507885b 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -3,7 +3,7 @@ eu.excitementproject eop - 1.2.0 + 1.2.1 common common diff --git a/common/src/main/java/eu/excitementproject/eop/common/representation/parse/tree/dependency/view/NodeShortString.java 
b/common/src/main/java/eu/excitementproject/eop/common/representation/parse/tree/dependency/view/NodeShortString.java index ec628a6e..90003118 100644 --- a/common/src/main/java/eu/excitementproject/eop/common/representation/parse/tree/dependency/view/NodeShortString.java +++ b/common/src/main/java/eu/excitementproject/eop/common/representation/parse/tree/dependency/view/NodeShortString.java @@ -1,86 +1,86 @@ -package eu.excitementproject.eop.common.representation.parse.tree.dependency.view; - -import eu.excitementproject.eop.common.representation.parse.representation.basic.Info; -import eu.excitementproject.eop.common.representation.parse.representation.basic.InfoGetFields; -import eu.excitementproject.eop.common.representation.parse.tree.AbstractNode; - -public abstract class NodeShortString { - private static final String ROOT_STR = ""; - public abstract String toString(AbstractNode node); - - public static String prepConcrete(AbstractNode node) { - if (node.getInfo().getEdgeInfo().getDependencyRelation() != null && - node.getInfo().getEdgeInfo().getDependencyRelation().getStringRepresentation().equals("prep")) { - return "_" + node.getInfo().getNodeInfo().getWordLemma(); - } - else { - return ""; - } - } - - - //// Concrete Classes //////////////////////////////////////// - - public static class Rel extends NodeShortString { - @Override - public String toString(AbstractNode node) { - return InfoGetFields.getRelation(node.getInfo(), ROOT_STR); - } - } - - public static class RelPrep extends NodeShortString { - @Override - public String toString(AbstractNode node) { - return InfoGetFields.getRelation(node.getInfo(), ROOT_STR)+prepConcrete(node); - } - } - - public static class RelPos extends NodeShortString { - @Override - public String toString(AbstractNode node) { - return InfoGetFields.getRelation(node.getInfo(), ROOT_STR)+"->"+InfoGetFields.getPartOfSpeech(node.getInfo()); - } - } - - public static class RelPrepPos extends NodeShortString { - @Override - 
public String toString(AbstractNode node) { - return InfoGetFields.getRelation(node.getInfo(), ROOT_STR)+prepConcrete(node)+"->"+InfoGetFields.getPartOfSpeech(node.getInfo()); - } - } - - public static class RelCanonicalPos extends NodeShortString { - @Override - public String toString(AbstractNode node) { - return InfoGetFields.getRelation(node.getInfo(), ROOT_STR)+"->"+node.getInfo().getNodeInfo().getSyntacticInfo().getPartOfSpeech().getCanonicalPosTag(); - } - } - - public static class RelPrepCanonicalPos extends NodeShortString { - @Override - public String toString(AbstractNode node) { - return InfoGetFields.getRelation(node.getInfo(), ROOT_STR)+prepConcrete(node)+"->"+node.getInfo().getNodeInfo().getSyntacticInfo().getPartOfSpeech().getCanonicalPosTag(); - } - } - - public static class WordRel extends NodeShortString { - @Override - public String toString(AbstractNode node) { - return InfoGetFields.getRelation(node.getInfo(), ROOT_STR)+"->"+InfoGetFields.getWord(node.getInfo()); - } - } - - public static class WordRelPos extends NodeShortString { - @Override - public String toString(AbstractNode node) { - return InfoGetFields.getRelation(node.getInfo(), ROOT_STR)+"->"+InfoGetFields.getWord(node.getInfo())+"/"+InfoGetFields.getPartOfSpeech(node.getInfo()); - } - } - - public static class WordRelCanonicalPos extends NodeShortString { - @Override - public String toString(AbstractNode node) { - return InfoGetFields.getRelation(node.getInfo(), ROOT_STR)+"->"+InfoGetFields.getWord(node.getInfo())+"/"+node.getInfo().getNodeInfo().getSyntacticInfo().getPartOfSpeech().getCanonicalPosTag(); - } - } -} +package eu.excitementproject.eop.common.representation.parse.tree.dependency.view; + +import eu.excitementproject.eop.common.representation.parse.representation.basic.Info; +import eu.excitementproject.eop.common.representation.parse.representation.basic.InfoGetFields; +import eu.excitementproject.eop.common.representation.parse.tree.AbstractNode; + +public abstract 
class NodeShortString { + private static final String ROOT_STR = ""; + public abstract String toString(AbstractNode node); + + public static String prepConcrete(AbstractNode node) { + if (node.getInfo().getEdgeInfo().getDependencyRelation() != null && + node.getInfo().getEdgeInfo().getDependencyRelation().getStringRepresentation().equals("prep")) { + return "_" + node.getInfo().getNodeInfo().getWordLemma(); + } + else { + return ""; + } + } + + + //// Concrete Classes //////////////////////////////////////// + + public static class Rel extends NodeShortString { + @Override + public String toString(AbstractNode node) { + return InfoGetFields.getRelation(node.getInfo(), ROOT_STR); + } + } + + public static class RelPrep extends NodeShortString { + @Override + public String toString(AbstractNode node) { + return InfoGetFields.getRelation(node.getInfo(), ROOT_STR)+prepConcrete(node); + } + } + + public static class RelPos extends NodeShortString { + @Override + public String toString(AbstractNode node) { + return InfoGetFields.getRelation(node.getInfo(), ROOT_STR)+"->"+InfoGetFields.getPartOfSpeech(node.getInfo()); + } + } + + public static class RelPrepPos extends NodeShortString { + @Override + public String toString(AbstractNode node) { + return InfoGetFields.getRelation(node.getInfo(), ROOT_STR)+prepConcrete(node)+"->"+InfoGetFields.getPartOfSpeech(node.getInfo()); + } + } + + public static class RelCanonicalPos extends NodeShortString { + @Override + public String toString(AbstractNode node) { + return InfoGetFields.getRelation(node.getInfo(), ROOT_STR)+"->"+node.getInfo().getNodeInfo().getSyntacticInfo().getPartOfSpeech().getCanonicalPosTag(); + } + } + + public static class RelPrepCanonicalPos extends NodeShortString { + @Override + public String toString(AbstractNode node) { + return InfoGetFields.getRelation(node.getInfo(), ROOT_STR)+prepConcrete(node)+"->"+node.getInfo().getNodeInfo().getSyntacticInfo().getPartOfSpeech().getCanonicalPosTag(); + } + } + + 
public static class WordRel extends NodeShortString { + @Override + public String toString(AbstractNode node) { + return InfoGetFields.getRelation(node.getInfo(), ROOT_STR)+"->"+InfoGetFields.getWord(node.getInfo()); + } + } + + public static class WordRelPos extends NodeShortString { + @Override + public String toString(AbstractNode node) { + return InfoGetFields.getRelation(node.getInfo(), ROOT_STR)+"->"+InfoGetFields.getWord(node.getInfo())+"/"+InfoGetFields.getPartOfSpeech(node.getInfo()); + } + } + + public static class WordRelCanonicalPos extends NodeShortString { + @Override + public String toString(AbstractNode node) { + return InfoGetFields.getRelation(node.getInfo(), ROOT_STR)+"->"+InfoGetFields.getWord(node.getInfo())+"/"+node.getInfo().getNodeInfo().getSyntacticInfo().getPartOfSpeech().getCanonicalPosTag(); + } + } +} diff --git a/common/src/main/java/eu/excitementproject/eop/common/representation/parse/tree/dependency/view/TreeToLineString.java b/common/src/main/java/eu/excitementproject/eop/common/representation/parse/tree/dependency/view/TreeToLineString.java index fe443de3..a82101c0 100644 --- a/common/src/main/java/eu/excitementproject/eop/common/representation/parse/tree/dependency/view/TreeToLineString.java +++ b/common/src/main/java/eu/excitementproject/eop/common/representation/parse/tree/dependency/view/TreeToLineString.java @@ -1,166 +1,166 @@ -package eu.excitementproject.eop.common.representation.parse.tree.dependency.view; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -import eu.excitementproject.eop.common.representation.parse.representation.basic.Info; -import eu.excitementproject.eop.common.representation.parse.tree.dependency.basic.BasicNode; -import eu.excitementproject.eop.common.utilities.StringUtil; - -/** - * Convenient static methods for printing a tree in one line, using parentheses to determine nesting. 
- * @author Ofer Bronstein - * @since August 2014 - */ -public class TreeToLineString { - - private TreeToLineString() {} - - - //// Specific Methods /////////////////////////////////////////////////////// - - /////// Single Node - - public static String getStringWordRel(BasicNode tree) { - return getString(tree, new NodeShortString.WordRel()); - } - - public static String getStringWordRelPos(BasicNode tree) { - return getString(tree, new NodeShortString.WordRelPos()); - } - - public static String getStringWordRelCanonicalPos(BasicNode tree) { - return getString(tree, new NodeShortString.WordRelCanonicalPos()); - } - - - /////// Multiple Nodes - - public static String getStringRel(List trees, boolean withContext, boolean withMagicNodes) { - return getString(trees, withContext, withMagicNodes, new NodeShortString.Rel()); - } - - public static String getStringRelPrep(List trees, boolean withContext, boolean withMagicNodes) { - return getString(trees, withContext, withMagicNodes, new NodeShortString.RelPrep()); - } - - public static String getStringRelPos(List trees, boolean withContext, boolean withMagicNodes) { - return getString(trees, withContext, withMagicNodes, new NodeShortString.RelPos()); - } - - public static String getStringRelPrepPos(List trees, boolean withContext, boolean withMagicNodes) { - return getString(trees, withContext, withMagicNodes, new NodeShortString.RelPrepPos()); - } - - public static String getStringRelCanonicalPos(List trees, boolean withContext, boolean withMagicNodes) { - return getString(trees, withContext, withMagicNodes, new NodeShortString.RelCanonicalPos()); - } - - public static String getStringRelPrepCanonicalPos(List trees, boolean withContext, boolean withMagicNodes) { - return getString(trees, withContext, withMagicNodes, new NodeShortString.RelPrepCanonicalPos()); - } - - public static String getStringWordRel(List trees, boolean withContext, boolean withMagicNodes) { - return getString(trees, withContext, withMagicNodes, new 
NodeShortString.WordRel()); - } - - public static String getStringWordRelPos(List trees, boolean withContext, boolean withMagicNodes) { - return getString(trees, withContext, withMagicNodes, new NodeShortString.WordRelPos()); - } - - public static String getStringWordRelCanonicalPos(List trees, boolean withContext, boolean withMagicNodes) { - return getString(trees, withContext, withMagicNodes, new NodeShortString.WordRelCanonicalPos()); - } - - - //// Generic Methods /////////////////////////////////////////////////////// - - public static String getString(BasicNode tree, NodeShortString nodeStr) { - return getString(tree, "(", ")", nodeStr); - } - - public static String getString(List trees, boolean withContext, boolean withMagicNodes, NodeShortString nodeStr) { - if (trees.isEmpty()) { - return "(empty-tree)"; - } - String subrootDep = null; - if (!withContext) { - subrootDep = ""; - } - return getString(trees, "(", ")", "#", subrootDep, withMagicNodes, nodeStr); - } - - public static String getString(BasicNode root, String pre, String post, String dep, boolean withMagicNodes, NodeShortString str) { - return getStringSubtree(root, str, pre, post, dep, withMagicNodes).toString().trim(); - } - - public static String getString(BasicNode root, String pre, String post, NodeShortString str) { - return getStringSubtree(root, str, pre, post, null, true).toString().trim(); - } - - public static String getString(BasicNode root, String pre, String post, boolean withMagicNodes, NodeShortString str) { - return getStringSubtree(root, str, pre, post, null, withMagicNodes).toString().trim(); - } - - public static String getString(Collection trees, String pre, String post, String treeSeparator, String dep, boolean withMagicNodes, NodeShortString str) { - List strings = new ArrayList(trees.size()); - for (BasicNode root : trees) { - strings.add(getString(root, pre, post, dep, withMagicNodes, str)); - } - return StringUtil.join(strings, treeSeparator); - } - - public static String 
getString(Collection trees, String pre, String post, boolean withMagicNodes, NodeShortString str) { - return getString(trees, pre, post, null, "#", withMagicNodes, str); - } - - protected static StringBuffer getStringSubtree(BasicNode subtree, NodeShortString str, String pre, String post, String dep, boolean withMagicNodes) { - final String NULL_TREE_STR = "(null)"; - StringBuffer result = new StringBuffer(); - - if (subtree == null) { - result.append(NULL_TREE_STR); - } - else { - if (subtree.getInfo().getNodeInfo().getWord() != null) { - String nodeDep; - if (dep != null) { - nodeDep = dep; - } - else { - nodeDep = str.toString(subtree); - } - - // "Magic Node" data should just be added to nodeDep - if ( withMagicNodes && - subtree.getInfo().getNodeInfo().getWordLemma()!=null && - MAGIC_NODES.contains(subtree.getInfo().getNodeInfo().getWordLemma())) { - nodeDep += subtree.getInfo().getNodeInfo().getWordLemma(); - } - - result.append(nodeDep); - } - - if (subtree.getChildren() != null) { - for (BasicNode child : subtree.getChildren()) { - result.append(pre); - result.append(getStringSubtree(child, str, pre, post, null, withMagicNodes)); - result.append(post); - } - } - } - - return result; - } - - - // "Magic Nodes" are one with specific importance for a tree/fragment, and should be printed accordingly - public static final String MAGIC_NODE_PREDICATE = "[PRD]"; - public static final String MAGIC_NODE_ARGUMENT = "[ARG]"; - public static final Set MAGIC_NODES = new HashSet(Arrays.asList(new String[] {MAGIC_NODE_PREDICATE, MAGIC_NODE_ARGUMENT})); - -} +package eu.excitementproject.eop.common.representation.parse.tree.dependency.view; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import eu.excitementproject.eop.common.representation.parse.representation.basic.Info; +import 
eu.excitementproject.eop.common.representation.parse.tree.dependency.basic.BasicNode; +import eu.excitementproject.eop.common.utilities.StringUtil; + +/** + * Convenient static methods for printing a tree in one line, using parentheses to determine nesting. + * @author Ofer Bronstein + * @since August 2014 + */ +public class TreeToLineString { + + private TreeToLineString() {} + + + //// Specific Methods /////////////////////////////////////////////////////// + + /////// Single Node + + public static String getStringWordRel(BasicNode tree) { + return getString(tree, new NodeShortString.WordRel()); + } + + public static String getStringWordRelPos(BasicNode tree) { + return getString(tree, new NodeShortString.WordRelPos()); + } + + public static String getStringWordRelCanonicalPos(BasicNode tree) { + return getString(tree, new NodeShortString.WordRelCanonicalPos()); + } + + + /////// Multiple Nodes + + public static String getStringRel(List trees, boolean withContext, boolean withMagicNodes) { + return getString(trees, withContext, withMagicNodes, new NodeShortString.Rel()); + } + + public static String getStringRelPrep(List trees, boolean withContext, boolean withMagicNodes) { + return getString(trees, withContext, withMagicNodes, new NodeShortString.RelPrep()); + } + + public static String getStringRelPos(List trees, boolean withContext, boolean withMagicNodes) { + return getString(trees, withContext, withMagicNodes, new NodeShortString.RelPos()); + } + + public static String getStringRelPrepPos(List trees, boolean withContext, boolean withMagicNodes) { + return getString(trees, withContext, withMagicNodes, new NodeShortString.RelPrepPos()); + } + + public static String getStringRelCanonicalPos(List trees, boolean withContext, boolean withMagicNodes) { + return getString(trees, withContext, withMagicNodes, new NodeShortString.RelCanonicalPos()); + } + + public static String getStringRelPrepCanonicalPos(List trees, boolean withContext, boolean withMagicNodes) { + 
return getString(trees, withContext, withMagicNodes, new NodeShortString.RelPrepCanonicalPos()); + } + + public static String getStringWordRel(List trees, boolean withContext, boolean withMagicNodes) { + return getString(trees, withContext, withMagicNodes, new NodeShortString.WordRel()); + } + + public static String getStringWordRelPos(List trees, boolean withContext, boolean withMagicNodes) { + return getString(trees, withContext, withMagicNodes, new NodeShortString.WordRelPos()); + } + + public static String getStringWordRelCanonicalPos(List trees, boolean withContext, boolean withMagicNodes) { + return getString(trees, withContext, withMagicNodes, new NodeShortString.WordRelCanonicalPos()); + } + + + //// Generic Methods /////////////////////////////////////////////////////// + + public static String getString(BasicNode tree, NodeShortString nodeStr) { + return getString(tree, "(", ")", nodeStr); + } + + public static String getString(List trees, boolean withContext, boolean withMagicNodes, NodeShortString nodeStr) { + if (trees.isEmpty()) { + return "(empty-tree)"; + } + String subrootDep = null; + if (!withContext) { + subrootDep = ""; + } + return getString(trees, "(", ")", "#", subrootDep, withMagicNodes, nodeStr); + } + + public static String getString(BasicNode root, String pre, String post, String dep, boolean withMagicNodes, NodeShortString str) { + return getStringSubtree(root, str, pre, post, dep, withMagicNodes).toString().trim(); + } + + public static String getString(BasicNode root, String pre, String post, NodeShortString str) { + return getStringSubtree(root, str, pre, post, null, true).toString().trim(); + } + + public static String getString(BasicNode root, String pre, String post, boolean withMagicNodes, NodeShortString str) { + return getStringSubtree(root, str, pre, post, null, withMagicNodes).toString().trim(); + } + + public static String getString(Collection trees, String pre, String post, String treeSeparator, String dep, boolean 
withMagicNodes, NodeShortString str) { + List strings = new ArrayList(trees.size()); + for (BasicNode root : trees) { + strings.add(getString(root, pre, post, dep, withMagicNodes, str)); + } + return StringUtil.join(strings, treeSeparator); + } + + public static String getString(Collection trees, String pre, String post, boolean withMagicNodes, NodeShortString str) { + return getString(trees, pre, post, null, "#", withMagicNodes, str); + } + + protected static StringBuffer getStringSubtree(BasicNode subtree, NodeShortString str, String pre, String post, String dep, boolean withMagicNodes) { + final String NULL_TREE_STR = "(null)"; + StringBuffer result = new StringBuffer(); + + if (subtree == null) { + result.append(NULL_TREE_STR); + } + else { + if (subtree.getInfo().getNodeInfo().getWord() != null) { + String nodeDep; + if (dep != null) { + nodeDep = dep; + } + else { + nodeDep = str.toString(subtree); + } + + // "Magic Node" data should just be added to nodeDep + if ( withMagicNodes && + subtree.getInfo().getNodeInfo().getWordLemma()!=null && + MAGIC_NODES.contains(subtree.getInfo().getNodeInfo().getWordLemma())) { + nodeDep += subtree.getInfo().getNodeInfo().getWordLemma(); + } + + result.append(nodeDep); + } + + if (subtree.getChildren() != null) { + for (BasicNode child : subtree.getChildren()) { + result.append(pre); + result.append(getStringSubtree(child, str, pre, post, null, withMagicNodes)); + result.append(post); + } + } + } + + return result; + } + + + // "Magic Nodes" are one with specific importance for a tree/fragment, and should be printed accordingly + public static final String MAGIC_NODE_PREDICATE = "[PRD]"; + public static final String MAGIC_NODE_ARGUMENT = "[ARG]"; + public static final Set MAGIC_NODES = new HashSet(Arrays.asList(new String[] {MAGIC_NODE_PREDICATE, MAGIC_NODE_ARGUMENT})); + +} diff --git a/common/src/main/resources/desc/type/PredicateTruth.xml b/common/src/main/resources/desc/type/PredicateTruth.xml index d6d97b08..4849729e 
100644 --- a/common/src/main/resources/desc/type/PredicateTruth.xml +++ b/common/src/main/resources/desc/type/PredicateTruth.xml @@ -1,139 +1,139 @@ - - - TruthAnnotations - Represents a truth value. - 1.0 - - - - eu.excitement.type.predicatetruth.PredicateTruth - This type represents a predicate truth value annotation. -It is an abstract representation from which the different Predicate Truth annotations will inherit (PT+,PT-,PT?). -This annotation covers a single predicate token. - uima.tcas.Annotation - - - eu.excitement.type.predicatetruth.PredicateTruthPositive - This type annotates predicates with PT+. - eu.excitement.type.predicatetruth.PredicateTruth - - - eu.excitement.type.predicatetruth.PredicateTruthNegative - This type annotates predicates with PT-. - eu.excitement.type.predicatetruth.PredicateTruth - - - eu.excitement.type.predicatetruth.PredicateTruthUncertain - This type annotates predicates with PT?. - eu.excitement.type.predicatetruth.PredicateTruth - - - eu.excitement.type.predicatetruth.PredicateTruthNotIdentified - - eu.excitement.type.predicatetruth.PredicateTruth - - - eu.excitement.type.predicatetruth.ClauseTruth - - uima.tcas.Annotation - - - clauseTokens - - uima.cas.FSArray - uima.tcas.Annotation - - - - - eu.excitement.type.predicatetruth.ClauseTruthPositive - - eu.excitement.type.predicatetruth.ClauseTruth - - - eu.excitement.type.predicatetruth.ClauseTruthNegative - - eu.excitement.type.predicatetruth.ClauseTruth - - - eu.excitement.type.predicatetruth.ClauseTruthUncertain - - eu.excitement.type.predicatetruth.ClauseTruth - - - eu.excitement.type.predicatetruth.ClauseTruthNotIdentified - - eu.excitement.type.predicatetruth.ClauseTruth - - - eu.excitement.type.predicatetruth.NegationAndUncertainty - - uima.tcas.Annotation - - - eu.excitement.type.predicatetruth.NegationAndUncertaintyPositive - - eu.excitement.type.predicatetruth.NegationAndUncertainty - - - eu.excitement.type.predicatetruth.NegationAndUncertaintyNegative - - 
eu.excitement.type.predicatetruth.NegationAndUncertainty - - - eu.excitement.type.predicatetruth.NegationAndUncertaintyUncertain - - eu.excitement.type.predicatetruth.NegationAndUncertainty - - - eu.excitement.type.predicatetruth.PredicateSignature - - uima.tcas.Annotation - - - eu.excitement.type.predicatetruth.PredicateSignaturePositivePositive - - eu.excitement.type.predicatetruth.PredicateSignature - - - eu.excitement.type.predicatetruth.PredicateSignaturePositiveNegative - - eu.excitement.type.predicatetruth.PredicateSignature - - - eu.excitement.type.predicatetruth.PredicateSignaturePositiveUncertain - - eu.excitement.type.predicatetruth.PredicateSignature - - - eu.excitement.type.predicatetruth.PredicateSignatureNegativePositive - - eu.excitement.type.predicatetruth.PredicateSignature - - - eu.excitement.type.predicatetruth.PredicateSignatureNegativeNegative - - eu.excitement.type.predicatetruth.PredicateSignature - - - eu.excitement.type.predicatetruth.PredicateSignatureNegativeUncertain - - eu.excitement.type.predicatetruth.PredicateSignature - - - eu.excitement.type.predicatetruth.PredicateSignatureUncertainPositive - - eu.excitement.type.predicatetruth.PredicateSignature - - - eu.excitement.type.predicatetruth.PredicateSignatureUncertainNegative - - eu.excitement.type.predicatetruth.PredicateSignature - - - eu.excitement.type.predicatetruth.PredicateSignatureUncertainUncertain - - eu.excitement.type.predicatetruth.PredicateSignature - - - + + + TruthAnnotations + Represents a truth value. + 1.0 + + + + eu.excitement.type.predicatetruth.PredicateTruth + This type represents a predicate truth value annotation. +It is an abstract representation from which the different Predicate Truth annotations will inherit (PT+,PT-,PT?). +This annotation covers a single predicate token. + uima.tcas.Annotation + + + eu.excitement.type.predicatetruth.PredicateTruthPositive + This type annotates predicates with PT+. 
+ eu.excitement.type.predicatetruth.PredicateTruth + + + eu.excitement.type.predicatetruth.PredicateTruthNegative + This type annotates predicates with PT-. + eu.excitement.type.predicatetruth.PredicateTruth + + + eu.excitement.type.predicatetruth.PredicateTruthUncertain + This type annotates predicates with PT?. + eu.excitement.type.predicatetruth.PredicateTruth + + + eu.excitement.type.predicatetruth.PredicateTruthNotIdentified + + eu.excitement.type.predicatetruth.PredicateTruth + + + eu.excitement.type.predicatetruth.ClauseTruth + + uima.tcas.Annotation + + + clauseTokens + + uima.cas.FSArray + uima.tcas.Annotation + + + + + eu.excitement.type.predicatetruth.ClauseTruthPositive + + eu.excitement.type.predicatetruth.ClauseTruth + + + eu.excitement.type.predicatetruth.ClauseTruthNegative + + eu.excitement.type.predicatetruth.ClauseTruth + + + eu.excitement.type.predicatetruth.ClauseTruthUncertain + + eu.excitement.type.predicatetruth.ClauseTruth + + + eu.excitement.type.predicatetruth.ClauseTruthNotIdentified + + eu.excitement.type.predicatetruth.ClauseTruth + + + eu.excitement.type.predicatetruth.NegationAndUncertainty + + uima.tcas.Annotation + + + eu.excitement.type.predicatetruth.NegationAndUncertaintyPositive + + eu.excitement.type.predicatetruth.NegationAndUncertainty + + + eu.excitement.type.predicatetruth.NegationAndUncertaintyNegative + + eu.excitement.type.predicatetruth.NegationAndUncertainty + + + eu.excitement.type.predicatetruth.NegationAndUncertaintyUncertain + + eu.excitement.type.predicatetruth.NegationAndUncertainty + + + eu.excitement.type.predicatetruth.PredicateSignature + + uima.tcas.Annotation + + + eu.excitement.type.predicatetruth.PredicateSignaturePositivePositive + + eu.excitement.type.predicatetruth.PredicateSignature + + + eu.excitement.type.predicatetruth.PredicateSignaturePositiveNegative + + eu.excitement.type.predicatetruth.PredicateSignature + + + eu.excitement.type.predicatetruth.PredicateSignaturePositiveUncertain + + 
eu.excitement.type.predicatetruth.PredicateSignature + + + eu.excitement.type.predicatetruth.PredicateSignatureNegativePositive + + eu.excitement.type.predicatetruth.PredicateSignature + + + eu.excitement.type.predicatetruth.PredicateSignatureNegativeNegative + + eu.excitement.type.predicatetruth.PredicateSignature + + + eu.excitement.type.predicatetruth.PredicateSignatureNegativeUncertain + + eu.excitement.type.predicatetruth.PredicateSignature + + + eu.excitement.type.predicatetruth.PredicateSignatureUncertainPositive + + eu.excitement.type.predicatetruth.PredicateSignature + + + eu.excitement.type.predicatetruth.PredicateSignatureUncertainNegative + + eu.excitement.type.predicatetruth.PredicateSignature + + + eu.excitement.type.predicatetruth.PredicateSignatureUncertainUncertain + + eu.excitement.type.predicatetruth.PredicateSignature + + + diff --git a/core/pom.xml b/core/pom.xml index 2718b392..3a73e1b7 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -3,7 +3,7 @@ eu.excitementproject eop - 1.2.0 + 1.2.1 core core @@ -22,13 +22,13 @@ eu.excitementproject common - 1.2.0 + 1.2.1 eu.excitementproject distsim - 1.2.0 + 1.2.1 @@ -57,7 +57,7 @@ eu.excitementproject lap - 1.2.0 + 1.2.1 unituebingen @@ -159,7 +159,7 @@ eu.excitementproject lexicalminer - 1.2.0 + 1.2.1 diff --git a/core/src/main/java/eu/excitementproject/eop/core/component/lexicalknowledge/wikipedia/it/WikipediaLexicalResourceDemoIT.java b/core/src/main/java/eu/excitementproject/eop/core/component/lexicalknowledge/wikipedia/it/WikipediaLexicalResourceDemoIT.java index 5a0ffe81..c32c9caa 100644 --- a/core/src/main/java/eu/excitementproject/eop/core/component/lexicalknowledge/wikipedia/it/WikipediaLexicalResourceDemoIT.java +++ b/core/src/main/java/eu/excitementproject/eop/core/component/lexicalknowledge/wikipedia/it/WikipediaLexicalResourceDemoIT.java @@ -1,100 +1,100 @@ -package eu.excitementproject.eop.core.component.lexicalknowledge.wikipedia.it; - -import java.io.File; -import java.util.HashSet; 
-import java.util.List; -import java.util.Set; - -import eu.excitementproject.eop.common.component.lexicalknowledge.LexicalRule; -import eu.excitementproject.eop.common.representation.partofspeech.BySimplerCanonicalPartOfSpeech; -import eu.excitementproject.eop.common.representation.partofspeech.PartOfSpeech; -import eu.excitementproject.eop.common.representation.partofspeech.SimplerCanonicalPosTag; -import eu.excitementproject.eop.common.utilities.Utils; -import eu.excitementproject.eop.core.component.lexicalknowledge.wikipedia.WikiExtractionType; -import eu.excitementproject.eop.core.component.lexicalknowledge.wikipedia.WikiRuleInfo; - - -/** - * Demo for LexResource - * - * @author Amnon Lotan - * @author Vivi Nastase (FBK) - * - * @since 06/05/2011 - * - */ -public class WikipediaLexicalResourceDemoIT { - - - /** - * @param args - * @throws Exception - */ - public static void main(String[] args) throws Exception - { - System.out.println("Start \n*****************************\n"); - - - String lLemma = "Italia"; - PartOfSpeech pos2 = new BySimplerCanonicalPartOfSpeech(SimplerCanonicalPosTag.NOUN); - String rLemma = "Venezia"; - System.out.println("Looking for all rules from \"" + lLemma + "\" to \"" + rLemma + "\""); - - // test Wikipedia - System.out.println("\nFrom the new WikiLexicalResource:"); - - Set extractionTypes = Utils.arrayToCollection(new WikiExtractionType[]{WikiExtractionType.REDIRECT,WikiExtractionType.CATEGORY, - WikiExtractionType.LEX_ALL_NOUNS,WikiExtractionType.SYNT_ALL_NOUNS}, new HashSet()); - File stopWordsFile = new File("src/test/resources/stopwords.txt"); -// WikiLexicalResourceIT wikiLexR = new WikiLexicalResourceIT(stopWordsFile, extractionTypes, "jdbc:mysql://nathrezim:3306/wikilexresita","root","nat_2k12", 0.01); -// WikiLexicalResourceIT wikiLexR = new WikiLexicalResourceIT(stopWordsFile, extractionTypes, "jdbc:mysql://hlt-services4:3306/wikilexresita","root","hlt4my2sql", 0.01); - WikiLexicalResourceIT wikiLexR = new 
WikiLexicalResourceIT(stopWordsFile, extractionTypes, "jdbc:mysql://localhost:3306/wikilexresita2","root","my_nor_2k", 0.01); - - -// ConfigurationFile confFile = new ConfigurationFile(new File("B:/Apps/BIUTEE/workdir/biutee_train.xml")); -// WikiLexicalResource wikiLexR = new WikiLexicalResource(confFile.getModuleConfiguration("Wiki")); - - - List> rules2 = wikiLexR.getRulesForLeft(lLemma, pos2 ); - - System.out.println("Got "+rules2.size() + " for: " + lLemma + ", " + pos2 ); - for (LexicalRule rule : rules2) - System.out.println(rule); - - System.out.println(lLemma +" has " + rules2.size() ); - System.out.println("\n*****************************\n"); - - - - List> otherRules = wikiLexR.getRules(lLemma, null, rLemma, null); - System.out.println("Got "+otherRules.size() + " for: " + lLemma + ", " + pos2 + ", " + rLemma + ", " + pos2); - for (LexicalRule rule : otherRules) - System.out.println(rule); - - System.out.println(lLemma +" and " + rLemma + " have " + otherRules.size()); - - // uncomment these lines to compare to the old resource - this requires you to add project dependencies. 
- -// System.out.println("\n************************* Old Wiki **********************\n"); -// -// String configurationFileName = "b:/Apps/BIUTEE/workdir/biutee_train.xml"; -// ConfigurationFile confFile = new ConfigurationFile(new File(configurationFileName)); -// ConfigurationParams wikiModule = confFile.getModuleConfiguration("Wiki"); -// -// new ExperimentLoggerNeutralizer().neutralize(); -// // Use the file log4j.properties to initialize log4j -// PropertyConfigurator.configure("log4j.properties"); -//// new LogInitializer(configurationFileName).init(); -// -// WikipediaLexicalRuleBase oldWiki = new WikipediaLexicalRuleBase(wikiModule); -// -// ImmutableSet oldRules = oldWiki.getRules(lLemma, pos2); -// System.out.println("Got "+oldRules.size() + " for: " + lLemma + ", " + pos2 ); -// for (ac.biu.nlp.nlp.engineml.operations.rules.LexicalRule rule : oldRules) -// System.out.println(rule); -// -// System.out.println(lLemma +" has " + oldRules.size() ); -// System.out.println("\n*****************************\n"); - - } -} +package eu.excitementproject.eop.core.component.lexicalknowledge.wikipedia.it; + +import java.io.File; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import eu.excitementproject.eop.common.component.lexicalknowledge.LexicalRule; +import eu.excitementproject.eop.common.representation.partofspeech.BySimplerCanonicalPartOfSpeech; +import eu.excitementproject.eop.common.representation.partofspeech.PartOfSpeech; +import eu.excitementproject.eop.common.representation.partofspeech.SimplerCanonicalPosTag; +import eu.excitementproject.eop.common.utilities.Utils; +import eu.excitementproject.eop.core.component.lexicalknowledge.wikipedia.WikiExtractionType; +import eu.excitementproject.eop.core.component.lexicalknowledge.wikipedia.WikiRuleInfo; + + +/** + * Demo for LexResource + * + * @author Amnon Lotan + * @author Vivi Nastase (FBK) + * + * @since 06/05/2011 + * + */ +public class WikipediaLexicalResourceDemoIT { + + + /** 
+ * @param args + * @throws Exception + */ + public static void main(String[] args) throws Exception + { + System.out.println("Start \n*****************************\n"); + + + String lLemma = "Italia"; + PartOfSpeech pos2 = new BySimplerCanonicalPartOfSpeech(SimplerCanonicalPosTag.NOUN); + String rLemma = "Venezia"; + System.out.println("Looking for all rules from \"" + lLemma + "\" to \"" + rLemma + "\""); + + // test Wikipedia + System.out.println("\nFrom the new WikiLexicalResource:"); + + Set extractionTypes = Utils.arrayToCollection(new WikiExtractionType[]{WikiExtractionType.REDIRECT,WikiExtractionType.CATEGORY, + WikiExtractionType.LEX_ALL_NOUNS,WikiExtractionType.SYNT_ALL_NOUNS}, new HashSet()); + File stopWordsFile = new File("src/test/resources/stopwords.txt"); +// WikiLexicalResourceIT wikiLexR = new WikiLexicalResourceIT(stopWordsFile, extractionTypes, "jdbc:mysql://nathrezim:3306/wikilexresita","root","nat_2k12", 0.01); +// WikiLexicalResourceIT wikiLexR = new WikiLexicalResourceIT(stopWordsFile, extractionTypes, "jdbc:mysql://hlt-services4:3306/wikilexresita","root","hlt4my2sql", 0.01); + WikiLexicalResourceIT wikiLexR = new WikiLexicalResourceIT(stopWordsFile, extractionTypes, "jdbc:mysql://localhost:3306/wikilexresita2","root","my_nor_2k", 0.01); + + +// ConfigurationFile confFile = new ConfigurationFile(new File("B:/Apps/BIUTEE/workdir/biutee_train.xml")); +// WikiLexicalResource wikiLexR = new WikiLexicalResource(confFile.getModuleConfiguration("Wiki")); + + + List> rules2 = wikiLexR.getRulesForLeft(lLemma, pos2 ); + + System.out.println("Got "+rules2.size() + " for: " + lLemma + ", " + pos2 ); + for (LexicalRule rule : rules2) + System.out.println(rule); + + System.out.println(lLemma +" has " + rules2.size() ); + System.out.println("\n*****************************\n"); + + + + List> otherRules = wikiLexR.getRules(lLemma, null, rLemma, null); + System.out.println("Got "+otherRules.size() + " for: " + lLemma + ", " + pos2 + ", " + rLemma + ", " + 
pos2); + for (LexicalRule rule : otherRules) + System.out.println(rule); + + System.out.println(lLemma +" and " + rLemma + " have " + otherRules.size()); + + // uncomment these lines to compare to the old resource - this requires you to add project dependencies. + +// System.out.println("\n************************* Old Wiki **********************\n"); +// +// String configurationFileName = "b:/Apps/BIUTEE/workdir/biutee_train.xml"; +// ConfigurationFile confFile = new ConfigurationFile(new File(configurationFileName)); +// ConfigurationParams wikiModule = confFile.getModuleConfiguration("Wiki"); +// +// new ExperimentLoggerNeutralizer().neutralize(); +// // Use the file log4j.properties to initialize log4j +// PropertyConfigurator.configure("log4j.properties"); +//// new LogInitializer(configurationFileName).init(); +// +// WikipediaLexicalRuleBase oldWiki = new WikipediaLexicalRuleBase(wikiModule); +// +// ImmutableSet oldRules = oldWiki.getRules(lLemma, pos2); +// System.out.println("Got "+oldRules.size() + " for: " + lLemma + ", " + pos2 ); +// for (ac.biu.nlp.nlp.engineml.operations.rules.LexicalRule rule : oldRules) +// System.out.println(rule); +// +// System.out.println(lLemma +" has " + oldRules.size() ); +// System.out.println("\n*****************************\n"); + + } +} diff --git a/distsim/pom.xml b/distsim/pom.xml index e63e5282..0b2a9d42 100644 --- a/distsim/pom.xml +++ b/distsim/pom.xml @@ -3,7 +3,7 @@ eu.excitementproject eop - 1.2.0 + 1.2.1 distsim distsim @@ -43,19 +43,19 @@ eu.excitementproject common - 1.2.0 + 1.2.1 eu.excitementproject redis - 1.2.0 + 1.2.1 eu.excitementproject lap - 1.2.0 + 1.2.1 " + similarity.getRightElement() + " : " + similarity.getSimilarityMeasure()); } -} +} \ No newline at end of file diff --git a/distsim/src/main/java/eu/excitementproject/eop/distsim/storage/SimilarityFile2Redis.java b/distsim/src/main/java/eu/excitementproject/eop/distsim/storage/SimilarityFile2Redis.java index afa84018..4680309f 100644 --- 
a/distsim/src/main/java/eu/excitementproject/eop/distsim/storage/SimilarityFile2Redis.java +++ b/distsim/src/main/java/eu/excitementproject/eop/distsim/storage/SimilarityFile2Redis.java @@ -16,6 +16,7 @@ import eu.excitementproject.eop.common.utilities.configuration.ConfigurationParams; import eu.excitementproject.eop.common.utilities.configuration.ImplCommonConfig; import eu.excitementproject.eop.distsim.items.Element; +import eu.excitementproject.eop.distsim.items.Feature; import eu.excitementproject.eop.distsim.items.UndefinedKeyException; import eu.excitementproject.eop.distsim.util.Configuration; import eu.excitementproject.eop.distsim.util.Factory; @@ -34,13 +35,15 @@ public class SimilarityFile2Redis { private static CountableIdentifiableStorage elementStorage = null; + private static CountableIdentifiableStorage featureStorage = null; private static boolean bElementTypeFound = false; + private static boolean bFirstOrder = false; private static Redis redis = null; public static void main(String[] args) { if (args.length != 1) { - System.err.println("Usage: SimilarityFile2Redis "); + System.err.println("Usage: java SimilarityFile2Redis "); System.exit(0); } @@ -55,7 +58,13 @@ public static void main(String[] args) { logger = Logger.getLogger(SimilarityFile2Redis.class); final ConfigurationParams confParams = confFile.getModuleConfiguration(Configuration.FILE_TO_REDIS); - + + try { + bFirstOrder = confParams.getBoolean(Configuration.FIRST_ORDER); + } catch (ConfigurationException e) { + + } + int maxSimilaritiesNum = -1; try { maxSimilaritiesNum = confParams.getInt(Configuration.MAX_SIMILARITIES_PER_ELEMENT); @@ -64,6 +73,14 @@ public static void main(String[] args) { File elementsFile = new File(new java.io.File(confParams.get(Configuration.ELEMENTS_FILE)),true); elementsFile.open(); elementStorage = new MemoryBasedCountableIdentifiableStorage(elementsFile); + elementsFile.close(); + if (bFirstOrder) { + File featuresFile = new File(new 
java.io.File(confParams.get(Configuration.FEATURES_FILE)),true); + featuresFile.open(); + featureStorage = new MemoryBasedCountableIdentifiableStorage(featuresFile); + featuresFile.close(); + } + File file; //try { file = (File)Factory.create(confParams.get(Configuration.CLASS),new java.io.File(confParams.get(Configuration.FILE)),true); @@ -86,7 +103,7 @@ public static void main(String[] args) { else { int element1Id = pair.getFirst(); - String element1key = getElementKey(element1Id); + String element1key = getElement1Key(element1Id); //tmp /*String element1key = null; try { @@ -101,7 +118,7 @@ public static void main(String[] args) { for (Entry elementScore : sortedElementScores.entrySet()) { int element2Id = elementScore.getKey(); double score = elementScore.getValue(); - String element2key = getElementKey(element2Id); + String element2key = getElement2Key(element2Id); //tmp /*String element2key = null; try { @@ -132,7 +149,16 @@ public static void main(String[] args) { } } - protected static String getElementKey(int element1Id) throws ItemNotFoundException, SerializationException, UndefinedKeyException { + protected static String getElement2Key(int element2Id) throws ItemNotFoundException, SerializationException, UndefinedKeyException { + if (!bFirstOrder) + return getElement1Key(element2Id); + else { + Feature feature = featureStorage.getData(element2Id); + return feature.toKey(); + } + } + + protected static String getElement1Key(int element1Id) throws ItemNotFoundException, SerializationException, UndefinedKeyException { Element element = elementStorage.getData(element1Id); if (!bElementTypeFound && redis != null) { redis.write(RedisBasedStringListBasicMap.ELEMENT_CLASS_NAME_KEY,element.getClass().getName()); diff --git a/distsim/src/main/java/eu/excitementproject/eop/distsim/util/Configuration.java b/distsim/src/main/java/eu/excitementproject/eop/distsim/util/Configuration.java index b2cea71c..9b8355b0 100644 --- 
a/distsim/src/main/java/eu/excitementproject/eop/distsim/util/Configuration.java +++ b/distsim/src/main/java/eu/excitementproject/eop/distsim/util/Configuration.java @@ -126,7 +126,8 @@ public class Configuration { public static final String REDIS_FILE = "redis-file"; public static final String START_ELEMENT_ID = "stasrt-element-id"; public static final String MIN_FEATURES_SIZE = "min-features-size"; - public static final String MAX_SIMILARITIES_PER_ELEMENT = "max-similarities-per-element"; + public static final String MAX_SIMILARITIES_PER_ELEMENT = "max-similarities-per-element"; + public static final String FIRST_ORDER = "first-order"; } diff --git a/distsim/src/main/resources/demo/configurations/bap/elements-similarities-left-to-redis.xml b/distsim/src/main/resources/demo/configurations/bap/elements-similarities-left-to-redis.xml index 7bd21958..ccbcdcfc 100644 --- a/distsim/src/main/resources/demo/configurations/bap/elements-similarities-left-to-redis.xml +++ b/distsim/src/main/resources/demo/configurations/bap/elements-similarities-left-to-redis.xml @@ -19,6 +19,14 @@ models/bap/elements + + models/bap/features + + + false + + models/bap/similarity-l2r.rdb diff --git a/distsim/src/main/resources/demo/configurations/bap/elements-similarities-right-to-redis.xml b/distsim/src/main/resources/demo/configurations/bap/elements-similarities-right-to-redis.xml index b2bb5e17..46c27e7e 100644 --- a/distsim/src/main/resources/demo/configurations/bap/elements-similarities-right-to-redis.xml +++ b/distsim/src/main/resources/demo/configurations/bap/elements-similarities-right-to-redis.xml @@ -25,6 +25,14 @@ 100 + + models/bap/features + + + false + + diff --git a/distsim/src/main/resources/demo/configurations/lin/dependency/elements-similarities-left-to-redis.xml b/distsim/src/main/resources/demo/configurations/lin/dependency/elements-similarities-left-to-redis.xml index 4886cbcb..0542cccb 100644 --- 
a/distsim/src/main/resources/demo/configurations/lin/dependency/elements-similarities-left-to-redis.xml +++ b/distsim/src/main/resources/demo/configurations/lin/dependency/elements-similarities-left-to-redis.xml @@ -25,6 +25,14 @@ 100 + + models/lin/dependency/features + + + false + + diff --git a/distsim/src/main/resources/demo/configurations/lin/dependency/elements-similarities-right-to-redis.xml b/distsim/src/main/resources/demo/configurations/lin/dependency/elements-similarities-right-to-redis.xml index 7b46223d..b2c18e9e 100644 --- a/distsim/src/main/resources/demo/configurations/lin/dependency/elements-similarities-right-to-redis.xml +++ b/distsim/src/main/resources/demo/configurations/lin/dependency/elements-similarities-right-to-redis.xml @@ -25,6 +25,13 @@ 100 + + models/lin/dependency/features + + + false + diff --git a/distsim/src/main/resources/demo/configurations/lin/proximity/elements-similarities-left-to-redis.xml b/distsim/src/main/resources/demo/configurations/lin/proximity/elements-similarities-left-to-redis.xml index 6f185b94..303fe90d 100644 --- a/distsim/src/main/resources/demo/configurations/lin/proximity/elements-similarities-left-to-redis.xml +++ b/distsim/src/main/resources/demo/configurations/lin/proximity/elements-similarities-left-to-redis.xml @@ -25,6 +25,14 @@ 100 + + models/lin/proximity/features + + + false + + diff --git a/distsim/src/main/resources/demo/configurations/lin/proximity/elements-similarities-right-to-redis.xml b/distsim/src/main/resources/demo/configurations/lin/proximity/elements-similarities-right-to-redis.xml index 94e04f9f..188a4fb0 100644 --- a/distsim/src/main/resources/demo/configurations/lin/proximity/elements-similarities-right-to-redis.xml +++ b/distsim/src/main/resources/demo/configurations/lin/proximity/elements-similarities-right-to-redis.xml @@ -25,6 +25,13 @@ 100 + + models/lin/proximity/features + + + false + diff --git a/globalgraphoptimizer/pom.xml b/globalgraphoptimizer/pom.xml index 
01d112d3..137d7675 100644 --- a/globalgraphoptimizer/pom.xml +++ b/globalgraphoptimizer/pom.xml @@ -4,7 +4,7 @@ eu.excitementproject eop - 1.2.0 + 1.2.1 globalgraphoptimizer @@ -19,7 +19,7 @@ eu.excitementproject common - 1.2.0 + 1.2.1 diff --git a/lap/pom.xml b/lap/pom.xml index 6b348558..ea3ccfe3 100644 --- a/lap/pom.xml +++ b/lap/pom.xml @@ -5,7 +5,7 @@ eu.excitementproject eop - 1.2.0 + 1.2.1 lap @@ -44,6 +44,18 @@ + + + + eu.fbk + de.tudarmstadt.ukp.dkpro.core.api.lexmorph-asl + 1.4.0 + + junit junit @@ -89,7 +101,7 @@ eu.excitementproject common - 1.2.0 + 1.2.1 de.tudarmstadt.ukp.dkpro.core diff --git a/lap/src/scripts/treetagger/build.xml b/lap/src/scripts/treetagger/build.xml index 7e51dec4..2a819f4d 100644 --- a/lap/src/scripts/treetagger/build.xml +++ b/lap/src/scripts/treetagger/build.xml @@ -209,7 +209,7 @@ platform="windows-x86_64" file="tree-tagger.exe" md5="c88d7fe1aa63bebaccfa019c222f54ea"/> - + - - - - - - org.codehaus.mojo - gwt-maven-plugin - 2.6.1 - - - - - compile - - - - - - - - - - diff --git a/tracer/src/main/java/eu/excitementproject/eop/tracer/client/EntailmentService.java b/tracer/src/main/java/eu/excitementproject/eop/tracer/client/EntailmentService.java deleted file mode 100644 index 59687a2a..00000000 --- a/tracer/src/main/java/eu/excitementproject/eop/tracer/client/EntailmentService.java +++ /dev/null @@ -1,22 +0,0 @@ -package eu.excitementproject.eop.tracer.client; - -import com.google.gwt.user.client.rpc.RemoteService; - - -import com.google.gwt.user.client.rpc.RemoteServiceRelativePath; - -import eu.excitementproject.eop.tracer.shared.EntailmentServiceException; - -/** - * The client side stub for the RPC service. 
- */ -@RemoteServiceRelativePath("entailment") -public interface EntailmentService extends RemoteService { - //int init(); - String resolve(String text,String hypothesis) throws EntailmentServiceException; - //Set getAnnotations(int id); - //Set getAlignments(int id); - //String getDecision(int id); - //int close(); -} - \ No newline at end of file diff --git a/tracer/src/main/java/eu/excitementproject/eop/tracer/client/EntailmentServiceAsync.java b/tracer/src/main/java/eu/excitementproject/eop/tracer/client/EntailmentServiceAsync.java deleted file mode 100644 index 11df5421..00000000 --- a/tracer/src/main/java/eu/excitementproject/eop/tracer/client/EntailmentServiceAsync.java +++ /dev/null @@ -1,16 +0,0 @@ -package eu.excitementproject.eop.tracer.client; - -//import org.apache.uima.jcas.JCas; - -import com.google.gwt.user.client.rpc.AsyncCallback; - - - -import eu.excitementproject.eop.tracer.shared.EntailmentServiceException; - -/** - * The async counterpart of SearchService. - */ -public interface EntailmentServiceAsync { - void resolve(String text, String hypotheis, AsyncCallback callback) throws EntailmentServiceException; -} diff --git a/tracer/src/main/java/eu/excitementproject/eop/tracer/client/Tracer.java b/tracer/src/main/java/eu/excitementproject/eop/tracer/client/Tracer.java deleted file mode 100644 index cdeb80cf..00000000 --- a/tracer/src/main/java/eu/excitementproject/eop/tracer/client/Tracer.java +++ /dev/null @@ -1,53 +0,0 @@ -package eu.excitementproject.eop.tracer.client; - -import com.google.gwt.core.client.EntryPoint; - -import com.google.gwt.user.client.rpc.AsyncCallback; -import com.google.gwt.user.client.ui.Label; -import com.google.gwt.user.client.ui.RootPanel; -import com.google.gwt.user.client.ui.VerticalPanel; -import com.google.gwt.core.client.GWT; - -/** - * Entry point classes define onModuleLoad(). - */ -public class Tracer implements EntryPoint { - - /** - * Create a remote service proxy to talk to the server-side search service. 
- */ - private final EntailmentServiceAsync entailmentService = GWT.create(EntailmentService.class); - - private final VerticalPanel mainPanel = new VerticalPanel(); - private final Label label = new Label(); - /** - * This is the entry point method. - */ - public void onModuleLoad() { - - //JCas jcas; - - mainPanel.add(label); - - try{ - entailmentService.resolve("","", - new AsyncCallback() { - - public void onFailure(Throwable e) { - label.setText(e.toString()); - } - - public void onSuccess(String jcas) { - label.setText(jcas.toString()); - } - - } - ); - - RootPanel.get("TracingApplication").add(mainPanel); - - } catch (Exception e) { - label.setText(e.toString()); - } - } -} diff --git a/tracer/src/main/java/eu/excitementproject/eop/tracer/server/EntailmentServiceImpl.java b/tracer/src/main/java/eu/excitementproject/eop/tracer/server/EntailmentServiceImpl.java deleted file mode 100644 index b0580336..00000000 --- a/tracer/src/main/java/eu/excitementproject/eop/tracer/server/EntailmentServiceImpl.java +++ /dev/null @@ -1,34 +0,0 @@ -package eu.excitementproject.eop.tracer.server; - - -import com.google.gwt.user.server.rpc.RemoteServiceServlet; - - -import eu.excitementproject.eop.tracer.client.EntailmentService; -import eu.excitementproject.eop.tracer.shared.EntailmentServiceException; -import eu.excitementproject.eop.common.utilities.uima.UimaUtils; -import eu.excitementproject.eop.common.utilities.uima.UimaUtilsException; - - -public class EntailmentServiceImpl extends RemoteServiceServlet implements EntailmentService { - - /** - * - */ - private static final long serialVersionUID = 1L; - - public EntailmentServiceImpl() { - } - - @Override - public String resolve(String text,String hypothesis) throws EntailmentServiceException { - try { - @SuppressWarnings("unused") - org.apache.uima.jcas.JCas jcas = UimaUtils.newJcas(); - return "Hello World"; - } catch (UimaUtilsException e) { - throw new EntailmentServiceException(e); - } - } - -} diff --git 
a/tracer/src/main/java/eu/excitementproject/eop/tracer/shared/EntailmentServiceException.java b/tracer/src/main/java/eu/excitementproject/eop/tracer/shared/EntailmentServiceException.java deleted file mode 100644 index 34065cf2..00000000 --- a/tracer/src/main/java/eu/excitementproject/eop/tracer/shared/EntailmentServiceException.java +++ /dev/null @@ -1,51 +0,0 @@ -/** - * - */ -package eu.excitementproject.eop.tracer.shared; - -/** - * @author Meni Adler - * @since May 21, 2014 - * - */ -public class EntailmentServiceException extends Exception { - - /** - * - */ - private static final long serialVersionUID = 1L; - - /** - * - */ - public EntailmentServiceException() { - // TODO Auto-generated constructor stub - } - - /** - * @param arg0 - */ - public EntailmentServiceException(String arg0) { - super(arg0); - // TODO Auto-generated constructor stub - } - - /** - * @param arg0 - */ - public EntailmentServiceException(Throwable arg0) { - super(arg0); - // TODO Auto-generated constructor stub - } - - /** - * @param arg0 - * @param arg1 - */ - public EntailmentServiceException(String arg0, Throwable arg1) { - super(arg0, arg1); - // TODO Auto-generated constructor stub - } - - -} diff --git a/tracer/src/main/java/tracer.gwt.xml b/tracer/src/main/java/tracer.gwt.xml deleted file mode 100644 index c28a370a..00000000 --- a/tracer/src/main/java/tracer.gwt.xml +++ /dev/null @@ -1,27 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tracer/src/main/webapp/ExploratorySearch.css b/tracer/src/main/webapp/ExploratorySearch.css deleted file mode 100644 index 4ec686d0..00000000 --- a/tracer/src/main/webapp/ExploratorySearch.css +++ /dev/null @@ -1,38 +0,0 @@ -/** Add css rules here for your application. 
*/ - - -/** Most GWT widgets already have a style name defined */ -.gwt-DialogBox { - width: 400px; -} - -.dialogVPanel { - margin: 5px; -} - -.serverResponseLabelError { - color: red; -} - -.categories { - padding-top: 40px; - padding-bottom: 0px; -} - -.instances { - padding-top: 80px; - padding-bottom: 0px; -} - - -/*.gwt-Tree .gwt-TreeItem-selected { - background: blue; - color:white; -} */ - - -/** Set ids using widget.getElement().setId("idOfElement") */ -#closeButton { - margin: 15px 6px 6px; -} - diff --git a/tracer/src/main/webapp/WEB-INF/web.xml b/tracer/src/main/webapp/WEB-INF/web.xml deleted file mode 100644 index 60bb6260..00000000 --- a/tracer/src/main/webapp/WEB-INF/web.xml +++ /dev/null @@ -1,25 +0,0 @@ - - - - - - - - - entailmentServlet - /tracer/entailment - - - - entailmentServlet - eu.excitementproject.eop.tracer.server.EntailmentServiceImpl - - - - - tracer.html - - - diff --git a/tracer/src/main/webapp/about.html b/tracer/src/main/webapp/about.html deleted file mode 100644 index 893cd3c0..00000000 --- a/tracer/src/main/webapp/about.html +++ /dev/null @@ -1,70 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - BIU Health-care Exploration System - - - - - - - - - - - - - - - -
  - -

- - Health-care Exploration System -

- -

- Explore your search results by drilling down and up the medical concepts, their semantic relations, and their mentions in the text. - - XYZ is We present a novel text exploration model, -which extends the scope of state-of-the-art -technologies by moving from standard concept- -based exploration to statement-based exploration. -The proposed scheme utilizes the -textual entailment relation between statements -as the basis of the exploration process. A user -of our system can explore the result space of -a query by drilling down/up from one statement -to another, according to entailment relations -specified by an entailment graph and -an optional concept taxonomy. As a prominent -use case, we apply our exploration system -and illustrate its benefit on the health-care -domain. To the best of our knowledge this is -the first implementation of an exploration system -at the statement level that is based on the -textual entailment relation. -

-
- -
- - diff --git a/tracer/src/main/webapp/tracer.html b/tracer/src/main/webapp/tracer.html deleted file mode 100644 index c040e672..00000000 --- a/tracer/src/main/webapp/tracer.html +++ /dev/null @@ -1,62 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - EOP Tracer - - - - - - - - - - - - - - - - - - - - - - - - - - -
  - -

- EOP Tracer -

- -
-
-
- -
  -
-
- - diff --git a/tracer/src/test/java/eu/excitementproject/eop/tracer/AppTest.java b/tracer/src/test/java/eu/excitementproject/eop/tracer/AppTest.java deleted file mode 100644 index 6c366b0a..00000000 --- a/tracer/src/test/java/eu/excitementproject/eop/tracer/AppTest.java +++ /dev/null @@ -1,38 +0,0 @@ -package eu.excitementproject.eop.tracer; - -import junit.framework.Test; -import junit.framework.TestCase; -import junit.framework.TestSuite; - -/** - * Unit test for simple App. - */ -public class AppTest - extends TestCase -{ - /** - * Create the test case - * - * @param testName name of the test case - */ - public AppTest( String testName ) - { - super( testName ); - } - - /** - * @return the suite of tests being tested - */ - public static Test suite() - { - return new TestSuite( AppTest.class ); - } - - /** - * Rigourous Test :-) - */ - public void testApp() - { - assertTrue( true ); - } -} diff --git a/transformations/pom.xml b/transformations/pom.xml index 461f6071..7042a7b6 100644 --- a/transformations/pom.xml +++ b/transformations/pom.xml @@ -4,7 +4,7 @@ eu.excitementproject eop - 1.2.0 + 1.2.1 transformations transformations @@ -31,23 +31,23 @@ eu.excitementproject common - 1.2.0 + 1.2.1 eu.excitementproject lap - 1.2.0 + 1.2.1 eu.excitementproject core - 1.2.0 + 1.2.1 eu.excitementproject lexicalminer - 1.2.0 + 1.2.1 diff --git a/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/PredicateTruth.java b/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/PredicateTruth.java index d8eaba72..56ba3420 100644 --- a/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/PredicateTruth.java +++ b/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/PredicateTruth.java @@ -1,59 +1,59 @@ -package eu.excitementproject.eop.transformations.biu.en.predicatetruth; - -import java.util.List; - -import 
eu.excitementproject.eop.lap.biu.ner.NamedEntityRecognizerException; -import eu.excitementproject.eop.transformations.generic.truthteller.conll.ConllConverterException; -import eu.excitementproject.eop.transformations.representation.ExtendedNode; - -/** - * An interface for Truth annotations components - * - * @author Gabi Stanovsky - * @since Aug 2014 - */ - -public interface PredicateTruth { - - /** - * Call this method only once. - *

- * Call the method to initialize the {@linkplain PredicateTruth} - *

- * Don't call other methods of this interface before calling {@linkplain #init()} method. - * @throws ConllConverterException - * @throws NamedEntityRecognizerException An error occured while trying to initialize. - */ - public void init() throws PredicateTruthException; - - /** - * Set a sentence to the {@linkplain PredicateTruth}. - * */ - public void setSentence(ExtendedNode annotatedSentence); - - /** - * Assigns truth value to the words in the sentence.
- * Assigns null for words which don't have truth values. - * @throws PredicateTruthException on Any error - * @throws ConllConverterException - */ - public void annotate() throws PredicateTruthException; - - /** - *

Call this method only after calling {@link #annotate()} method. - * @return a List of corresponding to truth values according to the position of the token in the sentence - * as assigned by the truth annotator (by the {@link #annotate} method). - */ - public List getAnnotatedEntities(); - - /** - * Call this method once you have finished using the {@link PredicateTruth}, - * and you will not use it any more. - *

- * I.e. DON'T call it each time you are done with a sentence, but - * only once there are no more sentences to be annotated any more. - */ - public void cleanUp(); -} - - +package eu.excitementproject.eop.transformations.biu.en.predicatetruth; + +import java.util.List; + +import eu.excitementproject.eop.lap.biu.ner.NamedEntityRecognizerException; +import eu.excitementproject.eop.transformations.generic.truthteller.conll.ConllConverterException; +import eu.excitementproject.eop.transformations.representation.ExtendedNode; + +/** + * An interface for Truth annotations components + * + * @author Gabi Stanovsky + * @since Aug 2014 + */ + +public interface PredicateTruth { + + /** + * Call this method only once. + *

+ * Call the method to initialize the {@linkplain PredicateTruth} + *

+ * Don't call other methods of this interface before calling {@linkplain #init()} method. + * @throws ConllConverterException + * @throws NamedEntityRecognizerException An error occured while trying to initialize. + */ + public void init() throws PredicateTruthException; + + /** + * Set a sentence to the {@linkplain PredicateTruth}. + * */ + public void setSentence(ExtendedNode annotatedSentence); + + /** + * Assigns truth value to the words in the sentence.
+ * Assigns null for words which don't have truth values. + * @throws PredicateTruthException on Any error + * @throws ConllConverterException + */ + public void annotate() throws PredicateTruthException; + + /** + *

Call this method only after calling {@link #annotate()} method. + * @return a List of corresponding to truth values according to the position of the token in the sentence + * as assigned by the truth annotator (by the {@link #annotate} method). + */ + public List getAnnotatedEntities(); + + /** + * Call this method once you have finished using the {@link PredicateTruth}, + * and you will not use it any more. + *

+ * I.e. DON'T call it each time you are done with a sentence, but + * only once there are no more sentences to be annotated any more. + */ + public void cleanUp(); +} + + diff --git a/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/PredicateTruthException.java b/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/PredicateTruthException.java index 3cefc67b..a7eb4e3f 100644 --- a/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/PredicateTruthException.java +++ b/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/PredicateTruthException.java @@ -1,23 +1,23 @@ -package eu.excitementproject.eop.transformations.biu.en.predicatetruth; - - -/** - * Exception class thrown by {@link PredicateTruth} in any error case. - * @author Gabi Stanovsky - * @since Aug 2014 - */ -public class PredicateTruthException extends Exception -{ - private static final long serialVersionUID = 1L; - - public PredicateTruthException(String message, Throwable cause) - { - super(message, cause); - } - - public PredicateTruthException(String message) - { - super(message); - } - -} +package eu.excitementproject.eop.transformations.biu.en.predicatetruth; + + +/** + * Exception class thrown by {@link PredicateTruth} in any error case. 
+ * @author Gabi Stanovsky + * @since Aug 2014 + */ +public class PredicateTruthException extends Exception +{ + private static final long serialVersionUID = 1L; + + public PredicateTruthException(String message, Throwable cause) + { + super(message, cause); + } + + public PredicateTruthException(String message) + { + super(message); + } + +} diff --git a/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/SingleTokenTruthAnnotation.java b/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/SingleTokenTruthAnnotation.java index b8840dcd..dcb74851 100644 --- a/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/SingleTokenTruthAnnotation.java +++ b/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/SingleTokenTruthAnnotation.java @@ -1,89 +1,89 @@ -package eu.excitementproject.eop.transformations.biu.en.predicatetruth; - -import java.util.List; - -import eu.excitementproject.eop.transformations.representation.ExtendedNode; -import eu.excitementproject.eop.transformations.representation.annotations.ClauseTruth; -import eu.excitementproject.eop.transformations.representation.annotations.NegationAndUncertainty; -import eu.excitementproject.eop.transformations.representation.annotations.PredTruth; -import eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature; - -/** - * A container for the result of truth annotation on a single token - * contains Predicate Truth, Clause Truth, Negation and Uncertainty, and Predicate Signature - * If any of these is missing, an empty string is expected. 
- * @author Gabi Stanovsky - * @since Aug 2014 - */ - -public class SingleTokenTruthAnnotation { - - private PredTruth predicateTruthValue; - private ClauseTruth clauseTruthValue; - private NegationAndUncertainty nuValue; - private PredicateSignature predicateSignatureValue; - - private List subtree; - private Integer subtreeMinimalIndex,subtreeMaximalIndex; - - public SingleTokenTruthAnnotation(PredTruth pt,ClauseTruth ct, NegationAndUncertainty nu, PredicateSignature sig, List sub){ - predicateTruthValue = pt; - clauseTruthValue = ct; - nuValue = nu; - predicateSignatureValue = sig; - subtree=sub; - subtreeMaximalIndex = null; - subtreeMinimalIndex = null; - } - - public SingleTokenTruthAnnotation(PredTruth pt,ClauseTruth ct, NegationAndUncertainty nu, PredicateSignature sig){ - this(pt,ct,nu,sig,null); - } - - - - public int getSubtreeMinimalIndex() { - return subtreeMinimalIndex; - } - - public void setSubtreeMinimalIndex(int subtreeMinimalIndex) { - this.subtreeMinimalIndex = subtreeMinimalIndex; - } - - public int getSubtreeMaximalIndex() { - return subtreeMaximalIndex; - } - - public void setSubtreeMaximalIndex(int subtreeMaximalIndex) { - this.subtreeMaximalIndex = subtreeMaximalIndex; - } - - public PredTruth getPredicateTruthValue() { - return predicateTruthValue; - } - - public ClauseTruth getClauseTruthValue() { - return clauseTruthValue; - } - - public NegationAndUncertainty getNuValue() { - return nuValue; - } - - public PredicateSignature getPredicateSignatureValue() { - return predicateSignatureValue; - } - - public List getSubtree() { - return subtree; - } - - - public void setSubtree(List subtree) { - this.subtree = subtree; - } - - - - -} +package eu.excitementproject.eop.transformations.biu.en.predicatetruth; + +import java.util.List; + +import eu.excitementproject.eop.transformations.representation.ExtendedNode; +import eu.excitementproject.eop.transformations.representation.annotations.ClauseTruth; +import 
eu.excitementproject.eop.transformations.representation.annotations.NegationAndUncertainty; +import eu.excitementproject.eop.transformations.representation.annotations.PredTruth; +import eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature; + +/** + * A container for the result of truth annotation on a single token + * contains Predicate Truth, Clause Truth, Negation and Uncertainty, and Predicate Signature + * If any of these is missing, an empty string is expected. + * @author Gabi Stanovsky + * @since Aug 2014 + */ + +public class SingleTokenTruthAnnotation { + + private PredTruth predicateTruthValue; + private ClauseTruth clauseTruthValue; + private NegationAndUncertainty nuValue; + private PredicateSignature predicateSignatureValue; + + private List subtree; + private Integer subtreeMinimalIndex,subtreeMaximalIndex; + + public SingleTokenTruthAnnotation(PredTruth pt,ClauseTruth ct, NegationAndUncertainty nu, PredicateSignature sig, List sub){ + predicateTruthValue = pt; + clauseTruthValue = ct; + nuValue = nu; + predicateSignatureValue = sig; + subtree=sub; + subtreeMaximalIndex = null; + subtreeMinimalIndex = null; + } + + public SingleTokenTruthAnnotation(PredTruth pt,ClauseTruth ct, NegationAndUncertainty nu, PredicateSignature sig){ + this(pt,ct,nu,sig,null); + } + + + + public int getSubtreeMinimalIndex() { + return subtreeMinimalIndex; + } + + public void setSubtreeMinimalIndex(int subtreeMinimalIndex) { + this.subtreeMinimalIndex = subtreeMinimalIndex; + } + + public int getSubtreeMaximalIndex() { + return subtreeMaximalIndex; + } + + public void setSubtreeMaximalIndex(int subtreeMaximalIndex) { + this.subtreeMaximalIndex = subtreeMaximalIndex; + } + + public PredTruth getPredicateTruthValue() { + return predicateTruthValue; + } + + public ClauseTruth getClauseTruthValue() { + return clauseTruthValue; + } + + public NegationAndUncertainty getNuValue() { + return nuValue; + } + + public PredicateSignature 
getPredicateSignatureValue() { + return predicateSignatureValue; + } + + public List getSubtree() { + return subtree; + } + + + public void setSubtree(List subtree) { + this.subtree = subtree; + } + + + + +} diff --git a/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/TruthTellerAnnotator.java b/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/TruthTellerAnnotator.java index 1cea1874..44332bfc 100644 --- a/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/TruthTellerAnnotator.java +++ b/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/TruthTellerAnnotator.java @@ -1,128 +1,128 @@ -package eu.excitementproject.eop.transformations.biu.en.predicatetruth; - -import java.io.File; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import eu.excitementproject.eop.common.representation.parse.tree.AbstractNodeUtils; -import eu.excitementproject.eop.transformations.generic.truthteller.AnnotatorException; -import eu.excitementproject.eop.transformations.generic.truthteller.DefaultSentenceAnnotator; -import eu.excitementproject.eop.transformations.representation.AdditionalNodeInformation; -import eu.excitementproject.eop.transformations.representation.ExtendedNode; - -/** - * An implementation of the "inner tool" of the analysis engine, - * serves a wrapper for the TruthTeller calls. 
- * @author Gabi Stanovsky - * @since Aug 2014 - */ - -public class TruthTellerAnnotator implements PredicateTruth { - - - private DefaultSentenceAnnotator annotator; - private File annotationRulesFile; - private ExtendedNode annotatedSentence; - private List annotationResult; - - /** - * Constructor which receives the annotation rules file - * @param IannotationRulesFile - * @throws PredicateTruthException - */ - public TruthTellerAnnotator(File annotationRulesFile) throws PredicateTruthException{ - this.annotationRulesFile = annotationRulesFile; - annotatedSentence = null; - } - - @Override - public void init() throws PredicateTruthException { - try { - annotator = new DefaultSentenceAnnotator(annotationRulesFile); - } catch (AnnotatorException e) { - throw new PredicateTruthException(e.getMessage(),e); - } - } - - @Override - public void setSentence(ExtendedNode annotatedSentence) { - this.annotatedSentence = annotatedSentence; - // clear annotation result - annotationResult = new ArrayList(); - } - - @Override - public void annotate() throws PredicateTruthException { - try { - // verify that setSentence was run before calling this function - if (annotatedSentence == null){ - throw new PredicateTruthException("annotate was called without first calling setSentence"); - } - // run TruthTeller - annotator.setTree(annotatedSentence); - annotator.annotate(); - ExtendedNode ttResult = annotator.getAnnotatedTree(); - Map annotationMap = new HashMap(); //needed since truth annotations won't be read in the sentence order - - // iterate over nodes and extract annotations to UIMA format - List nodes = AbstractNodeUtils.treeToList(ttResult); - - for (ExtendedNode node : nodes){ - int serial = node.getInfo().getNodeInfo().getSerial()-1; // this node's id in the original sentence - AdditionalNodeInformation info = node.getInfo().getAdditionalNodeInformation(); - // store result from info, according to index in the original sentence - SingleTokenTruthAnnotation 
singleTokenAnnotation =new SingleTokenTruthAnnotation(info.getPredTruth(),info.getClauseTruth(),info.getNegationAndUncertainty(),info.getPredicateSignature()); - - if (singleTokenAnnotation.getClauseTruthValue() !=null){ - // get a list of all subtree tokens, by getting the deep antecedent of all - // the subtree, and storing in the set - thus obtaining a unique copy of all "real" tokens - int minimalIndex = -1,maximalIndex = -1; // variables to store the boundaries of the subtree - Set subtree = new HashSet(); - for (ExtendedNode child : AbstractNodeUtils.treeToList(node)){ - ExtendedNode toAdd =AbstractNodeUtils.getDeepAntecedentOf(child); - int curId = node.getInfo().getNodeInfo().getSerial()-1; - subtree.add(toAdd); - // calculate boundaries - if ((minimalIndex == -1)||(curId < minimalIndex)){ - minimalIndex = curId; - } - if ((maximalIndex == -1)||(curId > maximalIndex)){ - maximalIndex = curId; - } - } - - // store the subtree and its boundaries - singleTokenAnnotation.setSubtree(new ArrayList(subtree)); - singleTokenAnnotation.setSubtreeMinimalIndex(minimalIndex); - singleTokenAnnotation.setSubtreeMaximalIndex(maximalIndex); - } - annotationMap.put(serial,singleTokenAnnotation); - - } - - //convert the map into a list - assumes there's a truth annotation for each token index - for (int i=0; i < annotationMap.size();i++){ - annotationResult.add(annotationMap.get(i)); - } - - } catch (AnnotatorException e) { - throw new PredicateTruthException(e.getMessage(),e); - } - } - - @Override - public List getAnnotatedEntities() { - return annotationResult; - } - - @Override - public void cleanUp() { - // stub - nothing to do to close TruthTeller - - } - -} +package eu.excitementproject.eop.transformations.biu.en.predicatetruth; + +import java.io.File; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import 
eu.excitementproject.eop.common.representation.parse.tree.AbstractNodeUtils; +import eu.excitementproject.eop.transformations.generic.truthteller.AnnotatorException; +import eu.excitementproject.eop.transformations.generic.truthteller.DefaultSentenceAnnotator; +import eu.excitementproject.eop.transformations.representation.AdditionalNodeInformation; +import eu.excitementproject.eop.transformations.representation.ExtendedNode; + +/** + * An implementation of the "inner tool" of the analysis engine, + * serves a wrapper for the TruthTeller calls. + * @author Gabi Stanovsky + * @since Aug 2014 + */ + +public class TruthTellerAnnotator implements PredicateTruth { + + + private DefaultSentenceAnnotator annotator; + private File annotationRulesFile; + private ExtendedNode annotatedSentence; + private List annotationResult; + + /** + * Constructor which receives the annotation rules file + * @param IannotationRulesFile + * @throws PredicateTruthException + */ + public TruthTellerAnnotator(File annotationRulesFile) throws PredicateTruthException{ + this.annotationRulesFile = annotationRulesFile; + annotatedSentence = null; + } + + @Override + public void init() throws PredicateTruthException { + try { + annotator = new DefaultSentenceAnnotator(annotationRulesFile); + } catch (AnnotatorException e) { + throw new PredicateTruthException(e.getMessage(),e); + } + } + + @Override + public void setSentence(ExtendedNode annotatedSentence) { + this.annotatedSentence = annotatedSentence; + // clear annotation result + annotationResult = new ArrayList(); + } + + @Override + public void annotate() throws PredicateTruthException { + try { + // verify that setSentence was run before calling this function + if (annotatedSentence == null){ + throw new PredicateTruthException("annotate was called without first calling setSentence"); + } + // run TruthTeller + annotator.setTree(annotatedSentence); + annotator.annotate(); + ExtendedNode ttResult = annotator.getAnnotatedTree(); + Map 
annotationMap = new HashMap(); //needed since truth annotations won't be read in the sentence order + + // iterate over nodes and extract annotations to UIMA format + List nodes = AbstractNodeUtils.treeToList(ttResult); + + for (ExtendedNode node : nodes){ + int serial = node.getInfo().getNodeInfo().getSerial()-1; // this node's id in the original sentence + AdditionalNodeInformation info = node.getInfo().getAdditionalNodeInformation(); + // store result from info, according to index in the original sentence + SingleTokenTruthAnnotation singleTokenAnnotation =new SingleTokenTruthAnnotation(info.getPredTruth(),info.getClauseTruth(),info.getNegationAndUncertainty(),info.getPredicateSignature()); + + if (singleTokenAnnotation.getClauseTruthValue() !=null){ + // get a list of all subtree tokens, by getting the deep antecedent of all + // the subtree, and storing in the set - thus obtaining a unique copy of all "real" tokens + int minimalIndex = -1,maximalIndex = -1; // variables to store the boundaries of the subtree + Set subtree = new HashSet(); + for (ExtendedNode child : AbstractNodeUtils.treeToList(node)){ + ExtendedNode toAdd =AbstractNodeUtils.getDeepAntecedentOf(child); + int curId = node.getInfo().getNodeInfo().getSerial()-1; + subtree.add(toAdd); + // calculate boundaries + if ((minimalIndex == -1)||(curId < minimalIndex)){ + minimalIndex = curId; + } + if ((maximalIndex == -1)||(curId > maximalIndex)){ + maximalIndex = curId; + } + } + + // store the subtree and its boundaries + singleTokenAnnotation.setSubtree(new ArrayList(subtree)); + singleTokenAnnotation.setSubtreeMinimalIndex(minimalIndex); + singleTokenAnnotation.setSubtreeMaximalIndex(maximalIndex); + } + annotationMap.put(serial,singleTokenAnnotation); + + } + + //convert the map into a list - assumes there's a truth annotation for each token index + for (int i=0; i < annotationMap.size();i++){ + annotationResult.add(annotationMap.get(i)); + } + + } catch (AnnotatorException e) { + throw new 
PredicateTruthException(e.getMessage(),e); + } + } + + @Override + public List getAnnotatedEntities() { + return annotationResult; + } + + @Override + public void cleanUp() { + // stub - nothing to do to close TruthTeller + + } + +} diff --git a/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/TruthTellerAnnotatorAE.java b/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/TruthTellerAnnotatorAE.java index 3a80fc1d..f7ed6016 100644 --- a/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/TruthTellerAnnotatorAE.java +++ b/transformations/src/main/java/eu/excitementproject/eop/transformations/biu/en/predicatetruth/TruthTellerAnnotatorAE.java @@ -1,38 +1,38 @@ -package eu.excitementproject.eop.transformations.biu.en.predicatetruth; - -import java.io.File; - -import org.uimafit.descriptor.ConfigurationParameter; - -import eu.excitementproject.eop.common.datastructures.Envelope; -import eu.excitementproject.eop.transformations.uima.ae.truthteller.PredicateTruthAE; - -/** - * Inherits truth annotations, and makes specific calls for Truth Teller's wrapper - * @author Gabi Stanovsky - * @since Aug 2014 - */ - -public class TruthTellerAnnotatorAE extends PredicateTruthAE { - - - // get the configuration parameter - public static final String PARAM_CONFIG = "annotationRulesFile"; - @ConfigurationParameter(name = PARAM_CONFIG, mandatory = true) - private File annotationRulesFile; - - - @Override - protected TruthTellerAnnotator buildInnerTool() throws Exception { - TruthTellerAnnotator ret = new TruthTellerAnnotator(annotationRulesFile); - ret.init(); - return ret; - } - - @Override - protected final Envelope getEnvelope(){return envelope;} - - - private static Envelope envelope = new Envelope(); - -} +package eu.excitementproject.eop.transformations.biu.en.predicatetruth; + +import java.io.File; + +import 
org.uimafit.descriptor.ConfigurationParameter; + +import eu.excitementproject.eop.common.datastructures.Envelope; +import eu.excitementproject.eop.transformations.uima.ae.truthteller.PredicateTruthAE; + +/** + * Inherits truth annotations, and makes specific calls for Truth Teller's wrapper + * @author Gabi Stanovsky + * @since Aug 2014 + */ + +public class TruthTellerAnnotatorAE extends PredicateTruthAE { + + + // get the configuration parameter + public static final String PARAM_CONFIG = "annotationRulesFile"; + @ConfigurationParameter(name = PARAM_CONFIG, mandatory = true) + private File annotationRulesFile; + + + @Override + protected TruthTellerAnnotator buildInnerTool() throws Exception { + TruthTellerAnnotator ret = new TruthTellerAnnotator(annotationRulesFile); + ret.init(); + return ret; + } + + @Override + protected final Envelope getEnvelope(){return envelope;} + + + private static Envelope envelope = new Envelope(); + +} diff --git a/transformations/src/main/java/eu/excitementproject/eop/transformations/component/alignment/predicatetruthlink/PredicateTruthAligner.java b/transformations/src/main/java/eu/excitementproject/eop/transformations/component/alignment/predicatetruthlink/PredicateTruthAligner.java index 954ddfbb..983ef040 100644 --- a/transformations/src/main/java/eu/excitementproject/eop/transformations/component/alignment/predicatetruthlink/PredicateTruthAligner.java +++ b/transformations/src/main/java/eu/excitementproject/eop/transformations/component/alignment/predicatetruthlink/PredicateTruthAligner.java @@ -1,310 +1,310 @@ -package eu.excitementproject.eop.transformations.component.alignment.predicatetruthlink; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; - -import org.apache.uima.cas.CASException; -import org.apache.uima.jcas.JCas; -import org.apache.uima.jcas.cas.EmptyStringList; -import org.apache.uima.jcas.cas.FSArray; 
-import org.apache.uima.jcas.cas.NonEmptyStringList; -import org.apache.uima.jcas.cas.StringList; -import org.apache.uima.jcas.tcas.Annotation; -import org.uimafit.util.JCasUtil; - -import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; -import eu.excitement.type.alignment.Link; -import eu.excitement.type.alignment.Link.Direction; -import eu.excitement.type.alignment.Target; -import eu.excitement.type.predicatetruth.PredicateTruth; -import eu.excitement.type.predicatetruth.PredicateTruthNegative; -import eu.excitement.type.predicatetruth.PredicateTruthPositive; -import eu.excitement.type.predicatetruth.PredicateTruthUncertain; -import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; -import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException; -import eu.excitementproject.eop.common.component.alignment.PairAnnotatorComponentException; -import eu.excitementproject.eop.common.utilities.uima.UimaUtils; -import eu.excitementproject.eop.lap.implbase.LAP_ImplBase; - - - -/** - * Produces alignment links between the text and the hypothesis, - * based on the predicate truth annotations - *

- * Usage: align a sentence pair by calling the annotate method. - * When the {@linkplain Aligner} object is no longer to be used, the - * {@link #cleanUp()} method should be called. - * - * @author Gabi Stanovsky - * @since Aug 2014 - */ - - -public class PredicateTruthAligner implements AlignmentComponent { - - private JCas textView, hypoView; - - //constant values used for aligner description - public static final String ALIGNER_ID = "PredicateTruth"; - public static final String ALIGNER_VERSION = "TruthTeller_1.0"; - public static final String ALIGNEMNT_TYPE_AGREEING_POSITIVE = "Agreeing_Positive_Predicate_Truth"; - public static final String ALIGNEMNT_TYPE_AGREEING_NEGATIVE = "Agreeing_Negative_Predicate_Truth"; - public static final String ALIGNEMNT_TYPE_DISAGREEING = "Disagreeing_Predicate_Truth"; - public static final String ALIGNEMNT_TYPE_NON_MATCHING = "Non_Matching_Predicate_Truth"; - //group labels - public static final String GROUP_LABEL_OPPOSITE_PREDICATE_TRUTH = "OPPOSITE_PREDICATE_TRUTH"; - public static final String GROUP_LABEL_SAME_PREDICATE_TRUTH = "SAME_PREDICATE_TRUTH"; - public static final String GROUP_LABEL_LOCAL_CONTRADICTION = "LOCAL_CONTRADICTION"; - public static final String GROUP_LABEL_LOCAL_ENTAILMENT = "LOCAL_ENTAILMENT"; - - //(currently) constant values used for alignment links - private static final double ALIGNER_CONFIDENCE = 1.0; - private static final Direction ALIGNER_DIRECTION = Direction.Bidirection; - - //store the annotations of predicate truth, for memoization - private Map,Collection> memoTextAnnots; - private Map,Collection> memoHypoAnnots; - private static final List> ptTypes = new ArrayList>(){ - private static final long serialVersionUID = 8489900798036315449L; - - { - add(PredicateTruthPositive.class); - add(PredicateTruthNegative.class); - add(PredicateTruthUncertain.class); - }}; - - - - /** - * default constructor - * set all members to null - */ - public PredicateTruthAligner(){ - textView = null; - hypoView = 
null; - } - - @Override - public void annotate(JCas aJCas) throws PairAnnotatorComponentException { - try { - // create possible group labels instances for this jcas - StringList localEntailment = createStringList(aJCas, new ArrayList() { - private static final long serialVersionUID = 1L; - - { - add(GROUP_LABEL_SAME_PREDICATE_TRUTH); - add(GROUP_LABEL_LOCAL_ENTAILMENT); - }}); - - StringList localContradiction = createStringList(aJCas, new ArrayList() { - private static final long serialVersionUID = 1L; - - { - add(GROUP_LABEL_OPPOSITE_PREDICATE_TRUTH); - add(GROUP_LABEL_LOCAL_CONTRADICTION); - }}); - - StringList emptyGroupLabel = new EmptyStringList(aJCas); - - - // Get the text and hypothesis views - textView = aJCas.getView(LAP_ImplBase.TEXTVIEW); - hypoView = aJCas.getView(LAP_ImplBase.HYPOTHESISVIEW); - - // Record annotations - memoTextAnnots = new HashMap,Collection>(); - memoHypoAnnots = new HashMap,Collection>(); - - for (Class ptType : ptTypes){ - memoTextAnnots.put(ptType, JCasUtil.select(textView, ptType)); - memoHypoAnnots.put(ptType, JCasUtil.select(hypoView, ptType)); - } - - - // add alignment links - // Agreeing Positive Predicate Truth - // PT+ <-> PT+ - createPredicateTruthLinks(PredicateTruthPositive.class,PredicateTruthPositive.class, ALIGNER_CONFIDENCE, ALIGNER_DIRECTION,ALIGNEMNT_TYPE_AGREEING_POSITIVE,localEntailment); - - // Agreeing Negative Predicate Truth - // PT- <-> PT- - createPredicateTruthLinks(PredicateTruthNegative.class,PredicateTruthNegative.class, ALIGNER_CONFIDENCE, ALIGNER_DIRECTION,ALIGNEMNT_TYPE_AGREEING_NEGATIVE,localEntailment); - - // Disagreeing Predicate Truth - // PT+ <-> PT- - createPredicateTruthLinks(PredicateTruthPositive.class,PredicateTruthNegative.class, ALIGNER_CONFIDENCE, ALIGNER_DIRECTION,ALIGNEMNT_TYPE_DISAGREEING,localContradiction); - // PT- <-> PT+ - createPredicateTruthLinks(PredicateTruthNegative.class,PredicateTruthPositive.class, ALIGNER_CONFIDENCE, 
ALIGNER_DIRECTION,ALIGNEMNT_TYPE_DISAGREEING,localContradiction); - - // Non Matching Predicate Truth - // PT+ <-> PT? - createPredicateTruthLinks(PredicateTruthPositive.class,PredicateTruthUncertain.class, ALIGNER_CONFIDENCE, ALIGNER_DIRECTION,ALIGNEMNT_TYPE_NON_MATCHING,emptyGroupLabel); - // PT- <-> PT? - createPredicateTruthLinks(PredicateTruthNegative.class,PredicateTruthUncertain.class, ALIGNER_CONFIDENCE, ALIGNER_DIRECTION,ALIGNEMNT_TYPE_NON_MATCHING,emptyGroupLabel); - // PT? <-> PT+ - createPredicateTruthLinks(PredicateTruthUncertain.class,PredicateTruthPositive.class, ALIGNER_CONFIDENCE, ALIGNER_DIRECTION,ALIGNEMNT_TYPE_NON_MATCHING,emptyGroupLabel); - // PT? <-> PT- - createPredicateTruthLinks(PredicateTruthUncertain.class,PredicateTruthNegative.class, ALIGNER_CONFIDENCE, ALIGNER_DIRECTION,ALIGNEMNT_TYPE_NON_MATCHING,emptyGroupLabel); - - } - catch (CASException e) { - throw new PairAnnotatorComponentException(e); - } - } - - - - @Override - public String getComponentName() { - // Name of this component that is used to identify the related configuration section - return this.getClass().getName(); - } - - @Override - public String getInstanceName() { - // This component does not support instance configuration - return null; - } - - @Override - public void close() throws AlignmentComponentException - { - - } - - - /** - * Draw bidirectional links between all predicate truth annotation of type (TextType) in text and truth annotation of type (hypoType) in Hypothesis - * @param textType - * @param hypoType - * @param confidence - * @param linkDirection - * @param linkInfo - * @throws CASException - */ - private void createPredicateTruthLinks(Class textType, Class hypoType, double confidence,Direction linkDirection,String linkInfo,StringList linkGroupLabel) throws CASException{ - - // get relevant annotations from text and hypothesis - use pre-recorded annotations - Collection textAnnotations = memoTextAnnots.get(textType); - Collection hypoAnnotations = 
memoHypoAnnots.get(hypoType); - - // mark links between all of the found types - for (Annotation tAnno : textAnnotations){ - for (Annotation hAnno : hypoAnnotations){ - Token tToken = UimaUtils.selectCoveredSingle(textView, Token.class, tAnno); - Token hToken = UimaUtils.selectCoveredSingle(hypoView, Token.class, hAnno); - addAlignmentAnnotations(tToken,hToken, confidence, linkDirection, linkInfo, linkGroupLabel); - } - } - - } - - /** - * Add an alignment link from T to H, based on the rule t->h - * in which t is a phrase in T from index textStart to textEnd of the tokens, - * and h is a phrase in H from index hypoStart to hypoEnd of the tokens, - * @param textToken Token in TextView to annotate - * @param hypoToken Token in HypoView to annotate - * @param confidence The confidence of the rule - * @param linkDirection The direction of the link (t to h, h to t or bidirectional). - * @param linkInfo The relation of the rule (Wordnet synonym, Wikipedia redirect etc). - * @param linkGroupLabel - * @throws CASException - */ - private void addAlignmentAnnotations(Token textToken, Token hypoToken, - double confidence, - Direction linkDirection, - String linkInfo, StringList linkGroupLabel) - throws CASException { - - - // Prepare the Target instances - Target textTarget = new Target(textView); - Target hypoTarget = new Target(hypoView); - - - // Prepare an FSArray instance and put the target annotations in it - FSArray textAnnots = new FSArray(textView, 1); - FSArray hypoAnnots = new FSArray(hypoView, 1); - - textAnnots.set(0, textToken); - hypoAnnots.set(0, hypoToken); - - textTarget.setTargetAnnotations(textAnnots); - hypoTarget.setTargetAnnotations(hypoAnnots); - - // Set begin and end value of the Target annotations - textTarget.setBegin(textToken.getBegin()); - textTarget.setEnd(textToken.getEnd()); - hypoTarget.setBegin(hypoToken.getBegin()); - hypoTarget.setEnd(hypoToken.getEnd()); - - // Add the targets to the indices - textTarget.addToIndexes(); - 
hypoTarget.addToIndexes(); - - // Mark an alignment.Link and add it to the hypothesis view - Link link = new Link(hypoView); - link.setTSideTarget(textTarget); - link.setHSideTarget(hypoTarget); - - // Set the link direction - link.setDirection(linkDirection); - - // Set strength - link.setStrength(confidence); - - // Set Group label - link.setGroupLabel(linkGroupLabel); - - - // Add the link information - link.setAlignerID(ALIGNER_ID); - link.setAlignerVersion(ALIGNER_VERSION); - link.setLinkInfo(linkInfo); - - // Mark begin and end according to the hypothesis target - link.setBegin(hypoTarget.getBegin()); - link.setEnd(hypoTarget.getEnd()); - - // Add to index - link.addToIndexes(); - } - - /** - * Converts a collection of string into a a Uima Stringlist - * @param aJCas - Jcas to which to attach the string list? - * @param aCollection - the collection to be converted - * @return a Uima Stringlist, consisting of all the elements in aCollection - */ - private static StringList createStringList(JCas aJCas, - Collection aCollection) - { - if (aCollection.size() == 0) { - return new EmptyStringList(aJCas); - } - - NonEmptyStringList head = new NonEmptyStringList(aJCas); - NonEmptyStringList list = head; - Iterator i = aCollection.iterator(); - while (i.hasNext()) { - head.setHead(i.next()); - if (i.hasNext()) { - head.setTail(new NonEmptyStringList(aJCas)); - head = (NonEmptyStringList) head.getTail(); - } - else { - head.setTail(new EmptyStringList(aJCas)); - } - } - - return list; - } - - -} +package eu.excitementproject.eop.transformations.component.alignment.predicatetruthlink; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.apache.uima.cas.CASException; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.cas.EmptyStringList; +import org.apache.uima.jcas.cas.FSArray; +import org.apache.uima.jcas.cas.NonEmptyStringList; 
+import org.apache.uima.jcas.cas.StringList; +import org.apache.uima.jcas.tcas.Annotation; +import org.uimafit.util.JCasUtil; + +import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; +import eu.excitement.type.alignment.Link; +import eu.excitement.type.alignment.Link.Direction; +import eu.excitement.type.alignment.Target; +import eu.excitement.type.predicatetruth.PredicateTruth; +import eu.excitement.type.predicatetruth.PredicateTruthNegative; +import eu.excitement.type.predicatetruth.PredicateTruthPositive; +import eu.excitement.type.predicatetruth.PredicateTruthUncertain; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponentException; +import eu.excitementproject.eop.common.component.alignment.PairAnnotatorComponentException; +import eu.excitementproject.eop.common.utilities.uima.UimaUtils; +import eu.excitementproject.eop.lap.implbase.LAP_ImplBase; + + + +/** + * Produces alignment links between the text and the hypothesis, + * based on the predicate truth annotations + *

+ * Usage: align a sentence pair by calling the annotate method. + * When the {@linkplain Aligner} object is no longer to be used, the + * {@link #cleanUp()} method should be called. + * + * @author Gabi Stanovsky + * @since Aug 2014 + */ + + +public class PredicateTruthAligner implements AlignmentComponent { + + private JCas textView, hypoView; + + //constant values used for aligner description + public static final String ALIGNER_ID = "PredicateTruth"; + public static final String ALIGNER_VERSION = "TruthTeller_1.0"; + public static final String ALIGNEMNT_TYPE_AGREEING_POSITIVE = "Agreeing_Positive_Predicate_Truth"; + public static final String ALIGNEMNT_TYPE_AGREEING_NEGATIVE = "Agreeing_Negative_Predicate_Truth"; + public static final String ALIGNEMNT_TYPE_DISAGREEING = "Disagreeing_Predicate_Truth"; + public static final String ALIGNEMNT_TYPE_NON_MATCHING = "Non_Matching_Predicate_Truth"; + //group labels + public static final String GROUP_LABEL_OPPOSITE_PREDICATE_TRUTH = "OPPOSITE_PREDICATE_TRUTH"; + public static final String GROUP_LABEL_SAME_PREDICATE_TRUTH = "SAME_PREDICATE_TRUTH"; + public static final String GROUP_LABEL_LOCAL_CONTRADICTION = "LOCAL_CONTRADICTION"; + public static final String GROUP_LABEL_LOCAL_ENTAILMENT = "LOCAL_ENTAILMENT"; + + //(currently) constant values used for alignment links + private static final double ALIGNER_CONFIDENCE = 1.0; + private static final Direction ALIGNER_DIRECTION = Direction.Bidirection; + + //store the annotations of predicate truth, for memoization + private Map,Collection> memoTextAnnots; + private Map,Collection> memoHypoAnnots; + private static final List> ptTypes = new ArrayList>(){ + private static final long serialVersionUID = 8489900798036315449L; + + { + add(PredicateTruthPositive.class); + add(PredicateTruthNegative.class); + add(PredicateTruthUncertain.class); + }}; + + + + /** + * default constructor + * set all members to null + */ + public PredicateTruthAligner(){ + textView = null; + hypoView = 
null; + } + + @Override + public void annotate(JCas aJCas) throws PairAnnotatorComponentException { + try { + // create possible group labels instances for this jcas + StringList localEntailment = createStringList(aJCas, new ArrayList() { + private static final long serialVersionUID = 1L; + + { + add(GROUP_LABEL_SAME_PREDICATE_TRUTH); + add(GROUP_LABEL_LOCAL_ENTAILMENT); + }}); + + StringList localContradiction = createStringList(aJCas, new ArrayList() { + private static final long serialVersionUID = 1L; + + { + add(GROUP_LABEL_OPPOSITE_PREDICATE_TRUTH); + add(GROUP_LABEL_LOCAL_CONTRADICTION); + }}); + + StringList emptyGroupLabel = new EmptyStringList(aJCas); + + + // Get the text and hypothesis views + textView = aJCas.getView(LAP_ImplBase.TEXTVIEW); + hypoView = aJCas.getView(LAP_ImplBase.HYPOTHESISVIEW); + + // Record annotations + memoTextAnnots = new HashMap,Collection>(); + memoHypoAnnots = new HashMap,Collection>(); + + for (Class ptType : ptTypes){ + memoTextAnnots.put(ptType, JCasUtil.select(textView, ptType)); + memoHypoAnnots.put(ptType, JCasUtil.select(hypoView, ptType)); + } + + + // add alignment links + // Agreeing Positive Predicate Truth + // PT+ <-> PT+ + createPredicateTruthLinks(PredicateTruthPositive.class,PredicateTruthPositive.class, ALIGNER_CONFIDENCE, ALIGNER_DIRECTION,ALIGNEMNT_TYPE_AGREEING_POSITIVE,localEntailment); + + // Agreeing Negative Predicate Truth + // PT- <-> PT- + createPredicateTruthLinks(PredicateTruthNegative.class,PredicateTruthNegative.class, ALIGNER_CONFIDENCE, ALIGNER_DIRECTION,ALIGNEMNT_TYPE_AGREEING_NEGATIVE,localEntailment); + + // Disagreeing Predicate Truth + // PT+ <-> PT- + createPredicateTruthLinks(PredicateTruthPositive.class,PredicateTruthNegative.class, ALIGNER_CONFIDENCE, ALIGNER_DIRECTION,ALIGNEMNT_TYPE_DISAGREEING,localContradiction); + // PT- <-> PT+ + createPredicateTruthLinks(PredicateTruthNegative.class,PredicateTruthPositive.class, ALIGNER_CONFIDENCE, 
ALIGNER_DIRECTION,ALIGNEMNT_TYPE_DISAGREEING,localContradiction); + + // Non Matching Predicate Truth + // PT+ <-> PT? + createPredicateTruthLinks(PredicateTruthPositive.class,PredicateTruthUncertain.class, ALIGNER_CONFIDENCE, ALIGNER_DIRECTION,ALIGNEMNT_TYPE_NON_MATCHING,emptyGroupLabel); + // PT- <-> PT? + createPredicateTruthLinks(PredicateTruthNegative.class,PredicateTruthUncertain.class, ALIGNER_CONFIDENCE, ALIGNER_DIRECTION,ALIGNEMNT_TYPE_NON_MATCHING,emptyGroupLabel); + // PT? <-> PT+ + createPredicateTruthLinks(PredicateTruthUncertain.class,PredicateTruthPositive.class, ALIGNER_CONFIDENCE, ALIGNER_DIRECTION,ALIGNEMNT_TYPE_NON_MATCHING,emptyGroupLabel); + // PT? <-> PT- + createPredicateTruthLinks(PredicateTruthUncertain.class,PredicateTruthNegative.class, ALIGNER_CONFIDENCE, ALIGNER_DIRECTION,ALIGNEMNT_TYPE_NON_MATCHING,emptyGroupLabel); + + } + catch (CASException e) { + throw new PairAnnotatorComponentException(e); + } + } + + + + @Override + public String getComponentName() { + // Name of this component that is used to identify the related configuration section + return this.getClass().getName(); + } + + @Override + public String getInstanceName() { + // This component does not support instance configuration + return null; + } + + @Override + public void close() throws AlignmentComponentException + { + + } + + + /** + * Draw bidirectional links between all predicate truth annotation of type (TextType) in text and truth annotation of type (hypoType) in Hypothesis + * @param textType + * @param hypoType + * @param confidence + * @param linkDirection + * @param linkInfo + * @throws CASException + */ + private void createPredicateTruthLinks(Class textType, Class hypoType, double confidence,Direction linkDirection,String linkInfo,StringList linkGroupLabel) throws CASException{ + + // get relevant annotations from text and hypothesis - use pre-recorded annotations + Collection textAnnotations = memoTextAnnots.get(textType); + Collection hypoAnnotations = 
memoHypoAnnots.get(hypoType); + + // mark links between all of the found types + for (Annotation tAnno : textAnnotations){ + for (Annotation hAnno : hypoAnnotations){ + Token tToken = UimaUtils.selectCoveredSingle(textView, Token.class, tAnno); + Token hToken = UimaUtils.selectCoveredSingle(hypoView, Token.class, hAnno); + addAlignmentAnnotations(tToken,hToken, confidence, linkDirection, linkInfo, linkGroupLabel); + } + } + + } + + /** + * Add an alignment link from T to H, based on the rule t->h + * in which t is a phrase in T from index textStart to textEnd of the tokens, + * and h is a phrase in H from index hypoStart to hypoEnd of the tokens, + * @param textToken Token in TextView to annotate + * @param hypoToken Token in HypoView to annotate + * @param confidence The confidence of the rule + * @param linkDirection The direction of the link (t to h, h to t or bidirectional). + * @param linkInfo The relation of the rule (Wordnet synonym, Wikipedia redirect etc). + * @param linkGroupLabel + * @throws CASException + */ + private void addAlignmentAnnotations(Token textToken, Token hypoToken, + double confidence, + Direction linkDirection, + String linkInfo, StringList linkGroupLabel) + throws CASException { + + + // Prepare the Target instances + Target textTarget = new Target(textView); + Target hypoTarget = new Target(hypoView); + + + // Prepare an FSArray instance and put the target annotations in it + FSArray textAnnots = new FSArray(textView, 1); + FSArray hypoAnnots = new FSArray(hypoView, 1); + + textAnnots.set(0, textToken); + hypoAnnots.set(0, hypoToken); + + textTarget.setTargetAnnotations(textAnnots); + hypoTarget.setTargetAnnotations(hypoAnnots); + + // Set begin and end value of the Target annotations + textTarget.setBegin(textToken.getBegin()); + textTarget.setEnd(textToken.getEnd()); + hypoTarget.setBegin(hypoToken.getBegin()); + hypoTarget.setEnd(hypoToken.getEnd()); + + // Add the targets to the indices + textTarget.addToIndexes(); + 
hypoTarget.addToIndexes(); + + // Mark an alignment.Link and add it to the hypothesis view + Link link = new Link(hypoView); + link.setTSideTarget(textTarget); + link.setHSideTarget(hypoTarget); + + // Set the link direction + link.setDirection(linkDirection); + + // Set strength + link.setStrength(confidence); + + // Set Group label + link.setGroupLabel(linkGroupLabel); + + + // Add the link information + link.setAlignerID(ALIGNER_ID); + link.setAlignerVersion(ALIGNER_VERSION); + link.setLinkInfo(linkInfo); + + // Mark begin and end according to the hypothesis target + link.setBegin(hypoTarget.getBegin()); + link.setEnd(hypoTarget.getEnd()); + + // Add to index + link.addToIndexes(); + } + + /** + * Converts a collection of string into a a Uima Stringlist + * @param aJCas - Jcas to which to attach the string list? + * @param aCollection - the collection to be converted + * @return a Uima Stringlist, consisting of all the elements in aCollection + */ + private static StringList createStringList(JCas aJCas, + Collection aCollection) + { + if (aCollection.size() == 0) { + return new EmptyStringList(aJCas); + } + + NonEmptyStringList head = new NonEmptyStringList(aJCas); + NonEmptyStringList list = head; + Iterator i = aCollection.iterator(); + while (i.hasNext()) { + head.setHead(i.next()); + if (i.hasNext()) { + head.setTail(new NonEmptyStringList(aJCas)); + head = (NonEmptyStringList) head.getTail(); + } + else { + head.setTail(new EmptyStringList(aJCas)); + } + } + + return list; + } + + +} diff --git a/transformations/src/main/java/eu/excitementproject/eop/transformations/generic/truthteller/conll/AnnotateSentenceToConll.java b/transformations/src/main/java/eu/excitementproject/eop/transformations/generic/truthteller/conll/AnnotateSentenceToConll.java index 68b1d09b..9d9e4443 100644 --- a/transformations/src/main/java/eu/excitementproject/eop/transformations/generic/truthteller/conll/AnnotateSentenceToConll.java +++ 
b/transformations/src/main/java/eu/excitementproject/eop/transformations/generic/truthteller/conll/AnnotateSentenceToConll.java @@ -1,250 +1,250 @@ -/** - * - */ -package eu.excitementproject.eop.transformations.generic.truthteller.conll; -import java.io.BufferedReader; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileReader; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Vector; - -import org.apache.log4j.BasicConfigurator; -import org.apache.log4j.Level; -import org.apache.log4j.Logger; - -import eu.excitementproject.eop.common.representation.parse.tree.dependency.basic.BasicNode; -import eu.excitementproject.eop.common.utilities.Utils; -import eu.excitementproject.eop.common.utilities.configuration.ConfigurationException; -import eu.excitementproject.eop.common.utilities.configuration.ConfigurationFile; -import eu.excitementproject.eop.common.utilities.configuration.ConfigurationFileDuplicateKeyException; -import eu.excitementproject.eop.common.utilities.configuration.ConfigurationParams; -import eu.excitementproject.eop.lap.biu.en.parser.ParserRunException; -import eu.excitementproject.eop.lap.biu.en.parser.easyfirst.EasyFirstParser; -import eu.excitementproject.eop.lap.biu.en.sentencesplit.LingPipeSentenceSplitter; -import eu.excitementproject.eop.lap.biu.sentencesplit.SentenceSplitter; -import eu.excitementproject.eop.lap.biu.sentencesplit.SentenceSplitterException; -import eu.excitementproject.eop.transformations.generic.truthteller.AnnotatorException; -import eu.excitementproject.eop.transformations.generic.truthteller.DefaultSentenceAnnotator; -import eu.excitementproject.eop.transformations.representation.ExtendedNode; -import eu.excitementproject.eop.transformations.utilities.TransformationsConfigurationParametersNames; -import eu.excitementproject.eop.transformations.utilities.parsetreeutils.TreeUtilities; - -/** 
- * @author Amnon Lotan - * - * @since Jul 18, 2012 - */ -public class AnnotateSentenceToConll { - - public static final String INPUT_FILE_INDICATOR = "-f"; - - private static Logger logger = null; - - private static AnnotatedConllStringConverter CONLL_CONVERTER = new AnnotatedConllStringConverter(); - private static SentenceSplitter SENTENCE_SPLITTER = new LingPipeSentenceSplitter(); - private EasyFirstParser parser; - private DefaultSentenceAnnotator annotator; - private final File conllOutputFolder; - - private ConfigurationParams annotationParams = null; - - /** - * Ctor - * @throws ConfigurationException - * @throws ConllConverterException - */ - public AnnotateSentenceToConll(ConfigurationFile confFile) throws ConfigurationException, ConllConverterException { - - confFile.setExpandingEnvironmentVariables(true); - annotationParams = confFile.getModuleConfiguration(TransformationsConfigurationParametersNames.TRUTH_TELLER_MODULE_NAME); - - try { - annotator = new DefaultSentenceAnnotator(annotationParams); - - String posTaggerString = annotationParams.get(TransformationsConfigurationParametersNames.PREPROCESS_EASYFIRST); - String easyFirstHost = annotationParams.get(TransformationsConfigurationParametersNames.PREPROCESS_EASYFIRST_HOST); - int easyFirstPort = annotationParams.getInt(TransformationsConfigurationParametersNames.PREPROCESS_EASYFIRST_PORT); - parser = new EasyFirstParser(easyFirstHost, easyFirstPort, posTaggerString); - parser.init(); - } catch (Exception e) { - throw new ConllConverterException("see nested", e); - } - - String conllOutputFolderPath = annotationParams.get(TransformationsConfigurationParametersNames.CONLL_FORMAT_OUTPUT_DIRECTORY); - conllOutputFolder = new File(conllOutputFolderPath); - conllOutputFolder.mkdirs(); - } - - - /** - * Get some text, sentence split it, and return - * @param sentence - * @return - * @throws ConllConverterException - */ - public String textToAnnotatedConllFiles(String sentence) throws 
ConllConverterException - { - ExtendedNode annotatedSentece = annotateSentece(sentence); - String conllString = AnnotatedTreeToConllCoverter.treeToConll(annotatedSentece , CONLL_CONVERTER); - return conllString; - } - - /** - * Get a single sentence, annotate it, and return its string CoNLL representation. - * @param sentence - * @return - * @throws ConllConverterException - */ - public String sentenceToAnnotatedConllString(String sentence) throws ConllConverterException - { - ExtendedNode annotatedSentece = annotateSentece(sentence); - String conllString = AnnotatedTreeToConllCoverter.treeToConll(annotatedSentece , CONLL_CONVERTER); - return conllString; - } - - - public List getSentencesToAnnotate(String inputFileName) throws ConfigurationException, FileNotFoundException, IOException - { - List sentences = new LinkedList(); - File inputFile = new File(inputFileName); - try(BufferedReader reader = new BufferedReader(new FileReader(inputFile))) - { - String line = reader.readLine(); - while (line !=null) - { - sentences.add(line); - line = reader.readLine(); - } - } - return sentences; - } - - private ExtendedNode annotateSentece(String sentence) throws ConllConverterException - { - parser.setSentence(sentence); - ExtendedNode annotatedSentece; - try { - parser.parse(); - BasicNode parsedTree = parser.getParseTree(); - ExtendedNode extendedTree = TreeUtilities.copyFromBasicNode(parsedTree); - annotator.setTree(extendedTree); - annotator.annotate(); - annotatedSentece = annotator.getAnnotatedTree(); - } catch (Exception e) { - throw new ConllConverterException("see nested", e); - } - return annotatedSentece; - } - - /** - * Command Line DEMO for the TruthTeller: get the configuration file and text sentence(s), annotate the sentences and print each one in CoNLL format to a separate file. 
- * - * @param args - * @throws AnnotatorException - * @throws ConfigurationException - * @throws ConfigurationFileDuplicateKeyException - * @throws ParserRunException - * @throws ConllConverterException - * @throws SentenceSplitterException - * @throws IOException - * @throws FileNotFoundException - */ - public static void main(String[] args) - { - BasicConfigurator.configure(); - Logger.getRootLogger().setLevel(Level.INFO); - logger = Logger.getLogger(AnnotateSentenceToConll.class); - try - { - annotateByCommandLineArguments(args); - } - catch(Throwable t) - { - t.printStackTrace(System.out); - logger.error("TruthTeller failed.",t); - } - } - - private static Iterable getSentencesIterable(Iterator argsIterator, AnnotateSentenceToConll app) throws FileNotFoundException, ConfigurationException, IOException, SentenceSplitterException - { - List sentencesToAnnotate = null; - - - String firstArgumentAfterConfigurationFile = null; - if (argsIterator.hasNext()) - { - firstArgumentAfterConfigurationFile = argsIterator.next(); - } - - - if (INPUT_FILE_INDICATOR.equalsIgnoreCase(firstArgumentAfterConfigurationFile)) - { - if (argsIterator.hasNext()) - { - sentencesToAnnotate = app.getSentencesToAnnotate(argsIterator.next()); - } - else - { - throw new RuntimeException("No input file is given, though \""+INPUT_FILE_INDICATOR+"\" has been encountered as a command line argument."); - } - } - else - { - // Read the text from command line - StringBuffer sbInputWords = new StringBuffer(); - - if (firstArgumentAfterConfigurationFile!=null) - { - sbInputWords.append(firstArgumentAfterConfigurationFile); - while (argsIterator.hasNext()) - { - sbInputWords.append(" "); - sbInputWords.append(argsIterator.next()); - } - } - -// List listOfWords = Utils.arrayToCollection(args, new Vector()); -// listOfWords.remove(0); // remove the confFile parameter -// listOfWords.remove(1); // remove the pos-tagger-file-name -// String text = StringUtil.joinIterableToString(listOfWords, " "); - - 
String text = sbInputWords.toString(); - - SENTENCE_SPLITTER.setDocument(text); - SENTENCE_SPLITTER.split(); - sentencesToAnnotate = SENTENCE_SPLITTER.getSentences(); - } - - return sentencesToAnnotate; - } - - - private static void annotateByCommandLineArguments(String[] args) throws AnnotatorException, ConfigurationFileDuplicateKeyException, ConfigurationException, ParserRunException, ConllConverterException, SentenceSplitterException, FileNotFoundException, IOException - { - if (args.length < (1)) - throw new AnnotatorException(String.format("usage: %s configurationFile.xml sentence(s)", AnnotateSentenceToConll.class.getSimpleName())); - - List argsList = Utils.arrayToCollection(args, new Vector()); - Iterator argsIterator = argsList.iterator(); - - ConfigurationFile confFile = new ConfigurationFile(new File(argsIterator.next())); - confFile.setExpandingEnvironmentVariables(true); - AnnotateSentenceToConll app = new AnnotateSentenceToConll(confFile); - - - Iterable sentencesToAnnotate = getSentencesIterable(argsIterator,app); - - List list = new ArrayList(); - for (String sentence : sentencesToAnnotate) - { - ExtendedNode annotatedSentece = app.annotateSentece(sentence); - list.add(annotatedSentece); - } - AnnotatedTreeToConllCoverter.treesToConllFiles(list, app.conllOutputFolder, CONLL_CONVERTER); - } +/** + * + */ +package eu.excitementproject.eop.transformations.generic.truthteller.conll; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Vector; + +import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; + +import eu.excitementproject.eop.common.representation.parse.tree.dependency.basic.BasicNode; +import eu.excitementproject.eop.common.utilities.Utils; +import 
eu.excitementproject.eop.common.utilities.configuration.ConfigurationException; +import eu.excitementproject.eop.common.utilities.configuration.ConfigurationFile; +import eu.excitementproject.eop.common.utilities.configuration.ConfigurationFileDuplicateKeyException; +import eu.excitementproject.eop.common.utilities.configuration.ConfigurationParams; +import eu.excitementproject.eop.lap.biu.en.parser.ParserRunException; +import eu.excitementproject.eop.lap.biu.en.parser.easyfirst.EasyFirstParser; +import eu.excitementproject.eop.lap.biu.en.sentencesplit.LingPipeSentenceSplitter; +import eu.excitementproject.eop.lap.biu.sentencesplit.SentenceSplitter; +import eu.excitementproject.eop.lap.biu.sentencesplit.SentenceSplitterException; +import eu.excitementproject.eop.transformations.generic.truthteller.AnnotatorException; +import eu.excitementproject.eop.transformations.generic.truthteller.DefaultSentenceAnnotator; +import eu.excitementproject.eop.transformations.representation.ExtendedNode; +import eu.excitementproject.eop.transformations.utilities.TransformationsConfigurationParametersNames; +import eu.excitementproject.eop.transformations.utilities.parsetreeutils.TreeUtilities; + +/** + * @author Amnon Lotan + * + * @since Jul 18, 2012 + */ +public class AnnotateSentenceToConll { + + public static final String INPUT_FILE_INDICATOR = "-f"; + + private static Logger logger = null; + + private static AnnotatedConllStringConverter CONLL_CONVERTER = new AnnotatedConllStringConverter(); + private static SentenceSplitter SENTENCE_SPLITTER = new LingPipeSentenceSplitter(); + private EasyFirstParser parser; + private DefaultSentenceAnnotator annotator; + private final File conllOutputFolder; + + private ConfigurationParams annotationParams = null; + + /** + * Ctor + * @throws ConfigurationException + * @throws ConllConverterException + */ + public AnnotateSentenceToConll(ConfigurationFile confFile) throws ConfigurationException, ConllConverterException { + + 
confFile.setExpandingEnvironmentVariables(true); + annotationParams = confFile.getModuleConfiguration(TransformationsConfigurationParametersNames.TRUTH_TELLER_MODULE_NAME); + + try { + annotator = new DefaultSentenceAnnotator(annotationParams); + + String posTaggerString = annotationParams.get(TransformationsConfigurationParametersNames.PREPROCESS_EASYFIRST); + String easyFirstHost = annotationParams.get(TransformationsConfigurationParametersNames.PREPROCESS_EASYFIRST_HOST); + int easyFirstPort = annotationParams.getInt(TransformationsConfigurationParametersNames.PREPROCESS_EASYFIRST_PORT); + parser = new EasyFirstParser(easyFirstHost, easyFirstPort, posTaggerString); + parser.init(); + } catch (Exception e) { + throw new ConllConverterException("see nested", e); + } + + String conllOutputFolderPath = annotationParams.get(TransformationsConfigurationParametersNames.CONLL_FORMAT_OUTPUT_DIRECTORY); + conllOutputFolder = new File(conllOutputFolderPath); + conllOutputFolder.mkdirs(); + } + + + /** + * Get some text, sentence split it, and return + * @param sentence + * @return + * @throws ConllConverterException + */ + public String textToAnnotatedConllFiles(String sentence) throws ConllConverterException + { + ExtendedNode annotatedSentece = annotateSentece(sentence); + String conllString = AnnotatedTreeToConllCoverter.treeToConll(annotatedSentece , CONLL_CONVERTER); + return conllString; + } + + /** + * Get a single sentence, annotate it, and return its string CoNLL representation. 
+ * @param sentence + * @return + * @throws ConllConverterException + */ + public String sentenceToAnnotatedConllString(String sentence) throws ConllConverterException + { + ExtendedNode annotatedSentece = annotateSentece(sentence); + String conllString = AnnotatedTreeToConllCoverter.treeToConll(annotatedSentece , CONLL_CONVERTER); + return conllString; + } + + + public List getSentencesToAnnotate(String inputFileName) throws ConfigurationException, FileNotFoundException, IOException + { + List sentences = new LinkedList(); + File inputFile = new File(inputFileName); + try(BufferedReader reader = new BufferedReader(new FileReader(inputFile))) + { + String line = reader.readLine(); + while (line !=null) + { + sentences.add(line); + line = reader.readLine(); + } + } + return sentences; + } + + private ExtendedNode annotateSentece(String sentence) throws ConllConverterException + { + parser.setSentence(sentence); + ExtendedNode annotatedSentece; + try { + parser.parse(); + BasicNode parsedTree = parser.getParseTree(); + ExtendedNode extendedTree = TreeUtilities.copyFromBasicNode(parsedTree); + annotator.setTree(extendedTree); + annotator.annotate(); + annotatedSentece = annotator.getAnnotatedTree(); + } catch (Exception e) { + throw new ConllConverterException("see nested", e); + } + return annotatedSentece; + } + + /** + * Command Line DEMO for the TruthTeller: get the configuration file and text sentence(s), annotate the sentences and print each one in CoNLL format to a separate file. 
+ * + * @param args + * @throws AnnotatorException + * @throws ConfigurationException + * @throws ConfigurationFileDuplicateKeyException + * @throws ParserRunException + * @throws ConllConverterException + * @throws SentenceSplitterException + * @throws IOException + * @throws FileNotFoundException + */ + public static void main(String[] args) + { + BasicConfigurator.configure(); + Logger.getRootLogger().setLevel(Level.INFO); + logger = Logger.getLogger(AnnotateSentenceToConll.class); + try + { + annotateByCommandLineArguments(args); + } + catch(Throwable t) + { + t.printStackTrace(System.out); + logger.error("TruthTeller failed.",t); + } + } + + private static Iterable getSentencesIterable(Iterator argsIterator, AnnotateSentenceToConll app) throws FileNotFoundException, ConfigurationException, IOException, SentenceSplitterException + { + List sentencesToAnnotate = null; + + + String firstArgumentAfterConfigurationFile = null; + if (argsIterator.hasNext()) + { + firstArgumentAfterConfigurationFile = argsIterator.next(); + } + + + if (INPUT_FILE_INDICATOR.equalsIgnoreCase(firstArgumentAfterConfigurationFile)) + { + if (argsIterator.hasNext()) + { + sentencesToAnnotate = app.getSentencesToAnnotate(argsIterator.next()); + } + else + { + throw new RuntimeException("No input file is given, though \""+INPUT_FILE_INDICATOR+"\" has been encountered as a command line argument."); + } + } + else + { + // Read the text from command line + StringBuffer sbInputWords = new StringBuffer(); + + if (firstArgumentAfterConfigurationFile!=null) + { + sbInputWords.append(firstArgumentAfterConfigurationFile); + while (argsIterator.hasNext()) + { + sbInputWords.append(" "); + sbInputWords.append(argsIterator.next()); + } + } + +// List listOfWords = Utils.arrayToCollection(args, new Vector()); +// listOfWords.remove(0); // remove the confFile parameter +// listOfWords.remove(1); // remove the pos-tagger-file-name +// String text = StringUtil.joinIterableToString(listOfWords, " "); + + 
String text = sbInputWords.toString(); + + SENTENCE_SPLITTER.setDocument(text); + SENTENCE_SPLITTER.split(); + sentencesToAnnotate = SENTENCE_SPLITTER.getSentences(); + } + + return sentencesToAnnotate; + } + + + private static void annotateByCommandLineArguments(String[] args) throws AnnotatorException, ConfigurationFileDuplicateKeyException, ConfigurationException, ParserRunException, ConllConverterException, SentenceSplitterException, FileNotFoundException, IOException + { + if (args.length < (1)) + throw new AnnotatorException(String.format("usage: %s configurationFile.xml sentence(s)", AnnotateSentenceToConll.class.getSimpleName())); + + List argsList = Utils.arrayToCollection(args, new Vector()); + Iterator argsIterator = argsList.iterator(); + + ConfigurationFile confFile = new ConfigurationFile(new File(argsIterator.next())); + confFile.setExpandingEnvironmentVariables(true); + AnnotateSentenceToConll app = new AnnotateSentenceToConll(confFile); + + + Iterable sentencesToAnnotate = getSentencesIterable(argsIterator,app); + + List list = new ArrayList(); + for (String sentence : sentencesToAnnotate) + { + ExtendedNode annotatedSentece = app.annotateSentece(sentence); + list.add(annotatedSentece); + } + AnnotatedTreeToConllCoverter.treesToConllFiles(list, app.conllOutputFolder, CONLL_CONVERTER); + } } diff --git a/transformations/src/main/java/eu/excitementproject/eop/transformations/uima/BIUFullLAPWithTruthTeller.java b/transformations/src/main/java/eu/excitementproject/eop/transformations/uima/BIUFullLAPWithTruthTeller.java index 14ea8c13..4ac277a0 100644 --- a/transformations/src/main/java/eu/excitementproject/eop/transformations/uima/BIUFullLAPWithTruthTeller.java +++ b/transformations/src/main/java/eu/excitementproject/eop/transformations/uima/BIUFullLAPWithTruthTeller.java @@ -1,92 +1,92 @@ -package eu.excitementproject.eop.transformations.uima; -import static org.uimafit.factory.AnalysisEngineFactory.createPrimitiveDescription; - -import 
org.apache.uima.analysis_engine.AnalysisEngineDescription; -import org.apache.uima.resource.ResourceInitializationException; - -import eu.excitementproject.eop.common.configuration.CommonConfig; -import eu.excitementproject.eop.common.configuration.NameValueTable; -import eu.excitementproject.eop.common.exception.ConfigurationException; -import eu.excitementproject.eop.lap.LAPAccess; -import eu.excitementproject.eop.lap.LAPException; -import eu.excitementproject.eop.lap.biu.uima.ae.ner.StanfordNamedEntityRecognizerAE; -import eu.excitementproject.eop.lap.biu.uima.ae.parser.EasyFirstParserAE; -import eu.excitementproject.eop.lap.biu.uima.ae.postagger.MaxentPosTaggerAE; -import eu.excitementproject.eop.lap.biu.uima.ae.sentencesplitter.LingPipeSentenceSplitterAE; -import eu.excitementproject.eop.lap.biu.uima.ae.tokenizer.MaxentTokenizerAE; -import eu.excitementproject.eop.lap.implbase.LAP_ImplBaseAE; -import eu.excitementproject.eop.transformations.biu.en.predicatetruth.TruthTellerAnnotatorAE; -import eu.excitementproject.eop.transformations.utilities.TransformationsConfigurationParametersNames; - -/** - * A class to extend BIUFullLAP with truth annotations - * This is implemented within the Transformations package in order to avoid circular dependency between packages - * @author Gabi Stanovsky - * @since Aug 2014 - */ - - -public class BIUFullLAPWithTruthTeller extends LAP_ImplBaseAE implements LAPAccess { - - - public BIUFullLAPWithTruthTeller(String taggerModelFile, String nerModelFile, String parserHost, Integer parserPort, String truthTellerAnnotationsFile) throws LAPException { - try - { - // Step a) Build analysis engine descriptions - AnalysisEngineDescription splitter = createPrimitiveDescription(LingPipeSentenceSplitterAE.class); - AnalysisEngineDescription tokenizer = createPrimitiveDescription(MaxentTokenizerAE.class); - AnalysisEngineDescription tagger = createPrimitiveDescription(MaxentPosTaggerAE.class, - MaxentPosTaggerAE.PARAM_MODEL_FILE , 
taggerModelFile); - AnalysisEngineDescription ner = createPrimitiveDescription(StanfordNamedEntityRecognizerAE.class, - StanfordNamedEntityRecognizerAE.PARAM_MODEL_FILE , nerModelFile); - AnalysisEngineDescription parser = createPrimitiveDescription(EasyFirstParserAE.class, - EasyFirstParserAE.PARAM_HOST , parserHost, - EasyFirstParserAE.PARAM_PORT , parserPort - ); - AnalysisEngineDescription truthteller = createPrimitiveDescription(TruthTellerAnnotatorAE.class, - TruthTellerAnnotatorAE.PARAM_CONFIG , truthTellerAnnotationsFile); - - AnalysisEngineDescription[] descs = new AnalysisEngineDescription[] { - splitter, - tokenizer, - tagger, - ner, - parser, - truthteller, - }; - - // Step b) call initializeViews() - // initialize view with EOP default views. - initializeViews(descs); - - // Step c) set lang ID - languageIdentifier = "EN"; - } - catch (ResourceInitializationException e) - { - throw new LAPException(e); - } - } - - public BIUFullLAPWithTruthTeller(NameValueTable biuFullLAPSection, NameValueTable truthTellerSection) throws LAPException, ConfigurationException { - this( - biuFullLAPSection.getFile(DEFAULT_TAGGER_MODEL_FILE_PARAM).getAbsolutePath(), - biuFullLAPSection.getFile(DEFAULT_NER_MODEL_FILE_PARAM).getAbsolutePath(), - biuFullLAPSection.getString(DEFAULT_PARSER_HOST_NAME), - biuFullLAPSection.getInteger(DEFAULT_PARSER_PORT_NAME), - truthTellerSection.getFile(TransformationsConfigurationParametersNames.ANNOTATION_RULES_FILE).getAbsolutePath() - ); - } - - public BIUFullLAPWithTruthTeller(CommonConfig config) throws LAPException, ConfigurationException { - this(config.getSection(DEFAULT_SECTION_NAME), - config.getSection(TransformationsConfigurationParametersNames.TRUTH_TELLER_MODULE_NAME)); - } - - private static final String DEFAULT_SECTION_NAME = "rte_pairs_preprocess"; - private static final String DEFAULT_TAGGER_MODEL_FILE_PARAM = "easyfirst_stanford_pos_tagger"; - private static final String DEFAULT_NER_MODEL_FILE_PARAM = 
"stanford_ner_classifier_path"; - private static final String DEFAULT_PARSER_HOST_NAME = "easyfirst_host"; - private static final String DEFAULT_PARSER_PORT_NAME = "easyfirst_port"; - -} +package eu.excitementproject.eop.transformations.uima; +import static org.uimafit.factory.AnalysisEngineFactory.createPrimitiveDescription; + +import org.apache.uima.analysis_engine.AnalysisEngineDescription; +import org.apache.uima.resource.ResourceInitializationException; + +import eu.excitementproject.eop.common.configuration.CommonConfig; +import eu.excitementproject.eop.common.configuration.NameValueTable; +import eu.excitementproject.eop.common.exception.ConfigurationException; +import eu.excitementproject.eop.lap.LAPAccess; +import eu.excitementproject.eop.lap.LAPException; +import eu.excitementproject.eop.lap.biu.uima.ae.ner.StanfordNamedEntityRecognizerAE; +import eu.excitementproject.eop.lap.biu.uima.ae.parser.EasyFirstParserAE; +import eu.excitementproject.eop.lap.biu.uima.ae.postagger.MaxentPosTaggerAE; +import eu.excitementproject.eop.lap.biu.uima.ae.sentencesplitter.LingPipeSentenceSplitterAE; +import eu.excitementproject.eop.lap.biu.uima.ae.tokenizer.MaxentTokenizerAE; +import eu.excitementproject.eop.lap.implbase.LAP_ImplBaseAE; +import eu.excitementproject.eop.transformations.biu.en.predicatetruth.TruthTellerAnnotatorAE; +import eu.excitementproject.eop.transformations.utilities.TransformationsConfigurationParametersNames; + +/** + * A class to extend BIUFullLAP with truth annotations + * This is implemented within the Transformations package in order to avoid circular dependency between packages + * @author Gabi Stanovsky + * @since Aug 2014 + */ + + +public class BIUFullLAPWithTruthTeller extends LAP_ImplBaseAE implements LAPAccess { + + + public BIUFullLAPWithTruthTeller(String taggerModelFile, String nerModelFile, String parserHost, Integer parserPort, String truthTellerAnnotationsFile) throws LAPException { + try + { + // Step a) Build analysis engine 
descriptions + AnalysisEngineDescription splitter = createPrimitiveDescription(LingPipeSentenceSplitterAE.class); + AnalysisEngineDescription tokenizer = createPrimitiveDescription(MaxentTokenizerAE.class); + AnalysisEngineDescription tagger = createPrimitiveDescription(MaxentPosTaggerAE.class, + MaxentPosTaggerAE.PARAM_MODEL_FILE , taggerModelFile); + AnalysisEngineDescription ner = createPrimitiveDescription(StanfordNamedEntityRecognizerAE.class, + StanfordNamedEntityRecognizerAE.PARAM_MODEL_FILE , nerModelFile); + AnalysisEngineDescription parser = createPrimitiveDescription(EasyFirstParserAE.class, + EasyFirstParserAE.PARAM_HOST , parserHost, + EasyFirstParserAE.PARAM_PORT , parserPort + ); + AnalysisEngineDescription truthteller = createPrimitiveDescription(TruthTellerAnnotatorAE.class, + TruthTellerAnnotatorAE.PARAM_CONFIG , truthTellerAnnotationsFile); + + AnalysisEngineDescription[] descs = new AnalysisEngineDescription[] { + splitter, + tokenizer, + tagger, + ner, + parser, + truthteller, + }; + + // Step b) call initializeViews() + // initialize view with EOP default views. 
+ initializeViews(descs); + + // Step c) set lang ID + languageIdentifier = "EN"; + } + catch (ResourceInitializationException e) + { + throw new LAPException(e); + } + } + + public BIUFullLAPWithTruthTeller(NameValueTable biuFullLAPSection, NameValueTable truthTellerSection) throws LAPException, ConfigurationException { + this( + biuFullLAPSection.getFile(DEFAULT_TAGGER_MODEL_FILE_PARAM).getAbsolutePath(), + biuFullLAPSection.getFile(DEFAULT_NER_MODEL_FILE_PARAM).getAbsolutePath(), + biuFullLAPSection.getString(DEFAULT_PARSER_HOST_NAME), + biuFullLAPSection.getInteger(DEFAULT_PARSER_PORT_NAME), + truthTellerSection.getFile(TransformationsConfigurationParametersNames.ANNOTATION_RULES_FILE).getAbsolutePath() + ); + } + + public BIUFullLAPWithTruthTeller(CommonConfig config) throws LAPException, ConfigurationException { + this(config.getSection(DEFAULT_SECTION_NAME), + config.getSection(TransformationsConfigurationParametersNames.TRUTH_TELLER_MODULE_NAME)); + } + + private static final String DEFAULT_SECTION_NAME = "rte_pairs_preprocess"; + private static final String DEFAULT_TAGGER_MODEL_FILE_PARAM = "easyfirst_stanford_pos_tagger"; + private static final String DEFAULT_NER_MODEL_FILE_PARAM = "stanford_ner_classifier_path"; + private static final String DEFAULT_PARSER_HOST_NAME = "easyfirst_host"; + private static final String DEFAULT_PARSER_PORT_NAME = "easyfirst_port"; + +} diff --git a/transformations/src/main/java/eu/excitementproject/eop/transformations/uima/ae/truthteller/PredicateTruthAE.java b/transformations/src/main/java/eu/excitementproject/eop/transformations/uima/ae/truthteller/PredicateTruthAE.java index f88e3c35..f4daae01 100644 --- a/transformations/src/main/java/eu/excitementproject/eop/transformations/uima/ae/truthteller/PredicateTruthAE.java +++ b/transformations/src/main/java/eu/excitementproject/eop/transformations/uima/ae/truthteller/PredicateTruthAE.java @@ -1,117 +1,117 @@ -package eu.excitementproject.eop.transformations.uima.ae.truthteller; 
- -import java.util.ArrayList; -import java.util.List; -import java.util.ListIterator; - -import org.apache.uima.UimaContext; -import org.apache.uima.analysis_engine.AnalysisEngineProcessException; -import org.apache.uima.jcas.JCas; -import org.apache.uima.resource.ResourceInitializationException; -import org.uimafit.util.JCasUtil; - -import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; -import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; -import eu.excitement.type.predicatetruth.ClauseTruth; -import eu.excitement.type.predicatetruth.NegationAndUncertainty; -import eu.excitement.type.predicatetruth.PredicateSignature; -import eu.excitement.type.predicatetruth.PredicateTruth; -import eu.excitementproject.eop.common.representation.partofspeech.UnsupportedPosTagStringException; -import eu.excitementproject.eop.lap.biu.uima.CasTreeConverter; -import eu.excitementproject.eop.lap.biu.uima.CasTreeConverterException; -import eu.excitementproject.eop.lap.biu.uima.ae.SingletonSynchronizedAnnotator; -import eu.excitementproject.eop.transformations.biu.en.predicatetruth.PredicateTruthException; -import eu.excitementproject.eop.transformations.biu.en.predicatetruth.SingleTokenTruthAnnotation; -import eu.excitementproject.eop.transformations.representation.ExtendedNode; -import eu.excitementproject.eop.transformations.utilities.parsetreeutils.TreeUtilities; - -/** - * An analysis engine for truth annotations - * @author Gabi Stanovsky - * @since Aug 2014 - */ - -public abstract class PredicateTruthAE extends SingletonSynchronizedAnnotator { - - private CasTreeConverter converter; - - @Override - public void initialize(UimaContext aContext) throws ResourceInitializationException{ - super.initialize(aContext); - converter = new CasTreeConverter(); - } - - @Override - public void process(JCas aJCas) throws AnalysisEngineProcessException { - try { - // Get the raw sentences from the CAS - for (Sentence sentenceAnno : JCasUtil.select(aJCas, 
Sentence.class)) { - // get a list of all tokens in the current sentence - List tokens = JCasUtil.selectCovered(aJCas, Token.class, sentenceAnno); - List taggedTokens; - ExtendedNode annotatedSentence; - annotatedSentence = TreeUtilities.copyFromBasicNode(converter.convertSingleSentenceToTree(aJCas, sentenceAnno)); - - // run inner tool to obtain truth annotations - synchronized (innerTool) { - innerTool.setSentence(annotatedSentence); - innerTool.annotate(); - taggedTokens = innerTool.getAnnotatedEntities(); - } - - // iterate over all tokens and obtain their truth annotations - for (ListIterator it = tokens.listIterator(); it.hasNext();) { - int curIndex = it.nextIndex(); - Token tokenAnno = it.next(); - SingleTokenTruthAnnotation annotationResult = taggedTokens.get(curIndex); - - - if (annotationResult.getPredicateTruthValue() != null){ - // Predicate Truth - PredicateTruth ptTag = TruthMapping.mapPredicateTruth(annotationResult.getPredicateTruthValue(),aJCas,tokenAnno.getBegin(), tokenAnno.getEnd()); - ptTag.addToIndexes(); - - } - if (annotationResult.getClauseTruthValue() != null){ - // Clause Truth - //in this case the annotation result must hold a subordinate clause - pass it to the truth mapping - - //calculate a Token list from extendedNode list - List subtree = new ArrayList(); - for (ExtendedNode e : annotationResult.getSubtree()){ - subtree.add(tokens.get(e.getInfo().getNodeInfo().getSerial()-1)); - } - - // get boundaries from annotationResult and get them from the token's begin and and - int begin = tokens.get(annotationResult.getSubtreeMinimalIndex()).getBegin(), - end = tokens.get(annotationResult.getSubtreeMaximalIndex()).getEnd(); - ClauseTruth ctTag = TruthMapping.mapClauseTruth(annotationResult.getClauseTruthValue(), aJCas, subtree,begin,end); - ctTag.addToIndexes(); - - - } - - if (annotationResult.getNuValue() != null){ - // Negation and Uncertainty - NegationAndUncertainty nuTag = 
TruthMapping.mapNegationAndUncertainty(annotationResult.getNuValue(),aJCas,tokenAnno.getBegin(), tokenAnno.getEnd()); - nuTag.addToIndexes(); - } - - if (annotationResult.getPredicateSignatureValue() != null){ - // Predicate Signature - PredicateSignature sigTag = TruthMapping.mapPredicateSignature(annotationResult.getPredicateSignatureValue(),aJCas,tokenAnno.getBegin(), tokenAnno.getEnd()); - sigTag.addToIndexes(); - } - - - } - } - } - catch (CasTreeConverterException - | UnsupportedPosTagStringException - | PredicateTruthException e ) { - throw new AnalysisEngineProcessException(AnalysisEngineProcessException.ANNOTATOR_EXCEPTION, null, e); - } - } - -} +package eu.excitementproject.eop.transformations.uima.ae.truthteller; + +import java.util.ArrayList; +import java.util.List; +import java.util.ListIterator; + +import org.apache.uima.UimaContext; +import org.apache.uima.analysis_engine.AnalysisEngineProcessException; +import org.apache.uima.jcas.JCas; +import org.apache.uima.resource.ResourceInitializationException; +import org.uimafit.util.JCasUtil; + +import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; +import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; +import eu.excitement.type.predicatetruth.ClauseTruth; +import eu.excitement.type.predicatetruth.NegationAndUncertainty; +import eu.excitement.type.predicatetruth.PredicateSignature; +import eu.excitement.type.predicatetruth.PredicateTruth; +import eu.excitementproject.eop.common.representation.partofspeech.UnsupportedPosTagStringException; +import eu.excitementproject.eop.lap.biu.uima.CasTreeConverter; +import eu.excitementproject.eop.lap.biu.uima.CasTreeConverterException; +import eu.excitementproject.eop.lap.biu.uima.ae.SingletonSynchronizedAnnotator; +import eu.excitementproject.eop.transformations.biu.en.predicatetruth.PredicateTruthException; +import eu.excitementproject.eop.transformations.biu.en.predicatetruth.SingleTokenTruthAnnotation; +import 
eu.excitementproject.eop.transformations.representation.ExtendedNode; +import eu.excitementproject.eop.transformations.utilities.parsetreeutils.TreeUtilities; + +/** + * An analysis engine for truth annotations + * @author Gabi Stanovsky + * @since Aug 2014 + */ + +public abstract class PredicateTruthAE extends SingletonSynchronizedAnnotator { + + private CasTreeConverter converter; + + @Override + public void initialize(UimaContext aContext) throws ResourceInitializationException{ + super.initialize(aContext); + converter = new CasTreeConverter(); + } + + @Override + public void process(JCas aJCas) throws AnalysisEngineProcessException { + try { + // Get the raw sentences from the CAS + for (Sentence sentenceAnno : JCasUtil.select(aJCas, Sentence.class)) { + // get a list of all tokens in the current sentence + List tokens = JCasUtil.selectCovered(aJCas, Token.class, sentenceAnno); + List taggedTokens; + ExtendedNode annotatedSentence; + annotatedSentence = TreeUtilities.copyFromBasicNode(converter.convertSingleSentenceToTree(aJCas, sentenceAnno)); + + // run inner tool to obtain truth annotations + synchronized (innerTool) { + innerTool.setSentence(annotatedSentence); + innerTool.annotate(); + taggedTokens = innerTool.getAnnotatedEntities(); + } + + // iterate over all tokens and obtain their truth annotations + for (ListIterator it = tokens.listIterator(); it.hasNext();) { + int curIndex = it.nextIndex(); + Token tokenAnno = it.next(); + SingleTokenTruthAnnotation annotationResult = taggedTokens.get(curIndex); + + + if (annotationResult.getPredicateTruthValue() != null){ + // Predicate Truth + PredicateTruth ptTag = TruthMapping.mapPredicateTruth(annotationResult.getPredicateTruthValue(),aJCas,tokenAnno.getBegin(), tokenAnno.getEnd()); + ptTag.addToIndexes(); + + } + if (annotationResult.getClauseTruthValue() != null){ + // Clause Truth + //in this case the annotation result must hold a subordinate clause - pass it to the truth mapping + + //calculate a Token 
list from extendedNode list + List subtree = new ArrayList(); + for (ExtendedNode e : annotationResult.getSubtree()){ + subtree.add(tokens.get(e.getInfo().getNodeInfo().getSerial()-1)); + } + + // get boundaries from annotationResult and get them from the token's begin and and + int begin = tokens.get(annotationResult.getSubtreeMinimalIndex()).getBegin(), + end = tokens.get(annotationResult.getSubtreeMaximalIndex()).getEnd(); + ClauseTruth ctTag = TruthMapping.mapClauseTruth(annotationResult.getClauseTruthValue(), aJCas, subtree,begin,end); + ctTag.addToIndexes(); + + + } + + if (annotationResult.getNuValue() != null){ + // Negation and Uncertainty + NegationAndUncertainty nuTag = TruthMapping.mapNegationAndUncertainty(annotationResult.getNuValue(),aJCas,tokenAnno.getBegin(), tokenAnno.getEnd()); + nuTag.addToIndexes(); + } + + if (annotationResult.getPredicateSignatureValue() != null){ + // Predicate Signature + PredicateSignature sigTag = TruthMapping.mapPredicateSignature(annotationResult.getPredicateSignatureValue(),aJCas,tokenAnno.getBegin(), tokenAnno.getEnd()); + sigTag.addToIndexes(); + } + + + } + } + } + catch (CasTreeConverterException + | UnsupportedPosTagStringException + | PredicateTruthException e ) { + throw new AnalysisEngineProcessException(AnalysisEngineProcessException.ANNOTATOR_EXCEPTION, null, e); + } + } + +} diff --git a/transformations/src/main/java/eu/excitementproject/eop/transformations/uima/ae/truthteller/TruthMapping.java b/transformations/src/main/java/eu/excitementproject/eop/transformations/uima/ae/truthteller/TruthMapping.java index ecf4034e..4a2c9480 100644 --- a/transformations/src/main/java/eu/excitementproject/eop/transformations/uima/ae/truthteller/TruthMapping.java +++ b/transformations/src/main/java/eu/excitementproject/eop/transformations/uima/ae/truthteller/TruthMapping.java @@ -1,133 +1,133 @@ -package eu.excitementproject.eop.transformations.uima.ae.truthteller; - -import java.util.HashMap; -import java.util.List; 
-import java.util.Map; - -import org.apache.uima.cas.Type; -import org.apache.uima.jcas.JCas; -import org.apache.uima.jcas.cas.FSArray; - -import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; -import eu.excitement.type.predicatetruth.ClauseTruth; -import eu.excitement.type.predicatetruth.ClauseTruthNegative; -import eu.excitement.type.predicatetruth.ClauseTruthNotIdentified; -import eu.excitement.type.predicatetruth.ClauseTruthPositive; -import eu.excitement.type.predicatetruth.ClauseTruthUncertain; -import eu.excitement.type.predicatetruth.NegationAndUncertainty; -import eu.excitement.type.predicatetruth.NegationAndUncertaintyNegative; -import eu.excitement.type.predicatetruth.NegationAndUncertaintyPositive; -import eu.excitement.type.predicatetruth.NegationAndUncertaintyUncertain; -import eu.excitement.type.predicatetruth.PredicateSignature; -import eu.excitement.type.predicatetruth.PredicateSignatureNegativeNegative; -import eu.excitement.type.predicatetruth.PredicateSignatureNegativePositive; -import eu.excitement.type.predicatetruth.PredicateSignatureNegativeUncertain; -import eu.excitement.type.predicatetruth.PredicateSignaturePositiveNegative; -import eu.excitement.type.predicatetruth.PredicateSignaturePositivePositive; -import eu.excitement.type.predicatetruth.PredicateSignaturePositiveUncertain; -import eu.excitement.type.predicatetruth.PredicateSignatureUncertainNegative; -import eu.excitement.type.predicatetruth.PredicateSignatureUncertainPositive; -import eu.excitement.type.predicatetruth.PredicateSignatureUncertainUncertain; -import eu.excitement.type.predicatetruth.PredicateTruth; -import eu.excitement.type.predicatetruth.PredicateTruthNegative; -import eu.excitement.type.predicatetruth.PredicateTruthNotIdentified; -import eu.excitement.type.predicatetruth.PredicateTruthPositive; -import eu.excitement.type.predicatetruth.PredicateTruthUncertain; -import eu.excitementproject.eop.transformations.representation.annotations.PredTruth; - -/** 
- * Conversion class from Truthteller's annotations to UIMA annotations - * Each static function converts a different annotation type. - * @author Gabi Stanovsky - * @since Aug 2014 - */ - -public class TruthMapping { - - public static PredicateTruth mapPredicateTruth(PredTruth pt, JCas jcas, int begin, int end){ - Type type = jcas.getTypeSystem().getType(PRED_TRUTH_MAP.get(pt).getName()); - PredicateTruth ret = (PredicateTruth)jcas.getCas().createAnnotation(type, begin, end); - return ret; - } - - public static ClauseTruth mapClauseTruth(eu.excitementproject.eop.transformations.representation.annotations.ClauseTruth ct, JCas jcas, List subtree,int begin,int end){ - int subtreeSize = subtree.size(); - Type type = jcas.getTypeSystem().getType(CLAUSE_TRUTH_MAP.get(ct).getName()); - ClauseTruth ret = (ClauseTruth)jcas.getCas().createAnnotation(type, begin, end); - - // set the subtree tokens as a feature structure - FSArray subtreeFSArray = new FSArray(jcas, subtreeSize); - subtreeFSArray.copyFromArray(subtree.toArray(new Token[subtree.size()]), 0, 0, subtreeSize); - ret.setClauseTokens(subtreeFSArray); - return ret; - } - - public static NegationAndUncertainty mapNegationAndUncertainty(eu.excitementproject.eop.transformations.representation.annotations.NegationAndUncertainty nu, JCas jcas,int begin,int end){ - Type type = jcas.getTypeSystem().getType(NU_MAP.get(nu).getName()); - NegationAndUncertainty ret = (NegationAndUncertainty)jcas.getCas().createAnnotation(type, begin, end); - return ret; - } - - public static PredicateSignature mapPredicateSignature(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature sig,JCas jcas,int begin,int end){ - Type type = jcas.getTypeSystem().getType(SIG_MAP.get(sig).getName()); - PredicateSignature ret = (PredicateSignature)jcas.getCas().createAnnotation(type, begin, end); - return ret; - } - - //static mapping from TruthTeller types to UIMA types - public static Map> PRED_TRUTH_MAP = new HashMap>(); 
- public static Map> CLAUSE_TRUTH_MAP = new HashMap>(); - public static Map> NU_MAP = new HashMap>(); - public static Map> SIG_MAP = new HashMap>(); - static - { - // predicate truth mapping - PRED_TRUTH_MAP.put(PredTruth.P, PredicateTruthPositive.class); - PRED_TRUTH_MAP.put(PredTruth.N, PredicateTruthNegative.class); - PRED_TRUTH_MAP.put(PredTruth.U, PredicateTruthUncertain.class); - PRED_TRUTH_MAP.put(PredTruth.O, PredicateTruthNotIdentified.class); - - // clause truth mapping - CLAUSE_TRUTH_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.ClauseTruth.P, ClauseTruthPositive.class); - CLAUSE_TRUTH_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.ClauseTruth.N, ClauseTruthNegative.class); - CLAUSE_TRUTH_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.ClauseTruth.U, ClauseTruthUncertain.class); - CLAUSE_TRUTH_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.ClauseTruth.O, ClauseTruthNotIdentified.class); - - // negation and uncertainty mapping - NU_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.NegationAndUncertainty.P, NegationAndUncertaintyPositive.class); - NU_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.NegationAndUncertainty.N, NegationAndUncertaintyNegative.class); - NU_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.NegationAndUncertainty.U, NegationAndUncertaintyUncertain.class); - - // predicate signature mapping - // signature: -/- - SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.N_N, PredicateSignatureNegativeNegative.class); - // signature: -/+ - SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.N_P, PredicateSignatureNegativePositive.class); - // signature: -/? 
- SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.N_U, PredicateSignatureNegativeUncertain.class); - SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.N_U_InfP, PredicateSignatureNegativeUncertain.class); - // signature: +/- - SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_N, PredicateSignaturePositiveNegative.class); - SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_N_InfP, PredicateSignaturePositiveNegative.class); - // signature: +/+ - SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_P, PredicateSignaturePositivePositive.class); - SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_P_FinP, PredicateSignaturePositivePositive.class); - SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_P_FinP_N_P_InfP, PredicateSignaturePositivePositive.class); - SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_P_FinP_N_U_InfP, PredicateSignaturePositivePositive.class); - SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_P_FinP_P_N_InfP, PredicateSignaturePositivePositive.class); - SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_P_FinP_P_U_InfP, PredicateSignaturePositivePositive.class); - // signature: +/? 
- SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_U, PredicateSignaturePositiveUncertain.class); - SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_U_FinP, PredicateSignaturePositiveUncertain.class); - SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_U_InfP, PredicateSignaturePositiveUncertain.class); - // signature: ?/- - SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.U_N, PredicateSignatureUncertainNegative.class); - // signature: ?/+ - SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.U_P, PredicateSignatureUncertainPositive.class); - // signature: ?/? (default in unknown cases) - SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.U_U, PredicateSignatureUncertainUncertain.class); - SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.NOT_IN_LEXICON, PredicateSignatureUncertainUncertain.class); - }; - - -} +package eu.excitementproject.eop.transformations.uima.ae.truthteller; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.uima.cas.Type; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.cas.FSArray; + +import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token; +import eu.excitement.type.predicatetruth.ClauseTruth; +import eu.excitement.type.predicatetruth.ClauseTruthNegative; +import eu.excitement.type.predicatetruth.ClauseTruthNotIdentified; +import eu.excitement.type.predicatetruth.ClauseTruthPositive; +import eu.excitement.type.predicatetruth.ClauseTruthUncertain; +import eu.excitement.type.predicatetruth.NegationAndUncertainty; +import eu.excitement.type.predicatetruth.NegationAndUncertaintyNegative; +import 
eu.excitement.type.predicatetruth.NegationAndUncertaintyPositive; +import eu.excitement.type.predicatetruth.NegationAndUncertaintyUncertain; +import eu.excitement.type.predicatetruth.PredicateSignature; +import eu.excitement.type.predicatetruth.PredicateSignatureNegativeNegative; +import eu.excitement.type.predicatetruth.PredicateSignatureNegativePositive; +import eu.excitement.type.predicatetruth.PredicateSignatureNegativeUncertain; +import eu.excitement.type.predicatetruth.PredicateSignaturePositiveNegative; +import eu.excitement.type.predicatetruth.PredicateSignaturePositivePositive; +import eu.excitement.type.predicatetruth.PredicateSignaturePositiveUncertain; +import eu.excitement.type.predicatetruth.PredicateSignatureUncertainNegative; +import eu.excitement.type.predicatetruth.PredicateSignatureUncertainPositive; +import eu.excitement.type.predicatetruth.PredicateSignatureUncertainUncertain; +import eu.excitement.type.predicatetruth.PredicateTruth; +import eu.excitement.type.predicatetruth.PredicateTruthNegative; +import eu.excitement.type.predicatetruth.PredicateTruthNotIdentified; +import eu.excitement.type.predicatetruth.PredicateTruthPositive; +import eu.excitement.type.predicatetruth.PredicateTruthUncertain; +import eu.excitementproject.eop.transformations.representation.annotations.PredTruth; + +/** + * Conversion class from Truthteller's annotations to UIMA annotations + * Each static function converts a different annotation type. 
+ * @author Gabi Stanovsky + * @since Aug 2014 + */ + +public class TruthMapping { + + public static PredicateTruth mapPredicateTruth(PredTruth pt, JCas jcas, int begin, int end){ + Type type = jcas.getTypeSystem().getType(PRED_TRUTH_MAP.get(pt).getName()); + PredicateTruth ret = (PredicateTruth)jcas.getCas().createAnnotation(type, begin, end); + return ret; + } + + public static ClauseTruth mapClauseTruth(eu.excitementproject.eop.transformations.representation.annotations.ClauseTruth ct, JCas jcas, List subtree,int begin,int end){ + int subtreeSize = subtree.size(); + Type type = jcas.getTypeSystem().getType(CLAUSE_TRUTH_MAP.get(ct).getName()); + ClauseTruth ret = (ClauseTruth)jcas.getCas().createAnnotation(type, begin, end); + + // set the subtree tokens as a feature structure + FSArray subtreeFSArray = new FSArray(jcas, subtreeSize); + subtreeFSArray.copyFromArray(subtree.toArray(new Token[subtree.size()]), 0, 0, subtreeSize); + ret.setClauseTokens(subtreeFSArray); + return ret; + } + + public static NegationAndUncertainty mapNegationAndUncertainty(eu.excitementproject.eop.transformations.representation.annotations.NegationAndUncertainty nu, JCas jcas,int begin,int end){ + Type type = jcas.getTypeSystem().getType(NU_MAP.get(nu).getName()); + NegationAndUncertainty ret = (NegationAndUncertainty)jcas.getCas().createAnnotation(type, begin, end); + return ret; + } + + public static PredicateSignature mapPredicateSignature(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature sig,JCas jcas,int begin,int end){ + Type type = jcas.getTypeSystem().getType(SIG_MAP.get(sig).getName()); + PredicateSignature ret = (PredicateSignature)jcas.getCas().createAnnotation(type, begin, end); + return ret; + } + + //static mapping from TruthTeller types to UIMA types + public static Map> PRED_TRUTH_MAP = new HashMap>(); + public static Map> CLAUSE_TRUTH_MAP = new HashMap>(); + public static Map> NU_MAP = new HashMap>(); + public static Map> SIG_MAP = 
new HashMap>(); + static + { + // predicate truth mapping + PRED_TRUTH_MAP.put(PredTruth.P, PredicateTruthPositive.class); + PRED_TRUTH_MAP.put(PredTruth.N, PredicateTruthNegative.class); + PRED_TRUTH_MAP.put(PredTruth.U, PredicateTruthUncertain.class); + PRED_TRUTH_MAP.put(PredTruth.O, PredicateTruthNotIdentified.class); + + // clause truth mapping + CLAUSE_TRUTH_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.ClauseTruth.P, ClauseTruthPositive.class); + CLAUSE_TRUTH_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.ClauseTruth.N, ClauseTruthNegative.class); + CLAUSE_TRUTH_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.ClauseTruth.U, ClauseTruthUncertain.class); + CLAUSE_TRUTH_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.ClauseTruth.O, ClauseTruthNotIdentified.class); + + // negation and uncertainty mapping + NU_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.NegationAndUncertainty.P, NegationAndUncertaintyPositive.class); + NU_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.NegationAndUncertainty.N, NegationAndUncertaintyNegative.class); + NU_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.NegationAndUncertainty.U, NegationAndUncertaintyUncertain.class); + + // predicate signature mapping + // signature: -/- + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.N_N, PredicateSignatureNegativeNegative.class); + // signature: -/+ + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.N_P, PredicateSignatureNegativePositive.class); + // signature: -/? 
+ SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.N_U, PredicateSignatureNegativeUncertain.class); + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.N_U_InfP, PredicateSignatureNegativeUncertain.class); + // signature: +/- + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_N, PredicateSignaturePositiveNegative.class); + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_N_InfP, PredicateSignaturePositiveNegative.class); + // signature: +/+ + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_P, PredicateSignaturePositivePositive.class); + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_P_FinP, PredicateSignaturePositivePositive.class); + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_P_FinP_N_P_InfP, PredicateSignaturePositivePositive.class); + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_P_FinP_N_U_InfP, PredicateSignaturePositivePositive.class); + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_P_FinP_P_N_InfP, PredicateSignaturePositivePositive.class); + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_P_FinP_P_U_InfP, PredicateSignaturePositivePositive.class); + // signature: +/? 
+ SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_U, PredicateSignaturePositiveUncertain.class); + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_U_FinP, PredicateSignaturePositiveUncertain.class); + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.P_U_InfP, PredicateSignaturePositiveUncertain.class); + // signature: ?/- + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.U_N, PredicateSignatureUncertainNegative.class); + // signature: ?/+ + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.U_P, PredicateSignatureUncertainPositive.class); + // signature: ?/? (default in unknown cases) + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.U_U, PredicateSignatureUncertainUncertain.class); + SIG_MAP.put(eu.excitementproject.eop.transformations.representation.annotations.PredicateSignature.NOT_IN_LEXICON, PredicateSignatureUncertainUncertain.class); + }; + + +} diff --git a/transformations/src/test/java/eu/excitementproject/eop/transformations/component/alignment/predicatetruthlink/PredicateTruthAlignerTest.java b/transformations/src/test/java/eu/excitementproject/eop/transformations/component/alignment/predicatetruthlink/PredicateTruthAlignerTest.java index 7a19c519..b0087ded 100644 --- a/transformations/src/test/java/eu/excitementproject/eop/transformations/component/alignment/predicatetruthlink/PredicateTruthAlignerTest.java +++ b/transformations/src/test/java/eu/excitementproject/eop/transformations/component/alignment/predicatetruthlink/PredicateTruthAlignerTest.java @@ -1,193 +1,193 @@ -package eu.excitementproject.eop.transformations.component.alignment.predicatetruthlink; - -import static 
eu.excitementproject.eop.transformations.component.alignment.predicatetruthlink.PredicateTruthAligner.ALIGNEMNT_TYPE_AGREEING_NEGATIVE; -import static eu.excitementproject.eop.transformations.component.alignment.predicatetruthlink.PredicateTruthAligner.ALIGNEMNT_TYPE_AGREEING_POSITIVE; -import static eu.excitementproject.eop.transformations.component.alignment.predicatetruthlink.PredicateTruthAligner.ALIGNEMNT_TYPE_DISAGREEING; -import static eu.excitementproject.eop.transformations.component.alignment.predicatetruthlink.PredicateTruthAligner.ALIGNEMNT_TYPE_NON_MATCHING; -import static org.junit.Assert.assertEquals; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Iterator; -import java.util.List; - -import org.apache.uima.cas.CASException; -import org.apache.uima.jcas.JCas; -import org.junit.BeforeClass; -import org.junit.Test; -import org.uimafit.util.JCasUtil; - -import eu.excitement.type.alignment.Link; -import eu.excitement.type.alignment.Link.Direction; -import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; -import eu.excitementproject.eop.common.component.alignment.PairAnnotatorComponentException; -import eu.excitementproject.eop.lap.LAPAccess; -import eu.excitementproject.eop.lap.LAPException; -import eu.excitementproject.eop.lap.biu.test.BiuTestUtils; -import eu.excitementproject.eop.lap.implbase.LAP_ImplBase; -import eu.excitementproject.eop.transformations.component.alignment.predicatetruthlink.PredicateTruthAligner; -import eu.excitementproject.eop.transformations.uima.BIUFullLAPWithTruthTellerConfigured; -/** - * A test class for {@link PredicateTruthAligner} - * This test class must reside within transformations since it makes use the truthteller annotator, - * which is not accessible from core (where the aligner is implemented) - * @author Gabi Stanovsky - * @since Aug 2014 - * - */ -public class PredicateTruthAlignerTest { - - @BeforeClass - public static void 
beforeClass() throws IOException, LAPException, PairAnnotatorComponentException, CASException { - // Run test only under BIU environment - BiuTestUtils.assumeBiuEnvironment(); - // create a lap with truth teller annotator - lap = new BIUFullLAPWithTruthTellerConfigured(); - // create an aligner - aligner = new PredicateTruthAligner(); - // annotations for reference text - all tests will examine this result - jcas = lap.generateSingleTHPairCAS(testText, testHypothesis); - aligner.annotate(jcas); - hypoView = jcas.getView(LAP_ImplBase.HYPOTHESISVIEW); - - } - - @Test - public void testAgreeingPositive() throws Exception { - List observed = new ArrayList(); - // get all positive agreeing links - for (Link link : JCasUtil.select(hypoView, Link.class)) { - if (link.getLinkInfo().equals(ALIGNEMNT_TYPE_AGREEING_POSITIVE)){ - observed.add(link); - } - } - // verify that the observed links match the expected - assertAlignmentListEqual(observed, positiveAgreeingExpectedText, positiveAgreeingExpectedHypo); - } - - @Test - public void testAgreeingNegative() throws Exception { - List observed = new ArrayList(); - // get all positive agreeing links - for (Link link : JCasUtil.select(hypoView, Link.class)) { - if (link.getLinkInfo().equals(ALIGNEMNT_TYPE_AGREEING_NEGATIVE)){ - observed.add(link); - } - } - // verify that the observed links match the expected - assertAlignmentListEqual(observed, negativeAgreeingExpectedText, negativeAgreeingExpectedHypo); - } - - @Test - public void testDisagreeing() throws Exception { - List observed = new ArrayList(); - // get all positive agreeing links - for (Link link : JCasUtil.select(hypoView, Link.class)) { - if (link.getLinkInfo().equals(ALIGNEMNT_TYPE_DISAGREEING)){ - observed.add(link); - } - } - // verify that the observed links match the expected - assertAlignmentListEqual(observed, disagreeingExpectedText, disagreeingExpectedHypo); - } - - @Test - public void testNonMatching() throws Exception { - List observed = new ArrayList(); - 
// get all positive agreeing links - for (Link link : JCasUtil.select(hypoView, Link.class)) { - if (link.getLinkInfo().equals(ALIGNEMNT_TYPE_NON_MATCHING)){ - observed.add(link); - } - } - // verify that the observed links match the expected - assertAlignmentListEqual(observed, nonMatchingExpectedText, nonMatchingExpectedHypo); - } - - /** - * Verify that an observed list of annotations covers an expected list of strings - * @param observed - * @param expected - */ - private void assertAlignmentListEqual(Collection observed, List expectedText,List expectedHypo){ - // assert expected and observed annotations are of the same size - int s = observed.size(); - assertEquals(s,expectedText.size()); - - //iterate over expected and observed annotations and assert all are equal - Iterator expectedTextIter = expectedText.iterator(); - Iterator expectedHypoIter = expectedHypo.iterator(); - Iterator observedIter = observed.iterator(); - - for (int i=0;i positiveAgreeingExpectedText = new ArrayList(); - private static List positiveAgreeingExpectedHypo = new ArrayList(); - - //Agreeing Negative - private static List negativeAgreeingExpectedText = new ArrayList(); - private static List negativeAgreeingExpectedHypo = new ArrayList(); - - // Disagreeing - private static List disagreeingExpectedText = new ArrayList(); - private static List disagreeingExpectedHypo = new ArrayList(); - - // Non Matching - private static List nonMatchingExpectedText = new ArrayList(); - private static List nonMatchingExpectedHypo = new ArrayList(); - - - - static{ - positiveAgreeingExpectedText.add("refused"); - positiveAgreeingExpectedHypo.add("did"); - positiveAgreeingExpectedText.add("thought"); - positiveAgreeingExpectedHypo.add("did"); - - negativeAgreeingExpectedText.add("dance"); - negativeAgreeingExpectedHypo.add("dance"); - - disagreeingExpectedText.add("dance"); - disagreeingExpectedHypo.add("did"); - disagreeingExpectedText.add("thought"); - disagreeingExpectedHypo.add("dance"); - 
disagreeingExpectedText.add("refused"); - disagreeingExpectedHypo.add("dance"); - - nonMatchingExpectedText.add("jumping"); - nonMatchingExpectedHypo.add("did"); - nonMatchingExpectedText.add("jumping"); - nonMatchingExpectedHypo.add("dance"); - - } - - -} - - +package eu.excitementproject.eop.transformations.component.alignment.predicatetruthlink; + +import static eu.excitementproject.eop.transformations.component.alignment.predicatetruthlink.PredicateTruthAligner.ALIGNEMNT_TYPE_AGREEING_NEGATIVE; +import static eu.excitementproject.eop.transformations.component.alignment.predicatetruthlink.PredicateTruthAligner.ALIGNEMNT_TYPE_AGREEING_POSITIVE; +import static eu.excitementproject.eop.transformations.component.alignment.predicatetruthlink.PredicateTruthAligner.ALIGNEMNT_TYPE_DISAGREEING; +import static eu.excitementproject.eop.transformations.component.alignment.predicatetruthlink.PredicateTruthAligner.ALIGNEMNT_TYPE_NON_MATCHING; +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; + +import org.apache.uima.cas.CASException; +import org.apache.uima.jcas.JCas; +import org.junit.BeforeClass; +import org.junit.Test; +import org.uimafit.util.JCasUtil; + +import eu.excitement.type.alignment.Link; +import eu.excitement.type.alignment.Link.Direction; +import eu.excitementproject.eop.common.component.alignment.AlignmentComponent; +import eu.excitementproject.eop.common.component.alignment.PairAnnotatorComponentException; +import eu.excitementproject.eop.lap.LAPAccess; +import eu.excitementproject.eop.lap.LAPException; +import eu.excitementproject.eop.lap.biu.test.BiuTestUtils; +import eu.excitementproject.eop.lap.implbase.LAP_ImplBase; +import eu.excitementproject.eop.transformations.component.alignment.predicatetruthlink.PredicateTruthAligner; +import eu.excitementproject.eop.transformations.uima.BIUFullLAPWithTruthTellerConfigured; +/** 
+ * A test class for {@link PredicateTruthAligner} + * This test class must reside within transformations since it makes use the truthteller annotator, + * which is not accessible from core (where the aligner is implemented) + * @author Gabi Stanovsky + * @since Aug 2014 + * + */ +public class PredicateTruthAlignerTest { + + @BeforeClass + public static void beforeClass() throws IOException, LAPException, PairAnnotatorComponentException, CASException { + // Run test only under BIU environment + BiuTestUtils.assumeBiuEnvironment(); + // create a lap with truth teller annotator + lap = new BIUFullLAPWithTruthTellerConfigured(); + // create an aligner + aligner = new PredicateTruthAligner(); + // annotations for reference text - all tests will examine this result + jcas = lap.generateSingleTHPairCAS(testText, testHypothesis); + aligner.annotate(jcas); + hypoView = jcas.getView(LAP_ImplBase.HYPOTHESISVIEW); + + } + + @Test + public void testAgreeingPositive() throws Exception { + List observed = new ArrayList(); + // get all positive agreeing links + for (Link link : JCasUtil.select(hypoView, Link.class)) { + if (link.getLinkInfo().equals(ALIGNEMNT_TYPE_AGREEING_POSITIVE)){ + observed.add(link); + } + } + // verify that the observed links match the expected + assertAlignmentListEqual(observed, positiveAgreeingExpectedText, positiveAgreeingExpectedHypo); + } + + @Test + public void testAgreeingNegative() throws Exception { + List observed = new ArrayList(); + // get all positive agreeing links + for (Link link : JCasUtil.select(hypoView, Link.class)) { + if (link.getLinkInfo().equals(ALIGNEMNT_TYPE_AGREEING_NEGATIVE)){ + observed.add(link); + } + } + // verify that the observed links match the expected + assertAlignmentListEqual(observed, negativeAgreeingExpectedText, negativeAgreeingExpectedHypo); + } + + @Test + public void testDisagreeing() throws Exception { + List observed = new ArrayList(); + // get all positive agreeing links + for (Link link : 
JCasUtil.select(hypoView, Link.class)) { + if (link.getLinkInfo().equals(ALIGNEMNT_TYPE_DISAGREEING)){ + observed.add(link); + } + } + // verify that the observed links match the expected + assertAlignmentListEqual(observed, disagreeingExpectedText, disagreeingExpectedHypo); + } + + @Test + public void testNonMatching() throws Exception { + List observed = new ArrayList(); + // get all positive agreeing links + for (Link link : JCasUtil.select(hypoView, Link.class)) { + if (link.getLinkInfo().equals(ALIGNEMNT_TYPE_NON_MATCHING)){ + observed.add(link); + } + } + // verify that the observed links match the expected + assertAlignmentListEqual(observed, nonMatchingExpectedText, nonMatchingExpectedHypo); + } + + /** + * Verify that an observed list of annotations covers an expected list of strings + * @param observed + * @param expected + */ + private void assertAlignmentListEqual(Collection observed, List expectedText,List expectedHypo){ + // assert expected and observed annotations are of the same size + int s = observed.size(); + assertEquals(s,expectedText.size()); + + //iterate over expected and observed annotations and assert all are equal + Iterator expectedTextIter = expectedText.iterator(); + Iterator expectedHypoIter = expectedHypo.iterator(); + Iterator observedIter = observed.iterator(); + + for (int i=0;i positiveAgreeingExpectedText = new ArrayList(); + private static List positiveAgreeingExpectedHypo = new ArrayList(); + + //Agreeing Negative + private static List negativeAgreeingExpectedText = new ArrayList(); + private static List negativeAgreeingExpectedHypo = new ArrayList(); + + // Disagreeing + private static List disagreeingExpectedText = new ArrayList(); + private static List disagreeingExpectedHypo = new ArrayList(); + + // Non Matching + private static List nonMatchingExpectedText = new ArrayList(); + private static List nonMatchingExpectedHypo = new ArrayList(); + + + + static{ + positiveAgreeingExpectedText.add("refused"); + 
positiveAgreeingExpectedHypo.add("did"); + positiveAgreeingExpectedText.add("thought"); + positiveAgreeingExpectedHypo.add("did"); + + negativeAgreeingExpectedText.add("dance"); + negativeAgreeingExpectedHypo.add("dance"); + + disagreeingExpectedText.add("dance"); + disagreeingExpectedHypo.add("did"); + disagreeingExpectedText.add("thought"); + disagreeingExpectedHypo.add("dance"); + disagreeingExpectedText.add("refused"); + disagreeingExpectedHypo.add("dance"); + + nonMatchingExpectedText.add("jumping"); + nonMatchingExpectedHypo.add("did"); + nonMatchingExpectedText.add("jumping"); + nonMatchingExpectedHypo.add("dance"); + + } + + +} + + diff --git a/transformations/src/test/java/eu/excitementproject/eop/transformations/uima/BIUFullLAPWithTruthTellerConfigured.java b/transformations/src/test/java/eu/excitementproject/eop/transformations/uima/BIUFullLAPWithTruthTellerConfigured.java index 0982c58e..31f2da03 100644 --- a/transformations/src/test/java/eu/excitementproject/eop/transformations/uima/BIUFullLAPWithTruthTellerConfigured.java +++ b/transformations/src/test/java/eu/excitementproject/eop/transformations/uima/BIUFullLAPWithTruthTellerConfigured.java @@ -1,24 +1,24 @@ -package eu.excitementproject.eop.transformations.uima; - -import eu.excitementproject.eop.lap.LAPException; -import eu.excitementproject.eop.lap.biu.test.BiuTestParams; - -/*** - * A version of {@link BIUFullLAPWithTruthTeller} that is configured to paths relative to the biutee/workdir folder. - * Should be used only for testing, as these values should be read for configuration in other scenarios. 
- * - * @author Gabi Stanovsky - * @since August 2014 - */ - - -public class BIUFullLAPWithTruthTellerConfigured extends BIUFullLAPWithTruthTeller { - public BIUFullLAPWithTruthTellerConfigured() throws LAPException { - super( BiuTestParams.MAXENT_POS_TAGGER_MODEL_FILE, - BiuTestParams.STANFORD_NER_CLASSIFIER_PATH, - BiuTestParams.EASYFIRST_HOST, - BiuTestParams.EASYFIRST_PORT, - BiuTestParams.TRUTH_TELLER_MODEL_FILE); - } - -} +package eu.excitementproject.eop.transformations.uima; + +import eu.excitementproject.eop.lap.LAPException; +import eu.excitementproject.eop.lap.biu.test.BiuTestParams; + +/*** + * A version of {@link BIUFullLAPWithTruthTeller} that is configured to paths relative to the biutee/workdir folder. + * Should be used only for testing, as these values should be read for configuration in other scenarios. + * + * @author Gabi Stanovsky + * @since August 2014 + */ + + +public class BIUFullLAPWithTruthTellerConfigured extends BIUFullLAPWithTruthTeller { + public BIUFullLAPWithTruthTellerConfigured() throws LAPException { + super( BiuTestParams.MAXENT_POS_TAGGER_MODEL_FILE, + BiuTestParams.STANFORD_NER_CLASSIFIER_PATH, + BiuTestParams.EASYFIRST_HOST, + BiuTestParams.EASYFIRST_PORT, + BiuTestParams.TRUTH_TELLER_MODEL_FILE); + } + +} diff --git a/transformations/src/test/java/eu/excitementproject/eop/transformations/uima/ae/truthteller/PredicateTruthAETest.java b/transformations/src/test/java/eu/excitementproject/eop/transformations/uima/ae/truthteller/PredicateTruthAETest.java index e5ea41e9..be9daf3b 100644 --- a/transformations/src/test/java/eu/excitementproject/eop/transformations/uima/ae/truthteller/PredicateTruthAETest.java +++ b/transformations/src/test/java/eu/excitementproject/eop/transformations/uima/ae/truthteller/PredicateTruthAETest.java @@ -1,118 +1,118 @@ -package eu.excitementproject.eop.transformations.uima.ae.truthteller; -import static org.junit.Assert.assertEquals; - -import java.io.IOException; -import java.util.ArrayList; -import 
java.util.Collection; -import java.util.Iterator; -import java.util.List; - -import org.apache.uima.cas.CASException; -import org.apache.uima.jcas.JCas; -import org.apache.uima.jcas.tcas.Annotation; -import org.junit.BeforeClass; -import org.junit.Test; -import org.uimafit.util.JCasUtil; - -import eu.excitement.type.predicatetruth.ClauseTruthNegative; -import eu.excitement.type.predicatetruth.NegationAndUncertaintyNegative; -import eu.excitement.type.predicatetruth.PredicateTruth; -import eu.excitement.type.predicatetruth.PredicateTruthNegative; -import eu.excitement.type.predicatetruth.PredicateTruthPositive; -import eu.excitementproject.eop.lap.LAPAccess; -import eu.excitementproject.eop.lap.LAPException; -import eu.excitementproject.eop.lap.biu.test.BiuTestUtils; -import eu.excitementproject.eop.lap.implbase.LAP_ImplBase; -import eu.excitementproject.eop.transformations.biu.en.predicatetruth.TruthTellerAnnotatorAE; -import eu.excitementproject.eop.transformations.uima.BIUFullLAPWithTruthTellerConfigured; - -/** - * A test class for {@link TruthTellerAnnotatorAE} - * @author Gabi Stanovsky - * @since Aug 2014 - * - */ -public class PredicateTruthAETest { - - @BeforeClass - public static void beforeClass() throws LAPException, CASException, IOException { - // Run test only under BIU environment - BiuTestUtils.assumeBiuEnvironment(); - // create a lap with truth teller annotator - lap = new BIUFullLAPWithTruthTellerConfigured(); - // annotations for reference text - all tests will examine this result - jcas = lap.generateSingleTHPairCAS(testText, testHypothesis); - tView = jcas.getView(LAP_ImplBase.TEXTVIEW); - hView = jcas.getView(LAP_ImplBase.HYPOTHESISVIEW); - } - - @Test - public void testPT() throws Exception { - Collection annotations = new ArrayList(JCasUtil.select(tView, PredicateTruth.class)); - assertPTListEqual(annotations,ptExpected); - } - - @Test - public void testPTPositive() throws Exception { - Collection annotations = new 
ArrayList(JCasUtil.select(tView, PredicateTruthPositive.class)); - assertPTListEqual(annotations,ptPositiveExpected); - } - - @Test - public void testPTNegative() throws Exception { - Collection annotations = new ArrayList(JCasUtil.select(tView, PredicateTruthNegative.class)); - assertPTListEqual(annotations,ptNegativeExpected); - } - - @Test - public void testNU() throws Exception { - Collection annotations = new ArrayList(JCasUtil.select(hView, NegationAndUncertaintyNegative.class)); - assertPTListEqual(annotations,nuNegativeExpected); - } - - @Test - public void testCTNegative() throws Exception{ - Collection annotations = new ArrayList(JCasUtil.select(tView, ClauseTruthNegative.class)); - assertPTListEqual(annotations,ctNegativeExpected); - } - - /** - * Verify that an observed list of annotations covers an expected list of strings - * @param observed - * @param expected - */ - private static void assertPTListEqual(Collection observed, List expected){ - // assert expected and observed annotations are of the same size - int s = observed.size(); - assertEquals(s,expected.size()); - - //iterate over expected and observed annotations and assert all are equal - Iterator observedIter = observed.iterator(); - Iterator expectedIter = expected.iterator(); - for (int i=0;i ptExpected = new ArrayList(); - private static List ptPositiveExpected = new ArrayList(); - private static List ptNegativeExpected = new ArrayList(); - private static List nuNegativeExpected = new ArrayList(); - private static List ctNegativeExpected = new ArrayList(); - - static{ - ptExpected.add("refused"); - ptExpected.add("dance"); - ptPositiveExpected.add("refused"); - ptNegativeExpected.add("dance"); - nuNegativeExpected.add("dance"); - ctNegativeExpected.add("dance"); - } -} +package eu.excitementproject.eop.transformations.uima.ae.truthteller; +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import 
java.util.Iterator; +import java.util.List; + +import org.apache.uima.cas.CASException; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.tcas.Annotation; +import org.junit.BeforeClass; +import org.junit.Test; +import org.uimafit.util.JCasUtil; + +import eu.excitement.type.predicatetruth.ClauseTruthNegative; +import eu.excitement.type.predicatetruth.NegationAndUncertaintyNegative; +import eu.excitement.type.predicatetruth.PredicateTruth; +import eu.excitement.type.predicatetruth.PredicateTruthNegative; +import eu.excitement.type.predicatetruth.PredicateTruthPositive; +import eu.excitementproject.eop.lap.LAPAccess; +import eu.excitementproject.eop.lap.LAPException; +import eu.excitementproject.eop.lap.biu.test.BiuTestUtils; +import eu.excitementproject.eop.lap.implbase.LAP_ImplBase; +import eu.excitementproject.eop.transformations.biu.en.predicatetruth.TruthTellerAnnotatorAE; +import eu.excitementproject.eop.transformations.uima.BIUFullLAPWithTruthTellerConfigured; + +/** + * A test class for {@link TruthTellerAnnotatorAE} + * @author Gabi Stanovsky + * @since Aug 2014 + * + */ +public class PredicateTruthAETest { + + @BeforeClass + public static void beforeClass() throws LAPException, CASException, IOException { + // Run test only under BIU environment + BiuTestUtils.assumeBiuEnvironment(); + // create a lap with truth teller annotator + lap = new BIUFullLAPWithTruthTellerConfigured(); + // annotations for reference text - all tests will examine this result + jcas = lap.generateSingleTHPairCAS(testText, testHypothesis); + tView = jcas.getView(LAP_ImplBase.TEXTVIEW); + hView = jcas.getView(LAP_ImplBase.HYPOTHESISVIEW); + } + + @Test + public void testPT() throws Exception { + Collection annotations = new ArrayList(JCasUtil.select(tView, PredicateTruth.class)); + assertPTListEqual(annotations,ptExpected); + } + + @Test + public void testPTPositive() throws Exception { + Collection annotations = new ArrayList(JCasUtil.select(tView, 
PredicateTruthPositive.class)); + assertPTListEqual(annotations,ptPositiveExpected); + } + + @Test + public void testPTNegative() throws Exception { + Collection annotations = new ArrayList(JCasUtil.select(tView, PredicateTruthNegative.class)); + assertPTListEqual(annotations,ptNegativeExpected); + } + + @Test + public void testNU() throws Exception { + Collection annotations = new ArrayList(JCasUtil.select(hView, NegationAndUncertaintyNegative.class)); + assertPTListEqual(annotations,nuNegativeExpected); + } + + @Test + public void testCTNegative() throws Exception{ + Collection annotations = new ArrayList(JCasUtil.select(tView, ClauseTruthNegative.class)); + assertPTListEqual(annotations,ctNegativeExpected); + } + + /** + * Verify that an observed list of annotations covers an expected list of strings + * @param observed + * @param expected + */ + private static void assertPTListEqual(Collection observed, List expected){ + // assert expected and observed annotations are of the same size + int s = observed.size(); + assertEquals(s,expected.size()); + + //iterate over expected and observed annotations and assert all are equal + Iterator observedIter = observed.iterator(); + Iterator expectedIter = expected.iterator(); + for (int i=0;i ptExpected = new ArrayList(); + private static List ptPositiveExpected = new ArrayList(); + private static List ptNegativeExpected = new ArrayList(); + private static List nuNegativeExpected = new ArrayList(); + private static List ctNegativeExpected = new ArrayList(); + + static{ + ptExpected.add("refused"); + ptExpected.add("dance"); + ptPositiveExpected.add("refused"); + ptNegativeExpected.add("dance"); + nuNegativeExpected.add("dance"); + ctNegativeExpected.add("dance"); + } +} diff --git a/util/pom.xml b/util/pom.xml index 44926633..3aba0172 100644 --- a/util/pom.xml +++ b/util/pom.xml @@ -4,7 +4,7 @@ eu.excitementproject eop - 1.2.0 + 1.2.1 util util @@ -25,17 +25,27 @@ eu.excitementproject common - 1.2.0 + 1.2.1 
eu.excitementproject core - 1.2.0 + 1.2.1 + + + eu.excitementproject + alignmentedas + 1.2.1 + + + eu.excitementproject + adarte + 1.2.1 eu.excitementproject lap - 1.2.0 + 1.2.1 args4j @@ -46,13 +56,13 @@ eu.excitementproject biutee - 1.2.0 + 1.2.1 - - - + --> + diff --git a/util/src/main/java/eu/excitementproject/eop/util/runner/EOPRunner.java b/util/src/main/java/eu/excitementproject/eop/util/runner/EOPRunner.java index 57f957d5..919ed4ce 100644 --- a/util/src/main/java/eu/excitementproject/eop/util/runner/EOPRunner.java +++ b/util/src/main/java/eu/excitementproject/eop/util/runner/EOPRunner.java @@ -14,6 +14,8 @@ import org.apache.uima.jcas.JCas; import org.kohsuke.args4j.CmdLineParser; +import eu.excitementproject.eop.alignmentedas.p1eda.visualization.P1EdaVisualizer; +import eu.excitementproject.eop.alignmentedas.p1eda.visualization.Visualizer; import eu.excitementproject.eop.common.EDABasic; import eu.excitementproject.eop.common.EDAException; import eu.excitementproject.eop.common.TEDecision; @@ -49,6 +51,8 @@ public class EOPRunner { private String resultsFile = null; private String xmlResultsFile = null; + private String outputDir = "./"; + @SuppressWarnings("unused") private String language = "EN"; @@ -60,6 +64,7 @@ public class EOPRunner { private Logger logger; + private Visualizer visualizer = null; /** * @param args @@ -83,7 +88,7 @@ public EOPRunner(String[] args) { dih = new EOPRunnerInitializationHelper(); logger = Logger.getLogger("eu.excitementproject.eop.util.runner.EOPRunner"); - + if (args.length == 0) showHelp(parser); @@ -92,6 +97,16 @@ public EOPRunner(String[] args) { } catch (Exception e) { e.printStackTrace(); } + + // make the output directory if given + if (! option.output.isEmpty()) { + outputDir = option.output; + + File dir = new File(outputDir); + if (! 
dir.exists() || !dir.isDirectory()) { + dir.mkdirs(); + } + } } @@ -143,6 +158,13 @@ public void initializeConfigFile() { } } + /** + * Initialize P1EDA's visualizer to enable tracing + */ + public void initializeVisualizer() { + visualizer = new P1EdaVisualizer(); + } + /** * Find the value for a given parameter either from the command line arguments or from the configuration file * @@ -173,7 +195,7 @@ public void initializeEDA() { logger.error("Could not create EDA object"); e.printStackTrace(); } - logger.info("EDA object created from class " + eda.getClass()); + logger.info("EDA object created from class " + eda.getClass()); } /** @@ -198,10 +220,19 @@ public void runEOPTrain() { * @param outDir -- directory for storing the results in the web-demo-friendly format */ public void runEOPTest(String testDirStr, String outDir) { + + File outDirectory = new File(outDir); + if (! outDirectory.exists()) + outDirectory.mkdirs(); - resultsFile = outDir + "/" + configFile.getName() + "_results.txt"; - xmlResultsFile = outDir + "/" + configFile.getName() + "_results.xml"; - + if (option.results != null) { + resultsFile = option.results; + xmlResultsFile = option.results.replaceAll(".txt$", ".xml"); + } else { + resultsFile = outDir + "/" + configFile.getName() + "_results.txt"; + xmlResultsFile = outDir + "/" + configFile.getName() + "_results.xml"; + } + try { File testDir = new File(testDirStr); @@ -217,9 +248,17 @@ public void runEOPTest(String testDirStr, String outDir) { writer.write(OutputUtils.getPairID(cas) + "\t" + OutputUtils.getGoldLabel(cas) + "\t" + teDecision.getDecision().toString() + "\t" + teDecision.getConfidence() + "\n"); // hasGoldLabel = OutputUtils.getGoldLabel(cas); + + if (visualizer != null) { + OutputUtils.makeTraceHTML(teDecision, cas, outputDir, visualizer); + } } writer.close(); out.close(); + + // generate the XML results file + logger.info("Results file -- XML format: " + xmlResultsFile); + OutputUtils.generateXMLResults(option.testFile, 
resultsFile, xmlResultsFile); logger.info("Results file -- txt format: " + resultsFile); @@ -231,6 +270,8 @@ public void runEOPTest(String testDirStr, String outDir) { } catch (IOException e) { logger.info("Problem copying the configuration file " + configFile.getName() + " to directory " + outputDir.getName()); } + + // careful with the copying! The model file may have a relative path which must be first resolved! logger.info("Copying model in output directory " + outDir); @@ -248,30 +289,6 @@ public void runEOPTest(String testDirStr, String outDir) { } - public void scoreResults() { - - if (option.results != null) { - scoreResults(option.results,Paths.get(option.results + "_report.xml")); - } else { - if (option.testFile != null && resultsFile != null) { - logger.info("Results file -- XML format: " + xmlResultsFile); - OutputUtils.generateXMLResults(option.testFile, resultsFile, xmlResultsFile); - scoreResults(resultsFile,Paths.get(resultsFile + "_report.xml")); - } else { - logger.error("Could not score the results -- check that you have provided the correct test file, and that the results file (" + resultsFile + ") was properly generated"); - } - } - - } - - - public void scoreResults(String resultsFile, Path target) { - EDAScorer.score(new File(resultsFile), target.toString()); - logger.info("Results file: " + resultsFile); - logger.info("Evaluation file: " + target.toString()); - } - - /** * Run the platform on a single test/hypothesis pair @@ -284,19 +301,69 @@ public void runEOPSinglePair() { logger.info("Hypothesis: " + option.hypothesis); JCas aJCas = lapRunner.runLAP(option.text, option.hypothesis); + + if (option.results != null) { + resultsFile = option.results; + xmlResultsFile = option.results.replaceAll(".txt$", ".xml"); + } else { + resultsFile = outputDir + "/" + configFile.getName() + "_results.txt"; + xmlResultsFile = outputDir + "/" + configFile.getName() + "_results.xml"; + } + try { - TEDecision te = eda.process(aJCas); - logger.info("T/H 
pair processing result: " + te.getDecision() + " with confidence " + te.getConfidence()); - OutputUtils.makeSinglePairXML(te, aJCas, option.output, option.language); + TEDecision teDecision = eda.process(aJCas); + logger.info("T/H pair processing result: " + teDecision.getDecision() + " with confidence " + teDecision.getConfidence()); + OutputUtils.makeSinglePairXML(teDecision, aJCas, xmlResultsFile, option.language); + + if (visualizer != null) { + OutputUtils.makeTraceHTML(teDecision, aJCas, outputDir, visualizer); + } } catch (EDAException e) { - // TODO Auto-generated catch block + System.err.println("Problem running the EDA"); e.printStackTrace(); } catch (ComponentException e) { - // TODO Auto-generated catch block + System.err.println("Problem running a component of the EDA"); e.printStackTrace(); - } + } } + + + /** + * Score the results relative to the given gold standard + */ + public void scoreResults() { + + String availableResultsFile = resultsFile; + if (option.results != null) { + availableResultsFile = option.results; + } + if (availableResultsFile != null) { + + if (xmlResultsFile == null) { + xmlResultsFile = availableResultsFile.replaceAll(".txt$", ".xml"); + } + + scoreResults(availableResultsFile,Paths.get(availableResultsFile + "_report.xml")); + + if (option.testFile != null) { + logger.info("Results file -- XML format: " + xmlResultsFile); + OutputUtils.generateXMLResults(option.testFile, availableResultsFile, xmlResultsFile); + } else { + logger.error("Could not score the results -- the testFile option is missing"); + } + } else { + logger.error("Could not score the results -- check that you have provided the correct test file, and that the results file (" + availableResultsFile + ") was properly generated"); + } + + } + + + public void scoreResults(String resultsFile, Path target) { + EDAScorer.score(new File(resultsFile), target.toString()); + logger.info("Results file: " + resultsFile); + logger.info("Evaluation file: " + 
target.toString()); + } /** * When the command line arguments could not be parsed, show the help @@ -324,6 +391,10 @@ public void run() { initializeConfigFile(); setLanguage(); + + + if (option.trace) + initializeVisualizer(); if (option.lap != null) lapRunner = new LAPRunner(option.lap); @@ -335,11 +406,11 @@ public void run() { if (option.trainFile != null) { String trainFile = getOptionValue(option.trainFile, "trainFile"); - String trainDir = getOptionValue(option.trainDir, "trainDir"); - - logger.info("\t training file: " + trainFile + "\n\t training dir: " + trainDir); - + if (! option.nolap) { + String trainDir = getOptionValue(option.trainDir, "trainDir"); + logger.info("\t training file: " + trainFile + "\n\t training dir: " + trainDir); + lapRunner.runLAPOnFile(trainFile, trainDir); } } @@ -349,30 +420,28 @@ public void run() { if (option.testFile != null) { testFile = getOptionValue(option.testFile, "testFile"); - testDir = getOptionValue(option.testDir, "testDir"); - - logger.info("\t testing file: " + testFile + "\n\t testing dir: " + testDir); - if (! option.nolap) { + if ((! option.nolap) && (option.config != null)) { + testDir = getOptionValue(option.testDir, "testDir"); + logger.info("\t testing file: " + testFile + "\n\t testing dir: " + testDir); + lapRunner.runLAPOnFile(testFile, testDir); + } else { + logger.info("Skipping LAP processing (if you think it shouldn't skip, check that the config option was used and the configuration file was given, and that the option \"-nolap\" was not used. \n"); } } if (option.test) { // if (! option.train) - eda.initialize(config); + eda.initialize(config); testDir = getOptionValue(option.testDir, "testDir"); if (! 
option.text.isEmpty()) { runEOPSinglePair(); } else { - if (option.output.isEmpty()) { - runEOPTest(testDir, "./"); - } else { - runEOPTest(testDir,option.output); - } + runEOPTest(testDir,outputDir); } } diff --git a/util/src/main/java/eu/excitementproject/eop/util/runner/EOPRunnerCmdOptions.java b/util/src/main/java/eu/excitementproject/eop/util/runner/EOPRunnerCmdOptions.java index 170b4ad3..b75798df 100644 --- a/util/src/main/java/eu/excitementproject/eop/util/runner/EOPRunnerCmdOptions.java +++ b/util/src/main/java/eu/excitementproject/eop/util/runner/EOPRunnerCmdOptions.java @@ -60,5 +60,8 @@ public class EOPRunnerCmdOptions { @Option(name="-score", usage="Compute the scores on the test data or not") public boolean score = false; + + @Option(name="-trace", usage="Produce output (one file per pair) that allows the visualization of the alignment. Only usable with an alignment EDA") + public boolean trace = false; } diff --git a/util/src/main/java/eu/excitementproject/eop/util/runner/LAPRunner.java b/util/src/main/java/eu/excitementproject/eop/util/runner/LAPRunner.java index baede3f0..a70f5b65 100644 --- a/util/src/main/java/eu/excitementproject/eop/util/runner/LAPRunner.java +++ b/util/src/main/java/eu/excitementproject/eop/util/runner/LAPRunner.java @@ -1,13 +1,20 @@ package eu.excitementproject.eop.util.runner; import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.commons.io.FileUtils; import org.apache.log4j.Logger; +import org.apache.uima.cas.impl.XmiCasSerializer; import org.apache.uima.jcas.JCas; +import org.apache.uima.util.XMLSerializer; +import org.xml.sax.SAXException; import eu.excitementproject.eop.common.configuration.CommonConfig; import eu.excitementproject.eop.common.exception.ConfigurationException; @@ 
-16,6 +23,7 @@ import eu.excitementproject.eop.lap.LAPException; import eu.excitementproject.eop.lap.PlatformCASProber; +@SuppressWarnings("unused") public class LAPRunner { private LAPAccess lap = null; @@ -185,17 +193,24 @@ public void runLAPOnFile(String inputFile, String outDir) { logger.info("Running lap on file: " + inputFile + " // writing output to directory " + outDir); - File dir = new File(outDir); - if (! dir.exists() || !dir.isDirectory()) { - dir.mkdir(); - } - try { + + File dir = new File(outDir); + if (! dir.exists() || !dir.isDirectory()) { + dir.mkdirs(); + } else { + FileUtils.cleanDirectory(dir); + } + lap.processRawInputFormat(new File(inputFile), dir); } catch (LAPException e) { System.err.println("Error running the LAP"); e.printStackTrace(); System.exit(1); + } catch (IOException e) { + System.err.println("Could not clean up LAP output directory " + outDir); + e.printStackTrace(); + System.exit(1); } } @@ -212,6 +227,9 @@ public JCas runLAP(String text, String hypothesis) { try { aJCas = lap.generateSingleTHPairCAS(text, hypothesis); PlatformCASProber.probeCasAndPrintContent(aJCas, System.out); + +// serializeCAS(aJCas); + } catch (LAPException e) { logger.error("Error running the LAP"); e.printStackTrace(); @@ -219,6 +237,32 @@ public JCas runLAP(String text, String hypothesis) { return aJCas; } + +/* + // code copied from LAP_ImplBase.java, just for testing the LAP when processing one pair from the command line + private void serializeCAS(JCas aJCas) throws LAPException { + // serialize + String xmiName = "1.from_commandLine.xmi"; + File xmiOutFile = new File("/tmp/", xmiName); + + try { + FileOutputStream out = new FileOutputStream(xmiOutFile); + XmiCasSerializer ser = new XmiCasSerializer(aJCas.getTypeSystem()); + XMLSerializer xmlSer = new XMLSerializer(out, false); + ser.serialize(aJCas.getCas(), xmlSer.getContentHandler()); + out.close(); + } catch (FileNotFoundException e) { + throw new LAPException("Unable to create/open the file" 
+ xmiOutFile.toString(), e); + } catch (SAXException e) { + throw new LAPException("Failed to serialize the CAS into XML", e); + } catch (IOException e) { + throw new LAPException("Unable to access/close the file" + xmiOutFile.toString(), e); + } + + logger.info("Pair written as " + xmiOutFile.toString() ); + } +*/ + /** * @param args */ diff --git a/util/src/main/java/eu/excitementproject/eop/util/runner/OutputUtils.java b/util/src/main/java/eu/excitementproject/eop/util/runner/OutputUtils.java index c31f251e..1a9a5741 100644 --- a/util/src/main/java/eu/excitementproject/eop/util/runner/OutputUtils.java +++ b/util/src/main/java/eu/excitementproject/eop/util/runner/OutputUtils.java @@ -9,6 +9,7 @@ import java.io.OutputStreamWriter; import java.nio.charset.StandardCharsets; import java.nio.file.Files; +import java.nio.file.Path; import java.nio.file.Paths; import java.util.HashMap; import java.util.regex.Matcher; @@ -21,6 +22,8 @@ import org.apache.uima.jcas.cas.TOP; import eu.excitement.type.entailment.Pair; +import eu.excitementproject.eop.alignmentedas.p1eda.TEDecisionWithAlignment; +import eu.excitementproject.eop.alignmentedas.p1eda.visualization.Visualizer; import eu.excitementproject.eop.common.TEDecision; /** @@ -36,6 +39,7 @@ public static HashMap readResults(String file) { HashMap results = new HashMap(); Logger logger = Logger.getLogger("eu.excitementproject.eop.util.runner.OutputUtils:readResults"); + logger.info("Reading results from file: " + file); try { InputStream in = Files.newInputStream(Paths.get(file)); @@ -72,7 +76,7 @@ public static void generateXMLResults(String testFile, String resultsFile, Strin //BufferedReader reader = new BufferedReader(new InputStreamReader(in)); OutputStream out = Files.newOutputStream(Paths.get(xmlFile)); - BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out)); + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out,"UTF-8")); String line = null, id; String[] entDec; @@ -102,16 
+106,14 @@ public static void generateXMLResults(String testFile, String resultsFile, Strin } - public static void makeSinglePairXML(TEDecision decision, JCas aJCas, String outDir, String lang) { - - String xmlResultsFile = outDir + "/results.xml"; - + public static void makeSinglePairXML(TEDecision decision, JCas aJCas, String xmlResultsFile, String lang) { + Logger logger = Logger.getLogger("eu.excitementproject.eop.util.runner.OutputUtils:makeSinglePairXML"); try { OutputStream out = Files.newOutputStream(Paths.get(xmlResultsFile)); - BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out)); + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out,"UTF-8")); writer.write("\n"); writer.write(" \n"); @@ -162,4 +164,42 @@ public static String getGoldLabel(JCas aCas) { return p.getGoldAnswer(); } } + + /** + * Produce the html that shows the alignment between the text and hypothesis (if an alignment EDA was used to produce the decision) + * + * @param te -- the entailment decision as a TEDecision object + * @param aJCas -- a CAS object with the pair that was analyzed + * @param outDir -- output directory for the entire processing. The html file created will be put in /trace + * @param vis -- visualizer + */ + public static void makeTraceHTML(TEDecision te, JCas aJCas, String outDir, Visualizer vis) { + + Logger logger = Logger.getLogger("eu.excitementproject.eop.util.runner.OutputUtils:makeTraceHTML"); + + Path traceDir = Paths.get(outDir + "/trace"); + + String pairID = OutputUtils.getPairID(aJCas); + if (pairID == null) + pairID = "1"; + + String traceFile = outDir + "/trace/" + pairID + ".html"; + + try { + if ( Files.notExists(traceDir) ) // || ( !
Files.isDirectory(traceDir))) + Files.createDirectories(traceDir); + + TEDecisionWithAlignment decision = (TEDecisionWithAlignment) te; + + OutputStream out = Files.newOutputStream(Paths.get(traceFile)); + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out,"UTF-8")); + writer.write(vis.generateHTML(decision)); + writer.close(); + out.close(); + + } catch (Exception e) { + logger.info("Error writing trace file for pair " + getPairID(aJCas)); + e.printStackTrace(); + } + } } diff --git a/util/src/test/java/eu/excitementproject/eop/util/runner/TestRunner_AdArte_EN.java b/util/src/test/java/eu/excitementproject/eop/util/runner/TestRunner_AdArte_EN.java new file mode 100644 index 00000000..3940d90c --- /dev/null +++ b/util/src/test/java/eu/excitementproject/eop/util/runner/TestRunner_AdArte_EN.java @@ -0,0 +1,37 @@ +package eu.excitementproject.eop.util.runner; + +import org.junit.*; + +public class TestRunner_AdArte_EN { + + @Ignore + @Test + public void test() { + String[] cmd = { + "-config", "../adarte/src/test/resources/configuration-file/AdArte_EN.xml", +// "-train", +// "-lap", "IT", +// "-trainFile","../adarte/src/test/resources/dataset/SICK_EN_EXAMPLE.xml", +// "-test", +// "-testFile","../adarte/src/test/resources/dataset/SICK_EN_EXAMPLE.xml", +// "-output","../core/src/test/resources/results/" + }; + /* + String[] cmd = { + "-config", "../core/src/main/resources/configuration-file/EditDistanceEDA_IT.xml", + "-train", + "-trainFile","./src/test/resources/data-set/it_demo_dev.xml", + "-test", + "-testFile","./src/test/resources/data-set/it_demo_test.xml", + "-output","./src/test/resources/results/"}; + */ + try { + EOPRunner runner = new EOPRunner(cmd); + runner.run(); + } catch (Exception e) { + System.out.println("Demo test failed! Command arguments: " + java.util.Arrays.toString(cmd)); + e.printStackTrace(); + } + + } +}