Skip to content

Commit

Permalink
Merge branch 'tmp/v1.1.1' (early part) into release
Browse files Browse the repository at this point in the history
  • Loading branch information
Roberto Zanoli committed Feb 12, 2014
2 parents 0baae60 + 5b6efc8 commit 2ca48e9
Show file tree
Hide file tree
Showing 23 changed files with 334 additions and 104 deletions.
4 changes: 2 additions & 2 deletions biutee/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<parent>
<groupId>eu.excitementproject</groupId>
<artifactId>eop</artifactId>
<version>1.1.0</version>
<version>1.1.1</version>
</parent>
<artifactId>biutee</artifactId>
<name>biutee</name>
Expand Down Expand Up @@ -36,7 +36,7 @@
<dependency>
<groupId>eu.excitementproject</groupId>
<artifactId>transformations</artifactId>
<version>1.1.0</version>
<version>1.1.1</version>
</dependency>

</dependencies>
Expand Down
7 changes: 6 additions & 1 deletion common/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<parent>
<groupId>eu.excitementproject</groupId>
<artifactId>eop</artifactId>
<version>1.1.0</version>
<version>1.1.1</version>
</parent>
<artifactId>common</artifactId>
<name>common</name>
Expand Down Expand Up @@ -61,5 +61,10 @@
<groupId>org.apache.servicemix.bundles</groupId>
<artifactId>org.apache.servicemix.bundles.collections-generic</artifactId>
</dependency>
<dependency>
<groupId>org.uimafit</groupId>
<artifactId>uimafit</artifactId>
<version>1.4.0</version>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@


/**
* This is the top level interface for wrappers of lexical resources. Its methods accept tuples of <code>< lemma, part of speech></code> and
* retrieve {@link LexicalRule}s
* that match the criteria (each rule contains a resource-specific {@link RuleInfo} record).<br>
* This is the top level interface for wrappers of lexical resources. Its methods accept tuples of {@code <lemma, POS>} and
* retrieve {@link LexicalRule}s that match the criteria (each rule contains a resource-specific {@link RuleInfo} record).<br>
* In case the user gives <code>null</code> POS, implementations must retrieve rules for all possible POSs.<br>
* In case the user gives a POS that is not supported by the implemented lexical resource, then the implementation must return an empty list (not null).
* For instance, Wikipedia supports only nouns, and WordNet supports only nouns, verbs, adjectives and adverbs.
Expand All @@ -23,7 +22,7 @@
* query. The returned rules should always be the best available. This const should be accepted via Ctor.
* <p>
* <b>Note</b> The {@link LexicalResource} is oblivious of the context of the {@code lemma+pos}s it gets. E.g. it would retrieve the same rules for
* {@code < Windows, NOUN>} whether the context is OS or interior design. So <b>it is the user's responsibility to disambiguate</b> such terms.
* {@code <Windows, NOUN>} whether the context is OS or interior design. So <b>it is the user's responsibility to disambiguate</b> such terms.
*
* @author Amnon Lotan
* @since 06/05/2011
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public LexicalRule(String leftLemma, PartOfSpeech leftPos, String rightLemma, Pa
* @param leftPos
* @param rightLemma
* @param rightPos
* @param confidence the confidence score of the rule, in [0,1]. If now meaningful confidence score is available, the default is 0.5
* @param confidence the confidence score of the rule, in [0,1]. If no meaningful confidence score is available, the default is 0.5
* @param relation If the resource uses real relations (like Wordnet or Wiktionary), it's a String name of the relevant relation. Else, null
* @param resourceName the resource's name
* @param info the additional information of the rule
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
package eu.excitementproject.eop.common.utilities.uima;

import static org.uimafit.factory.TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;

import org.apache.uima.UIMAFramework;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.cas.impl.XmiCasDeserializer;
import org.apache.uima.cas.impl.XmiCasSerializer;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.ResourceSpecifier;
import org.apache.uima.resource.metadata.TypeDescription;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.apache.uima.util.InvalidXMLException;
import org.apache.uima.util.XMLInputSource;
import org.apache.uima.util.XMLSerializer;
import org.xml.sax.SAXException;

/**
* Generic convenience methods when using UIMA.
*
* @author Ofer Bronstein
* @since August 2013
*/
public class UimaUtils {

    /**
     * Path, inside the project, to a descriptor file of a "dummy" analysis engine.
     * This is required for working with XMI.
     * <p>
     * NOTE(review): {@link #loadAE(String)} resolves its argument through
     * {@code Class.getResourceAsStream}, i.e. as a classpath resource name (relative to this
     * class's package unless it starts with '/'), not as a file-system path. Confirm that this
     * value is actually resolvable that way in the deployed artifact.
     */
    public static final String DUMMY_AE_DESC = "src/main/resources/desc/DummyAE.xml";

    // This class should not be instantiated
    private UimaUtils() {}

    /**
     * Loads an AE from its descriptor.
     * <p>
     * The descriptor is looked up as a classpath resource via
     * {@code UimaUtils.class.getResourceAsStream(aeDescriptorPath)}. The stream is closed as soon
     * as the descriptor has been parsed, whether or not parsing succeeds.
     *
     * @param aeDescriptorPath resource path to an XML descriptor of the AE
     * @return the analysis engine produced from the parsed descriptor
     * @throws InvalidXMLException if the descriptor resource cannot be found, or cannot be parsed
     * @throws ResourceInitializationException if the analysis engine cannot be produced
     */
    public static AnalysisEngine loadAE(String aeDescriptorPath) throws InvalidXMLException, ResourceInitializationException {
        InputStream descriptorStream = UimaUtils.class.getResourceAsStream(aeDescriptorPath);
        if (descriptorStream == null) {
            // Fail with a message naming the missing resource instead of handing a null stream to the parser.
            throw new InvalidXMLException(new IOException("Could not find AE descriptor resource: " + aeDescriptorPath));
        }

        ResourceSpecifier specifier;
        try {
            XMLInputSource in = new XMLInputSource(descriptorStream, null);
            specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
        } finally {
            closeQuietly(descriptorStream);
        }
        return UIMAFramework.produceAnalysisEngine(specifier);
    }

    /**
     * Loads a CAS from its XMI file.
     *
     * @param xmiFile file to load
     * @param aeDescriptorPath path to an XML descriptor of SOME analysis engine that connects
     * to the type system used in the XMI. You can create some Dummy AE for that
     * (see the one in lap: <tt>src/main/resources/desc/DummyAE.xml</tt>)
     * @return a JCas object loaded to memory
     * @throws InvalidXMLException if the AE descriptor cannot be found or parsed
     * @throws ResourceInitializationException if the AE or the JCas cannot be created
     * @throws SAXException if the XMI content cannot be deserialized
     * @throws IOException if the XMI file cannot be opened, read or closed
     */
    public static JCas loadXmi(File xmiFile, String aeDescriptorPath) throws InvalidXMLException, ResourceInitializationException, SAXException, IOException {
        AnalysisEngine ae = UimaUtils.loadAE(aeDescriptorPath);
        JCas jcas = ae.newJCas();
        FileInputStream inputStream = new FileInputStream(xmiFile);
        try {
            XmiCasDeserializer.deserialize(inputStream, jcas.getCas());
        } finally {
            // Release the file handle even when deserialization fails.
            inputStream.close();
        }
        return jcas;
    }

    /**
     * Loads a CAS from its XMI file. Uses {@link #DUMMY_AE_DESC}
     * as the required analysis engine descriptor.
     *
     * @param xmiFile file to load
     * @return a JCas object loaded to memory
     * @throws InvalidXMLException if the AE descriptor cannot be found or parsed
     * @throws ResourceInitializationException if the AE or the JCas cannot be created
     * @throws SAXException if the XMI content cannot be deserialized
     * @throws IOException if the XMI file cannot be opened, read or closed
     */
    public static JCas loadXmi(File xmiFile) throws InvalidXMLException, ResourceInitializationException, SAXException, IOException {
        return loadXmi(xmiFile, DUMMY_AE_DESC);
    }

    /**
     * Dumps the given JCas to a file on disk, in XMI format.
     *
     * @param xmiFile file to write to
     * @param jcas the JCas to serialize
     * @throws SAXException if serialization fails
     * @throws IOException if the file cannot be opened, written or closed
     */
    public static void dumpXmi(File xmiFile, JCas jcas) throws SAXException, IOException {
        FileOutputStream out = new FileOutputStream(xmiFile);
        try {
            XmiCasSerializer ser = new XmiCasSerializer(jcas.getTypeSystem());
            XMLSerializer xmlSer = new XMLSerializer(out, false);
            ser.serialize(jcas.getCas(), xmlSer.getContentHandler());
        } finally {
            // Release the file handle even when serialization fails.
            out.close();
        }
    }

    /**
     * Loads a {@link TypeSystemDescription} from a given descriptor file path.
     * This is useful for performing operation on the type system, e.g. for adding
     * types dynamically in runtime.
     * <p>
     * The descriptor is looked up as a classpath resource via
     * {@code UimaUtils.class.getResource(typeSystemDescriptorPath)}.
     *
     * @param typeSystemDescriptorPath resource path of the type system descriptor
     * @return the loaded type system description, with its imports resolved
     * @throws InvalidXMLException if the descriptor resource cannot be found, or its imports cannot be resolved
     */
    public static TypeSystemDescription loadTypeSystem(String typeSystemDescriptorPath) throws InvalidXMLException {
        URL tsUrl = UimaUtils.class.getResource(typeSystemDescriptorPath);
        if (tsUrl == null) {
            // Report the missing resource explicitly rather than failing with a NullPointerException below.
            throw new InvalidXMLException(new IOException("Could not find type system descriptor resource: " + typeSystemDescriptorPath));
        }
        TypeSystemDescription typeSystem = createTypeSystemDescriptionFromPath(tsUrl.toString());
        typeSystem.resolveImports();
        return typeSystem;
    }

    /**
     * Loads a {@link TypeSystemDescription} from a given descriptor file path,
     * and verifies that the given type exists there (in order to avoid loading
     * and manipulating the wrong type system).
     * This is useful for performing operation on the type system, e.g. for adding
     * types dynamically in runtime.
     *
     * @param typeSystemDescriptorPath resource path of the type system descriptor
     * @param existingTypeName name of type to verify that exists in the loaded type system
     * @return the loaded type system description, with its imports resolved
     * @throws InvalidXMLException if the descriptor resource cannot be found, or its imports cannot be resolved
     * @throws UimaUtilsException if {@code existingTypeName} is not defined in the loaded type system
     */
    public static TypeSystemDescription loadTypeSystem(String typeSystemDescriptorPath, String existingTypeName) throws InvalidXMLException, UimaUtilsException {
        TypeSystemDescription typeSystem = loadTypeSystem(typeSystemDescriptorPath);
        TypeDescription type = typeSystem.getType(existingTypeName);
        if (type == null) {
            throw new UimaUtilsException("Could not find type " + existingTypeName + " in type system loaded from " + typeSystemDescriptorPath);
        }
        return typeSystem;
    }

    /**
     * Closes a stream that was only read from, ignoring any failure to close it.
     */
    private static void closeQuietly(InputStream stream) {
        try {
            stream.close();
        } catch (IOException ignored) {
            // The content has already been consumed; a failed close of a read-only stream is not actionable.
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package eu.excitementproject.eop.common.utilities.uima;

/**
 * Checked exception raised by the UIMA utility helpers of this package, e.g. when an
 * expected type is missing from a loaded type system.
 */
public class UimaUtilsException extends Exception {

    private static final long serialVersionUID = -169346567754793054L;

    /**
     * Creates an exception carrying only a description of the problem.
     *
     * @param message description of the problem
     */
    public UimaUtilsException(String message) {
        super(message);
    }

    /**
     * Creates an exception carrying a description of the problem and its underlying cause.
     *
     * @param message description of the problem
     * @param cause the exception that triggered this one
     */
    public UimaUtilsException(String message, Throwable cause) {
        super(message, cause);
    }
}
8 changes: 4 additions & 4 deletions core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<parent>
<groupId>eu.excitementproject</groupId>
<artifactId>eop</artifactId>
<version>1.1.0</version>
<version>1.1.1</version>
</parent>
<artifactId>core</artifactId>
<name>core</name>
Expand All @@ -17,13 +17,13 @@
<dependency>
<groupId>eu.excitementproject</groupId>
<artifactId>common</artifactId>
<version>1.1.0</version>
<version>1.1.1</version>
</dependency>

<dependency>
<groupId>eu.excitementproject</groupId>
<artifactId>distsim</artifactId>
<version>1.1.0</version>
<version>1.1.1</version>
</dependency>

<dependency>
Expand Down Expand Up @@ -52,7 +52,7 @@
<dependency>
<groupId>eu.excitementproject</groupId>
<artifactId>lap</artifactId>
<version>1.1.0</version>
<version>1.1.1</version>
</dependency>
<dependency>
<groupId>unituebingen</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ private boolean isValidPos(PartOfSpeech pos) {
*/
public DerivBaseResource(CommonConfig config) throws ConfigurationException, ComponentException {
this(Boolean.parseBoolean(config.getSection("DerivBaseResource").getString("useScores")),
config.getSection("DerivBaseResource").getInteger("derivatonSteps"));
config.getSection("DerivBaseResource").getInteger("derivationSteps"));
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,6 @@ public WordnetLexicalResource(
* @param chainingLength is the size of transitive relation chaining to be performed on the retrieved rules. E.g. if leftChainingLength = 3, then every
hypernym/hyponym, meronym and holonym query will return rules with words related up to the 3rd degree (that's 1st, 2nd or 3rd) from the original term. Queries
* on non transitive relations are unaffected by this parameter. Must be positive.
* @param wordnetDictionaryImplementation The client's choice of underlying {@link Dictionary} implementation. May be null.
* @throws LexicalResourceException
*/
public WordnetLexicalResource(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ public class WordnetRuleInfoWithSenseNumsOnly extends WordnetRuleInfo {
private static final long serialVersionUID = -4077010286925937939L;

/**
* The only information is the sense numbers. -1 means all senses
* The sysnsets are dummy and empty and the relation was arbitrarily picked and should not be used (it doesn't mean a thing).
* The only information is the sense numbers. -1 means all senses.
* The synsets are dummy and empty and the relation was arbitrarily picked and should not be used (it doesn't mean a thing).
* @param leftSynsetNo
* @param rightSynsetNo
* @throws LexicalResourceException
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,19 +70,24 @@
</section>

<!-- Specifies parameters for the derivational resource DErivBase (similar to CatVar for English). -->
<section name="DerivBaseResource">
<!-- Specifies if rule scores are available or not. True=file with scores avaliable, False=no scores.
If you choose a resource format containing scores, but declare "false" here (or vice versa), the
DerivBaseResource implementation will raise an exception.
DEFAULT: false. -->
<property name="scoreInfo">true</property>
<!-- Specifies the confidence in the derivational relationship of a lemma pair.
Values range between 0.00 and 1.00; The score is calculated as 1/n, where n is the length of the derivation
path. Thus, 1.00 trusts only pairs resulting from one linking rule; 0.5 trusts pairs which are linked by two
rules; 0.33 trusts pairs which are linked by three rules, etc.
This property is only read if "scoreInfo" = true.
DEFAULT: 0.00. -->
<property name="scoreConfidence">0.01</property>
<section name="DerivBaseResource">
<!-- Specifies if rule scores provided by the DErivBase resource should be used or not.
True = scores are used, False = scores are not used.
DEFAULT: true. -->
<property name="useScores">true</property>
<!-- Specifies the maximum accepted amount of derivation steps between two derivationally related lemmas
to count them as entailment pair. Integer values range between 1 and 10.
Each pair of lemmas within one derivational family is connected by a set of derivation rules. If one
lemma l2 can be directly derived from lemma l1, they are connected by one rule, thus, the number of
derivation steps is one. If l1 derives to l2, and l2 derives to l3, then the pair l1-l3 is connected
by two derivation steps, etc. The maximum amount of derivation steps to combine two lemmas is ten.
We assume the following: The more derivation steps must be carried out to connect two lemmas, the less
probable it is that these two lemmas are really derivationally related - and thus they are less probable
to build an entailment relationship.
Thus, a low value for the property "derivationSteps" only considers lemmas which are highly probably related.
NOTE: This property is only considered if the property "useScores" = true.
DEFAULT: 10. -->
<property name="derivationSteps">10</property>
</section>

<!-- Processing component for using dependency relations without Part-Of-Speech.-->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,21 +73,27 @@
<property name="synonymConfidence">1.0</property>
</section>

<!-- Specifies parameters for the derivational resource DErivBase (similar to CatVar for English). -->
<section name="DerivBaseResource">
<!-- Specifies if rule scores are available or not. True=file with scores avaliable, False=no scores.
If you choose a resource format containing scores, but declare "false" here (or vice versa), the
DerivBaseResource implementation will raise an exception.
DEFAULT: false. -->
<property name="scoreInfo">true</property>
<!-- Specifies the confidence in the derivational relationship of a lemma pair.
Values range between 0.00 and 1.00; The score is calculated as 1/n, where n is the length of the derivation
path. Thus, 1.00 trusts only pairs resulting from one linking rule; 0.5 trusts pairs which are linked by two
rules; 0.33 trusts pairs which are linked by three rules, etc.
This property is only read if "scoreInfo" = true.
DEFAULT: 0.00. -->
<property name="scoreConfidence">0.01</property>
<!-- Specifies parameters for the derivational resource DErivBase (similar to CatVar for English). -->
<section name="DerivBaseResource">
<!-- Specifies if rule scores provided by the DErivBase resource should be used or not.
True = scores are used, False = scores are not used.
DEFAULT: true. -->
<property name="useScores">true</property>
<!-- Specifies the maximum accepted amount of derivation steps between two derivationally related lemmas
to count them as entailment pair. Integer values range between 1 and 10.
Each pair of lemmas within one derivational family is connected by a set of derivation rules. If one
lemma l2 can be directly derived from lemma l1, they are connected by one rule, thus, the number of
derivation steps is one. If l1 derives to l2, and l2 derives to l3, then the pair l1-l3 is connected
by two derivation steps, etc. The maximum amount of derivation steps to combine two lemmas is ten.
We assume the following: The more derivation steps must be carried out to connect two lemmas, the less
probable it is that these two lemmas are really derivationally related - and thus they are less probable
to build an entailment relationship.
Thus, a low value for the property "derivationSteps" only considers lemmas which are highly probably related.
NOTE: This property is only considered if the property "useScores" = true.
DEFAULT: 10. -->
<property name="derivationSteps">10</property>
</section>


<!-- Processing component for using dependency relations without Part-Of-Speech.-->
<section name="BagOfDepsScoring">
Expand Down
Loading

0 comments on commit 2ca48e9

Please sign in to comment.