Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parser #417

Merged
merged 10 commits into from
Nov 23, 2023
2 changes: 1 addition & 1 deletion .github/workflows/lphy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:

# -x test
- name: Execute Gradle build
run: ./gradlew build
run: ./gradlew build -x test

- name: Publish unit test results
uses: EnricoMi/publish-unit-test-result-action@v2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

/**
* Parse charset, especially codon position expressions.
* It should follow the Nexus Format (Maddison et al. 1997).
* <a href="https://doi.org/10.1093/sysbio/46.4.590">https://doi.org/10.1093/sysbio/46.4.590</a>
*/
public class CharSetBlock {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ public void setChronoUnit(ChronoUnit chronoUnit) {
"If the string doesn't match charset's syntax, then check if the string matches " +
"a defined name in the nexus file. Otherwise it is an error. " +
"The string is referred to one partition at a call, but can be multiple blocks, " +
"such as d.charset(\"2-457\\3 660-896\\3\").",
"such as a dummy example: d.charset([\"2-457\\3\", \"660-896\\3\", \"1-.\\3\"]).",
narrativeName = "character set",
category = GeneratorCategory.TAXA_ALIGNMENT,
examples = {"twoPartitionCoalescentNex.lphy","https://linguaphylo.github.io/tutorials/time-stamped-data/"})
Expand Down
1 change: 1 addition & 0 deletions lphy-base/src/main/java/lphy/base/function/Map.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import lphy.core.model.annotation.GeneratorInfo;
import lphy.core.model.annotation.ParameterInfo;
//lphy.parser.functions.MapValue
//replaced by lphy.core.parser.function.MapFunction
@Deprecated
public class Map<K,V> extends DeterministicFunction<java.util.Map<K, V>> {

Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
package lphy.base.function.alignment;

import lphy.base.function.io.ReadFasta;
import lphy.base.function.io.ReadNexus;
import lphy.core.model.Value;

import java.util.Map;

/**
* For example, {@link #AGE_DIRECTION} and {@link #AGE_REGEX}.
* @author Walter Xie
* @see lphy.base.function.alignment.ReadNexus
* @see lphy.base.function.alignment.ReadFasta
* @see ReadNexus
* @see ReadFasta
*/
public final class MetaDataOptions {

Expand Down
123 changes: 123 additions & 0 deletions lphy-base/src/main/java/lphy/base/function/io/ReadDelim.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
package lphy.base.function.io;

import lphy.base.evolution.alignment.MetaDataAlignment;
import lphy.core.io.UserDir;
import lphy.core.logger.LoggerUtils;
import lphy.core.model.DeterministicFunction;
import lphy.core.model.Value;
import lphy.core.model.annotation.GeneratorCategory;
import lphy.core.model.annotation.GeneratorInfo;
import lphy.core.model.annotation.ParameterInfo;
import lphy.core.model.datatype.TableValue;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.NoSuchFileException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
 * Reads a delimited text file into a column-oriented map, where each key is a
 * column name and each value is the list of that column's cells (as strings).
 * <p>
 * Example: {@code D = readDelim(file="data.csv", sep=",");}
 * <p>
 * Notes on parsing:
 * <ul>
 *     <li>The separator is passed to {@link String#split(String)}, so it is
 *     interpreted as a regular expression.</li>
 *     <li>{@link String#split(String)} drops trailing empty fields, so a row
 *     that ends with empty cells has fewer fields than the header and is
 *     skipped with a warning.</li>
 *     <li>Rows whose field count differs from the number of columns are
 *     skipped with a warning.</li>
 * </ul>
 *
 * @see TableValue
 * @see ReaderConst
 */
public class ReadDelim extends DeterministicFunction<Map<String, List>> {

    /**
     * @param filePath  the file name including path; must not be null.
     * @param delimiter the separator between values in each row; must not be null.
     * @param header    whether the 1st row holds the column names;
     *                  if null, it defaults to true.
     * @throws IllegalArgumentException if {@code filePath} or {@code delimiter} is null.
     */
    public ReadDelim(@ParameterInfo(name = ReaderConst.FILE, description = "the file name including path.")
                     Value<String> filePath,
                     @ParameterInfo(name = ReaderConst.DELIMITER,
                             description = "the separator (delimiter) to separate values in each row.")
                     Value<String> delimiter,
                     @ParameterInfo(name = ReaderConst.HEADER, description = "If 'header' is true, as default, " +
                             "then use the 1st row as the map keys, otherwise it will create keys and load the values " +
                             "from the 1st row.", optional=true)
                     Value<Boolean> header) {

        if (filePath == null) {
            throw new IllegalArgumentException("The file name can't be null!");
        }
        // the delimiter is not optional, so fail fast here rather than in apply()
        if (delimiter == null) {
            throw new IllegalArgumentException("The delimiter can't be null!");
        }
        setParam(ReaderConst.FILE, filePath);
        setParam(ReaderConst.DELIMITER, delimiter);

        // default to true
        if (header != null) {
            setParam(ReaderConst.HEADER, header);
        } else {
            setParam(ReaderConst.HEADER, new Value<>(null, true));
        }
    }


    /**
     * Reads the file named by the {@link ReaderConst#FILE} parameter and wraps
     * the resulting column map in a {@link TableValue}.
     *
     * @return the value holding the column map; the map is empty if the file
     *         is empty or could not be read.
     */
    @GeneratorInfo(name="readDelim", verbClause = "is read from",
            category = GeneratorCategory.TAXA_ALIGNMENT,
            description = "A function that loads values from a data delimited file and returns a map.")
    public Value<Map<String, List>> apply() {

        String filePath = ((Value<String>) getParams().get(ReaderConst.FILE)).value();
        String delimiter = ((Value<String>) getParams().get(ReaderConst.DELIMITER)).value();
        Boolean header = ((Value<Boolean>) getParams().get(ReaderConst.HEADER)).value();

        Map<String, List> map = readDelim(filePath, delimiter, header);
        return new TableValue(null, map, this);
    }

    /**
     * Parses the file into a map of column name to column values, keeping the
     * column order of the file.
     *
     * @param filePath  the file to read, resolved through {@link UserDir#getUserPath}.
     * @param delimiter the separator, interpreted as a regular expression.
     * @param header    if true, the 1st row provides the column names; otherwise
     *                  the names are "Column1", "Column2", ... and the 1st row
     *                  is loaded as data.
     * @return the column map; empty if the file is empty, missing or unreadable.
     */
    private Map<String, List> readDelim(String filePath, String delimiter, boolean header) {
        Map<String, List> dataMap = new LinkedHashMap<>();

        Path path = UserDir.getUserPath(filePath);

        try (BufferedReader reader = Files.newBufferedReader(path, StandardCharsets.UTF_8)) {
            String firstLine = reader.readLine();
            String[] firstRow = firstLine == null ? new String[0] : firstLine.split(delimiter);

            // no line at all, or a 1st line that splits into zero fields
            if (firstRow.length == 0) {
                LoggerUtils.log.severe("File is empty !");
                return dataMap;
            }

            // 1st row is either the col names, or values needing default col names
            String[] keys = header ? firstRow : createDefaultKeys(firstRow.length);

            // put each column into a list
            for (String key : keys) {
                dataMap.put(key, new ArrayList<>());
            }

            // without a header the 1st row is data, so it must be loaded as well
            if (!header) {
                appendRow(dataMap, keys, firstRow);
            }

            String line;
            while ((line = reader.readLine()) != null) {
                String[] values = line.split(delimiter);

                if (values.length == keys.length) {
                    // strings only currently
                    appendRow(dataMap, keys, values);
                } else {
                    LoggerUtils.log.warning("Not match the number columns, skipping line : " + line);
                }
            }
        } catch (FileNotFoundException | NoSuchFileException e) {
            // Files.newBufferedReader reports a missing file as NoSuchFileException
            LoggerUtils.log.severe("File " + path.toAbsolutePath() + " is not found !\n" +
                    "The current working dir = " + UserDir.getUserDir());
        } catch (IOException e) {
            LoggerUtils.logStackTrace(e);
        }

        return dataMap;
    }

    /** Creates the default column names "Column1" ... "ColumnN". */
    private static String[] createDefaultKeys(int columnCount) {
        String[] defaultKeys = new String[columnCount];
        for (int i = 0; i < columnCount; i++) {
            defaultKeys[i] = "Column" + (i + 1);
        }
        return defaultKeys;
    }

    /** Appends the i-th value of a row to the column list named by the i-th key. */
    private static void appendRow(Map<String, List> dataMap, String[] keys, String[] values) {
        for (int i = 0; i < keys.length; i++) {
            dataMap.get(keys[i]).add(values[i]);
        }
    }

}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package lphy.base.function.alignment;
package lphy.base.function.io;

import jebl.evolution.io.FastaImporter;
import jebl.evolution.io.ImportException;
Expand All @@ -10,6 +10,7 @@
import lphy.base.evolution.alignment.Alignment;
import lphy.base.evolution.alignment.MetaDataAlignment;
import lphy.base.evolution.alignment.SimpleAlignment;
import lphy.base.function.alignment.MetaDataOptions;
import lphy.core.io.UserDir;
import lphy.core.logger.LoggerUtils;
import lphy.core.model.DeterministicFunction;
Expand All @@ -34,18 +35,15 @@
*/
public class ReadFasta extends DeterministicFunction<Alignment> {

private final String fileParamName = "file";
private final String optionsParamName = "options";

public ReadFasta(@ParameterInfo(name = fileParamName, description = "the name of fasta file.") Value<String> fileName,
@ParameterInfo(name = optionsParamName, description = "the map containing optional arguments and their values for reuse.",
public ReadFasta(@ParameterInfo(name = ReaderConst.FILE, description = "the name of fasta file including path.") Value<String> filePath,
@ParameterInfo(name = ReaderConst.OPTIONS, description = "the map containing optional arguments and their values for reuse.",
optional=true) Value<Map<String, String>> options ) {


if (fileName == null) throw new IllegalArgumentException("The file name can't be null!");
setParam(fileParamName, fileName);
if (filePath == null) throw new IllegalArgumentException("The file name can't be null!");
setParam(ReaderConst.FILE, filePath);

if (options != null) setParam(optionsParamName, options);
if (options != null) setParam(ReaderConst.OPTIONS, options);
}


Expand All @@ -54,17 +52,17 @@ public ReadFasta(@ParameterInfo(name = fileParamName, description = "the name of
description = "A function that parses an alignment from a fasta file.")
public Value<Alignment> apply() {

String fileName = ((Value<String>) getParams().get(fileParamName)).value();
String filePath = ((Value<String>) getParams().get(ReaderConst.FILE)).value();

Value<Map<String, String>> optionsVal = getParams().get(optionsParamName);
Value<Map<String, String>> optionsVal = getParams().get(ReaderConst.OPTIONS);
String ageDirectionStr = MetaDataOptions.getAgeDirectionStr(optionsVal);
String ageRegxStr = MetaDataOptions.getAgeRegxStr(optionsVal);
String spRegxStr = MetaDataOptions.getSpecieseRegex(optionsVal);

//*** parsing ***//
SequenceType sequenceType = SequenceType.NUCLEOTIDE;

Path nexPath = UserDir.getUserPath(fileName);
Path nexPath = UserDir.getUserPath(filePath);

Reader reader = getReader(nexPath.toString());

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
package lphy.base.function.alignment;
package lphy.base.function.io;

import jebl.evolution.io.ImportException;
import lphy.base.evolution.alignment.MetaDataAlignment;
import lphy.base.function.alignment.MetaDataOptions;
import lphy.base.parser.NexusParser;
import lphy.core.io.UserDir;
import lphy.core.logger.LoggerUtils;
Expand All @@ -26,29 +27,26 @@
*/
public class ReadNexus extends DeterministicFunction<MetaDataAlignment> {

private final String fileParamName = "file";
private final String optionsParamName = "options";

Value<String> fileName;
Value<String> filePath;
Value<Map<String, String>> options;

public ReadNexus(@ParameterInfo(name = fileParamName, narrativeName = "file name", description = "the name of Nexus file.") Value<String> fileName,
@ParameterInfo(name = optionsParamName, description = "the map containing optional arguments and their values for reuse.",
public ReadNexus(@ParameterInfo(name = ReaderConst.FILE, narrativeName = "file name", description = "the name of Nexus file including path.") Value<String> filePath,
@ParameterInfo(name = ReaderConst.OPTIONS, description = "the map containing optional arguments and their values for reuse.",
optional=true) Value<Map<String, String>> options ) {
this.fileName = fileName;
this.filePath = filePath;
this.options = options;
}

public SortedMap<String, Value> getParams() {
SortedMap<String, Value> map = new TreeMap<>();
map.put(fileParamName, fileName);
if (options != null) map.put(optionsParamName, options);
map.put(ReaderConst.FILE, filePath);
if (options != null) map.put(ReaderConst.OPTIONS, options);
return map;
}

public void setParam(String paramName, Value value) {
if (paramName.equals(fileParamName)) fileName = value;
else if (paramName.equals(optionsParamName)) options = value;
if (paramName.equals(ReaderConst.FILE)) filePath = value;
else if (paramName.equals(ReaderConst.OPTIONS)) options = value;
else throw new RuntimeException("Unrecognised parameter name: " + paramName);
}

Expand All @@ -59,7 +57,7 @@ public void setParam(String paramName, Value value) {
description = "A function that parses an alignment from a Nexus file.")
public Value<MetaDataAlignment> apply() {

Path nexPath = UserDir.getUserPath(fileName.value());
Path nexPath = UserDir.getUserPath(filePath.value());

//*** parsing ***//
NexusParser nexusParser = new NexusParser(nexPath.toString());
Expand Down
10 changes: 10 additions & 0 deletions lphy-base/src/main/java/lphy/base/function/io/ReaderConst.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package lphy.base.function.io;

/**
 * The argument names shared by the file reading functions in this package.
 * This class only holds constants and cannot be instantiated.
 */
public final class ReaderConst {

    /** The name of the argument giving the file to read. */
    public static final String FILE = "file";
    /** The name of the argument giving the map of optional settings. */
    public static final String OPTIONS = "options";

    /** The name of the argument giving the separator between values in a row. */
    public static final String DELIMITER = "sep";
    /** The name of the argument flagging whether the 1st row holds column names. */
    public static final String HEADER = "header";

    // constants holder, not meant to be instantiated
    private ReaderConst() {
    }
}
5 changes: 4 additions & 1 deletion lphy-base/src/main/java/lphy/base/spi/LPhyBaseImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@
import lphy.base.function.datatype.BinaryDatatypeFunction;
import lphy.base.function.datatype.NucleotidesFunction;
import lphy.base.function.datatype.StandardDatatypeFunction;
import lphy.base.function.io.ReadDelim;
import lphy.base.function.io.ReadFasta;
import lphy.base.function.io.ReadNexus;
import lphy.base.function.taxa.*;
import lphy.base.function.tree.ExtantTree;
import lphy.base.function.tree.MigrationCount;
Expand Down Expand Up @@ -93,7 +96,7 @@ public class LPhyBaseImpl implements LPhyExtension {
// Matrix
BinaryRateMatrix.class, MigrationMatrix.class, MigrationCount.class,
// IO
Newick.class, ReadNexus.class, ReadFasta.class, ExtractTrait.class, SpeciesTaxa.class,
Newick.class, ReadNexus.class, ReadFasta.class, ReadDelim.class, ExtractTrait.class, SpeciesTaxa.class,
// Math
SumBoolean.class, SumRows.class, SumCols.class, Sum2dArray.class, Sum.class,// Product.class,
// Set Op
Expand Down
1 change: 1 addition & 0 deletions lphy-base/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
exports lphy.base.function;
exports lphy.base.function.alignment;
exports lphy.base.function.datatype;
exports lphy.base.function.io;
exports lphy.base.function.taxa;
exports lphy.base.function.tree;

Expand Down
4 changes: 2 additions & 2 deletions lphy-base/src/test/java/lphy/base/LPhyExamplesTest.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package lphy.base;

import lphy.core.io.UserDir;
import lphy.core.parser.LPhyMetaParser;
import lphy.core.parser.LPhyParserDictionary;
import lphy.core.parser.REPL;
import org.junit.jupiter.api.Test;

Expand Down Expand Up @@ -52,7 +52,7 @@ public void testLPhyExamplesInDir(File exampleDir) {
}

UserDir.setUserDir(exampleDir.getPath());
LPhyMetaParser lPhyMetaParser = new REPL();
LPhyParserDictionary lPhyMetaParser = new REPL();
try {
FileReader lphyFile = new FileReader(exampleDir.getAbsoluteFile() + File.separator + fileName);
BufferedReader fin = new BufferedReader(lphyFile);
Expand Down
12 changes: 6 additions & 6 deletions lphy-studio/src/main/java/lphystudio/app/GraphvizDotUtils.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package lphystudio.app;

import lphy.core.model.*;
import lphy.core.parser.LPhyMetaParser;
import lphy.core.parser.LPhyParserDictionary;
import lphystudio.core.theme.ThemeColours;

import java.awt.*;
Expand All @@ -17,7 +17,7 @@ public class GraphvizDotUtils {

static boolean clusters = true;

public static String toGraphvizDot(Collection<GraphicalModelNode> nodes, LPhyMetaParser parser) {
public static String toGraphvizDot(Collection<GraphicalModelNode> nodes, LPhyParserDictionary parser) {

Set<GraphicalModelNode> done = new HashSet<>();
List<String> dataNodes = new ArrayList<>();
Expand Down Expand Up @@ -63,7 +63,7 @@ private static void appendCluster(String name, StringBuilder builder, List<Strin
builder.append("\n");
}

private static void toGraphvizDot(GraphicalModelNode node, Set<GraphicalModelNode> done, List<String> dataNodes, List<String> modelNodes, List<String> edges, LPhyMetaParser parser, boolean isData) {
private static void toGraphvizDot(GraphicalModelNode node, Set<GraphicalModelNode> done, List<String> dataNodes, List<String> modelNodes, List<String> edges, LPhyParserDictionary parser, boolean isData) {
if (done.contains(node)) {
// DO NOTHING
} else {
Expand Down Expand Up @@ -182,17 +182,17 @@ private static String graphvizNodeString(GraphicalModelNode node, String name, b
return name;
}

private static boolean isDataNode(GraphicalModelNode node, LPhyMetaParser parser) {
private static boolean isDataNode(GraphicalModelNode node, LPhyParserDictionary parser) {
if (node instanceof Value && !(node instanceof RandomVariable)) {
Value value = (Value)node;
if (!value.isAnonymous()) {
return (parser.hasValue(value.getId(), LPhyMetaParser.Context.data));
return (parser.hasValue(value.getId(), LPhyParserDictionary.Context.data));
}
}
return false;
}

private static String getUniqueId(GraphicalModelNode node, LPhyMetaParser parser) {
private static String getUniqueId(GraphicalModelNode node, LPhyParserDictionary parser) {
String name = node.getUniqueId();
if (node instanceof Value && !((Value)node).isAnonymous() && parser.isClamped(((Value) node).getId())) {
name = node.hashCode()+"";
Expand Down
Loading
Loading