Skip to content

Commit

Permalink
Merge pull request #68 from uci-cbcl/wbiesing/optimizations-from-prof…
Browse files Browse the repository at this point in the history
…iling-BUILD_HYRACKS

optimizations from profiling build_hyracks
  • Loading branch information
jakebiesinger committed Dec 3, 2013
2 parents 5cdf31f + 17b8e0d commit 587d70b
Show file tree
Hide file tree
Showing 29 changed files with 342 additions and 243 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang3.StringUtils;
Expand All @@ -30,9 +31,17 @@
import edu.uci.ics.genomix.minicluster.GenerateGraphViz.GRAPH_TYPE;
import edu.uci.ics.genomix.type.EdgeMap;
import edu.uci.ics.genomix.type.Kmer;
import edu.uci.ics.genomix.type.Node;
import edu.uci.ics.genomix.type.VKmer;

@SuppressWarnings("deprecation")
public class GenomixJobConf extends JobConf {

public static boolean debug = false;
public static ArrayList<VKmer> debugKmers;

private static Map<String, Long> tickTimes = new HashMap<String, Long>();

/* The following section ties together command-line options with a global JobConf
* Each variable has an annotated, command-line Option which is private here but
* is accessible through JobConf.get(GenomixConfigOld.VARIABLE).
Expand Down Expand Up @@ -277,8 +286,6 @@ public static void verifyPatterns(Patterns[] patterns) {
public static final String STATS_MIN_CONTIGLENGTH = "genomix.conf.minContigLength";
// intermediate date evaluation

private static Map<String, Long> tickTimes = new HashMap<String, Long>();

public GenomixJobConf(int kmerLength) {
super(new Configuration());
setInt(KMER_LENGTH, kmerLength);
Expand Down Expand Up @@ -512,7 +519,13 @@ public static long tock(String counter) {
/**
 * Copies job-wide settings out of {@code conf} into static state: the global kmer length on
 * {@link Kmer}, {@code EdgeMap.logReadIds}, and this class's {@code debug} / {@code debugKmers}.
 * <p>
 * {@code DEBUG_KMERS} is treated as a comma-separated list; every piece is handed to
 * {@code new VKmer(String)}. When the property is absent, {@code debug} is false and
 * {@code debugKmers} is an empty list.
 *
 * @param conf
 *            the configuration to read the settings from
 */
public static void setGlobalStaticConstants(Configuration conf) {
    final int kmerLength = Integer.parseInt(conf.get(GenomixJobConf.KMER_LENGTH));
    Kmer.setGlobalKmerLength(kmerLength);
    //        EdgeWritable.MAX_READ_IDS_PER_EDGE = Integer.parseInt(conf.get(GenomixJobConf.MAX_READIDS_PER_EDGE));

    final String logReadIdsSetting = conf.get(GenomixJobConf.LOG_READIDS);
    EdgeMap.logReadIds = Boolean.parseBoolean(logReadIdsSetting);

    // Debugging is switched on purely by the presence of the DEBUG_KMERS property.
    final String debugKmerList = conf.get(GenomixJobConf.DEBUG_KMERS);
    debug = debugKmerList != null;
    debugKmers = new ArrayList<VKmer>();
    if (!debug) {
        return;
    }
    for (String kmerText : debugKmerList.split(",")) {
        debugKmers.add(new VKmer(kmerText));
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ public static byte[] convertGraphToImg(JobConf conf, String srcDir, String destD

public static String convertEdgeToGraph(String outputNode, Node value, GRAPH_TYPE graphType) {
String outputEdge = "";
for (EDGETYPE et : EDGETYPE.values()) {
for (EDGETYPE et : EDGETYPE.values) {
for (Entry<VKmer, ReadIdSet> e : value.getEdgeMap(et).entrySet()) {
String destNode = "";
switch (graphType) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,11 @@ public static byte fromSet(EnumSet<DIR> set) {
return b;
}

public final EnumSet<EDGETYPE> edgeTypes() {
public final EDGETYPE[] edgeTypes() {
return edgeTypesInDir(this);
}

public static final EnumSet<EDGETYPE> edgeTypesInDir(DIR direction) {
public static final EDGETYPE[] edgeTypesInDir(DIR direction) {
return direction == DIR.REVERSE ? EDGETYPE.INCOMING : EDGETYPE.OUTGOING;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.EnumSet;

import org.apache.hadoop.io.Writable;

public enum EDGETYPE implements Writable{
public enum EDGETYPE implements Writable {

FF((byte) (0b00)),
FR((byte) (0b01)),
Expand All @@ -26,8 +25,9 @@ public final byte get() {
return val;
}

public static final EnumSet<EDGETYPE> INCOMING = EnumSet.of(RF, RR);
public static final EnumSet<EDGETYPE> OUTGOING = EnumSet.of(FF, FR);
public static final EDGETYPE[] values = { FF, FR, RF, RR };
public static final EDGETYPE[] INCOMING = { RF, RR };
public static final EDGETYPE[] OUTGOING = { FF, FR };

public static EDGETYPE fromByte(short b) {
b &= MASK;
Expand Down Expand Up @@ -64,14 +64,14 @@ public static EDGETYPE mirror(EDGETYPE edgeType) {
throw new RuntimeException("Unrecognized direction in mirrorDirection: " + edgeType);
}
}

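/**
 * Return the edge type whose first letter corresponds to dir1 and whose second letter
 * corresponds to dir2 (for example FORWARD, FORWARD gives FF and REVERSE, FORWARD gives RF).
 */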
public static EDGETYPE getEdgeTypeFromDirToDir(DIR dir1, DIR dir2){
switch(dir1){
public static EDGETYPE getEdgeTypeFromDirToDir(DIR dir1, DIR dir2) {
switch (dir1) {
case FORWARD:
switch(dir2){
switch (dir2) {
case FORWARD:
return FF;
case REVERSE:
Expand All @@ -80,7 +80,7 @@ public static EDGETYPE getEdgeTypeFromDirToDir(DIR dir1, DIR dir2){
throw new IllegalArgumentException("Invalid direction2 given: " + dir2);
}
case REVERSE:
switch(dir2){
switch (dir2) {
case FORWARD:
return RF;
case REVERSE:
Expand All @@ -92,7 +92,7 @@ public static EDGETYPE getEdgeTypeFromDirToDir(DIR dir1, DIR dir2){
throw new IllegalArgumentException("Invalid direction1 given: " + dir2);
}
}

public DIR dir() {
return dir(this);
}
Expand All @@ -109,6 +109,23 @@ public static DIR dir(EDGETYPE edgeType) { // .dir static / non-static
throw new RuntimeException("Unrecognized direction in dirFromEdgeType: " + edgeType);
}
}

/**
 * Instance shortcut for {@link #neighborDir(EDGETYPE)} applied to this edge type.
 *
 * @return whatever the static {@code neighborDir} returns for this constant
 */
public DIR neighborDir() {
    final DIR result = EDGETYPE.neighborDir(this);
    return result;
}

/**
 * Maps an edge type to the {@link DIR} named by its second letter: FF and RF give
 * {@code DIR.FORWARD}, FR and RR give {@code DIR.REVERSE}.
 *
 * @param et
 *            the edge type to inspect
 * @return the direction matching the second letter of {@code et}
 * @throws RuntimeException
 *             if {@code et} is not one of the four handled constants
 */
public static DIR neighborDir(EDGETYPE et) {
    switch (et) {
        case FF:
        case RF:
            return DIR.FORWARD;
        case FR:
        case RR:
            return DIR.REVERSE;
        default:
            // Name this method in the message so the failure is not attributed to dir().
            throw new RuntimeException("Unrecognized direction in neighborDir: " + et);
    }
}

/**
* return the edgetype corresponding to moving across edge1 and edge2.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ public class EdgeMap extends TreeMap<VKmer, ReadIdSet> implements Writable, Seri

private static final long serialVersionUID = 1L;
private static final int SIZE_INT = 4;
private static final int INITIAL_BYTE_ARRAY_SIZE = 30;
public static boolean logReadIds; // FIXME regression in usage of this (I broke it)

public EdgeMap() {
Expand Down Expand Up @@ -67,58 +68,62 @@ public void setAsCopy(EdgeMap other) {
}
}

/**
 * Adds up a byte length for this map: {@code SIZE_INT} plus, for every entry, the key's
 * {@code getLength()} and the value's {@code getLengthInBytes()}.
 *
 * @return the summed length
 */
public int getLengthInBytes() {
    int byteCount = SIZE_INT;
    for (Entry<VKmer, ReadIdSet> edge : this.entrySet()) {
        final VKmer neighbor = edge.getKey();
        final ReadIdSet readIds = edge.getValue();
        byteCount += neighbor.getLength();
        byteCount += readIds.getLengthInBytes();
    }
    return byteCount;
}

/**
* Return this EdgeMap's representation, as emitted by write(), as a new byte array
*/
public byte[] marshalToByteArray() throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream(getLengthInBytes());
ByteArrayOutputStream baos = new ByteArrayOutputStream(INITIAL_BYTE_ARRAY_SIZE);
DataOutputStream out = new DataOutputStream(baos);
write(out);
return baos.toByteArray();
}

public void setAsCopy(byte[] data, int offset) {
/**
* Populates this map from the given byte array
*
* @param data
*            the byte array to read from
* @param offset
*            index in data of this EdgeMap's first byte (the int entry count)
* @return the offset pointing just beyond this EdgeMap in the byte array, that is, `offset + len(this)`
*/
public int setAsCopy(byte[] data, int offset) {
int curOffset = offset;
int count = Marshal.getInt(data, offset);
curOffset += SIZE_INT;
clear();
for (int i = 0; i < count; i++) {
VKmer kmer = new VKmer();
kmer.setAsCopy(data, curOffset);
curOffset += kmer.getLength();
curOffset = kmer.setAsCopy(data, curOffset);

ReadIdSet ids = new ReadIdSet();
ids.setAsCopy(data, curOffset);
curOffset += ids.getLengthInBytes();
curOffset = ids.setAsCopy(data, curOffset);

put(kmer, ids);
}
return curOffset;
}

public void setAsReference(byte[] data, int offset) {
/**
* Populates this map from the given byte array, keeping references where possible
*
* @param data
*            the byte array to read from
* @param offset
*            index in data of this EdgeMap's first byte (the int entry count)
* @return the offset pointing just beyond this EdgeMap in the byte array, that is, `offset + len(this)`
*/
public int setAsReference(byte[] data, int offset) {
int curOffset = offset;
int count = Marshal.getInt(data, offset);
curOffset += SIZE_INT;
clear();
for (int i = 0; i < count; i++) {
VKmer kmer = new VKmer();
kmer.setAsReference(data, curOffset);
curOffset += kmer.getLength();
curOffset = kmer.setAsReference(data, curOffset);

ReadIdSet ids = new ReadIdSet();
ids.setAsCopy(data, curOffset);
curOffset += ids.getLengthInBytes();
curOffset = ids.setAsCopy(data, curOffset);

put(kmer, ids);
}
return curOffset;
}

@Override
Expand Down
Loading

0 comments on commit 587d70b

Please sign in to comment.