Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

optimizations from profiling build_hyracks #68

Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ public static byte[] convertGraphToImg(JobConf conf, String srcDir, String destD

public static String convertEdgeToGraph(String outputNode, Node value, GRAPH_TYPE graphType) {
String outputEdge = "";
for (EDGETYPE et : EDGETYPE.values()) {
for (EDGETYPE et : EDGETYPE.values) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you explain the difference between values() and values?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

values() is a method that's automatically generated for every enum type; it returns an array containing all the enum constants (so FF, FR, RF and RR in this case), and each call hands back a fresh copy of that array. The values I've added here is a public static final EDGETYPE[] field with the same name as that generated method. I'm essentially bypassing the generated accessor for this enum type, which was taking up to 5% of the total runtime.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can rename it away from .values so it's not so ambiguous. In fact, it's odd that Java lets me have a field called values when I can also call the method values().

for (Entry<VKmer, ReadIdSet> e : value.getEdgeMap(et).entrySet()) {
String destNode = "";
switch (graphType) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,11 @@ public static byte fromSet(EnumSet<DIR> set) {
return b;
}

public final EnumSet<EDGETYPE> edgeTypes() {
public final EDGETYPE[] edgeTypes() {
return edgeTypesInDir(this);
}

public static final EnumSet<EDGETYPE> edgeTypesInDir(DIR direction) {
public static final EDGETYPE[] edgeTypesInDir(DIR direction) {
return direction == DIR.REVERSE ? EDGETYPE.INCOMING : EDGETYPE.OUTGOING;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,9 @@ public final byte get() {
return val;
}

public static final EnumSet<EDGETYPE> INCOMING = EnumSet.of(RF, RR);
public static final EnumSet<EDGETYPE> OUTGOING = EnumSet.of(FF, FR);
public static final EDGETYPE[] values = {FF, FR, RF, RR};
public static final EDGETYPE[] INCOMING = {RF, RR};
public static final EDGETYPE[] OUTGOING = {FF, FR};

public static EDGETYPE fromByte(short b) {
b &= MASK;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ public EDGETYPE getNeighborEdgeType(DIR direction) {
if (degree(direction) != 1)
throw new IllegalArgumentException(
"getEdgetypeFromDir is used on the case, in which the vertex has and only has one EDGETYPE!");
EnumSet<EDGETYPE> ets = direction.edgeTypes();
EDGETYPE[] ets = direction.edgeTypes();
for (EDGETYPE et : ets) {
if (edges[et.get()] != null && getEdgeMap(et).size() > 0) {
return et;
Expand Down Expand Up @@ -268,7 +268,7 @@ public EdgeMap[] getEdges() {
}

public void setEdges(EdgeMap[] edges) {
for (EDGETYPE et : EDGETYPE.values()) {
for (EDGETYPE et : EDGETYPE.values) {
setEdgeMap(et, edges[et.get()]);
}
}
Expand Down Expand Up @@ -343,7 +343,7 @@ public void setFlippedReadIds(ReadHeadSet flippedReadIds) {
*/
public int getSerializedLength() {
int length = Byte.SIZE / 8; // byte header
for (EDGETYPE e : EDGETYPE.values()) {
for (EDGETYPE e : EDGETYPE.values) {
if (edges[e.get()] != null && edges[e.get()].size() > 0) {
length += edges[e.get()].getLengthInBytes();
}
Expand Down Expand Up @@ -377,7 +377,7 @@ public void setAsCopy(byte[] data, int offset) {
reset();
byte activeFields = data[offset];
offset += 1;
for (EDGETYPE et : EDGETYPE.values()) {
for (EDGETYPE et : EDGETYPE.values) {
// et.get() is the index of the bit; if that bit is non-zero, this edge is present in the stream
if ((activeFields & (1 << et.get())) != 0) {
getEdgeMap(et).setAsCopy(data, offset);
Expand Down Expand Up @@ -406,7 +406,7 @@ public void setAsReference(byte[] data, int offset) {
reset();
byte activeFields = data[offset];
offset += 1;
for (EDGETYPE et : EDGETYPE.values()) {
for (EDGETYPE et : EDGETYPE.values) {
// et.get() is the index of the bit; if that bit is non-zero, this edge is present in the stream
if ((activeFields & (1 << et.get())) != 0) {
getEdgeMap(et).setAsReference(data, offset);
Expand All @@ -433,7 +433,7 @@ public void setAsReference(byte[] data, int offset) {

public static void write(Node n, DataOutput out) throws IOException {
out.writeByte(n.getActiveFields());
for (EDGETYPE e : EDGETYPE.values()) {
for (EDGETYPE e : EDGETYPE.values) {
if (n.edges[e.get()] != null && n.edges[e.get()].size() > 0) {
n.edges[e.get()].write(out);
}
Expand Down Expand Up @@ -461,7 +461,7 @@ public void write(DataOutput out) throws IOException {
public void readFields(DataInput in) throws IOException {
reset();
byte activeFields = in.readByte();
for (EDGETYPE et : EDGETYPE.values()) {
for (EDGETYPE et : EDGETYPE.values) {
// et.get() is the index of the bit; if that bit is non-zero, this edge is present in the stream
if ((activeFields & (1 << et.get())) != 0) {
getEdgeMap(et).readFields(in);
Expand Down Expand Up @@ -492,7 +492,7 @@ protected static class NODE_FIELDS {
protected byte getActiveFields() {
byte fields = 0;
// bits 0-3 are for presence of edges
for (EDGETYPE et : EDGETYPE.values()) {
for (EDGETYPE et : EDGETYPE.values) {
if (edges[et.get()] != null && edges[et.get()].size() > 0) {
fields |= 1 << et.get();
}
Expand Down Expand Up @@ -530,7 +530,7 @@ public boolean equals(Object o) {
return false;

Node nw = (Node) o;
for (EDGETYPE et : EDGETYPE.values()) {
for (EDGETYPE et : EDGETYPE.values) {
// If I'm null, return false if he's not null; otherwise, do a regular .equals
if (edges[et.get()] == null ? nw.edges[et.get()] != null : edges[et.get()].equals(nw.edges[et.get()])) {
return false;
Expand All @@ -550,7 +550,7 @@ public boolean equals(Object o) {
public String toString() {
StringBuilder sbuilder = new StringBuilder();
sbuilder.append('{');
for (EDGETYPE et : EDGETYPE.values()) {
for (EDGETYPE et : EDGETYPE.values) {
sbuilder.append(et + ":").append(edges[et.get()] == null ? "null" : edges[et.get()].toString())
.append('\t');
}
Expand Down Expand Up @@ -740,7 +740,7 @@ public void mergeEdges(EDGETYPE edgeType, Node other) {

protected void addEdges(boolean flip, Node other) {
if (!flip) {
for (EDGETYPE et : EDGETYPE.values()) {
for (EDGETYPE et : EDGETYPE.values) {
unionUpdateEdgeMap(et, et, other.edges);
}
} else {
Expand Down Expand Up @@ -848,7 +848,7 @@ protected void mergeUnflippedAndFlippedReadIDs(EDGETYPE edgeType, Node other) {
* Debug helper function to find the edge associated with the given kmer, checking all directions. If the edge doesn't exist in any direction, returns null
*/
public NeighborInfo findEdge(final VKmer kmer) {
for (EDGETYPE et : EDGETYPE.values()) {
for (EDGETYPE et : EDGETYPE.values) {
if (edges[et.get()] != null && edges[et.get()].containsKey(kmer)) {
return new NeighborInfo(et, kmer, edges[et.get()].get(kmer));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import java.io.IOException;
import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.Map;
Expand Down Expand Up @@ -48,7 +49,7 @@ public static void assembleNodeRandomly(Node targetNode, int orderNum) {
ArrayList<SimpleEntry<VKmer, ReadIdSet>> sampleList;
SimpleEntry<VKmer, ReadIdSet> edgeId;
EdgeMap edge;
for (EDGETYPE e : EDGETYPE.values()) {
for (EDGETYPE e : EDGETYPE.values) {
sampleList = new ArrayList<SimpleEntry<VKmer, ReadIdSet>>();
for (int i = 0; i < min + (int) (Math.random() * ((max - min) + 1)); i++) {
String edgeStr = generateString(orderNum);
Expand Down Expand Up @@ -77,7 +78,7 @@ public static void assembleNodeRandomly(Node targetNode, int orderNum) {

public static void printSrcNodeInfo(Node srcNode) {
System.out.println("InternalKmer: " + srcNode.getInternalKmer().toString());
for (EDGETYPE e : EDGETYPE.values()) {
for (EDGETYPE e : EDGETYPE.values) {
System.out.println(e.toString());
for (Map.Entry<VKmer, ReadIdSet> iter : srcNode.getEdgeMap(e).entrySet()) {
System.out.println("edgeKmer: " + iter.getKey().toString());
Expand All @@ -101,7 +102,7 @@ public static void printSrcNodeInfo(Node srcNode) {

public static void compareTwoNodes(Node et1, Node et2) {
Assert.assertEquals(et1.getInternalKmer().toString(), et2.getInternalKmer().toString());
for (EDGETYPE e : EDGETYPE.values()) {
for (EDGETYPE e : EDGETYPE.values) {
Assert.assertEquals(et1.getEdgeMap(e).size(), et2.getEdgeMap(e).size());
for (Map.Entry<VKmer, ReadIdSet> iter1 : et1.getEdgeMap(e).entrySet()) {
Map.Entry<VKmer, ReadIdSet> iter2 = et2.getEdgeMap(e).pollFirstEntry();
Expand All @@ -127,7 +128,7 @@ public static void getEdgeMapRandomly(EdgeMap edgeMap, int orderNum) {
int max = 4;
ArrayList<SimpleEntry<VKmer, ReadIdSet>> sampleList;
SimpleEntry<VKmer, ReadIdSet> edgeId;
for (EDGETYPE e : EDGETYPE.values()) {
for (EDGETYPE e : EDGETYPE.values) {
sampleList = new ArrayList<SimpleEntry<VKmer, ReadIdSet>>();
for (int i = 0; i < min + (int) (Math.random() * ((max - min) + 1)); i++) {
String edgeStr = generateString(orderNum);
Expand Down Expand Up @@ -188,9 +189,9 @@ public void testDIR() throws IOException {
Assert.assertEquals(0b11 << 2, DIR.fromSet(EnumSet.allOf(DIR.class)));
Assert.assertEquals(0b00 << 2, DIR.fromSet(EnumSet.noneOf(DIR.class)));

EnumSet<EDGETYPE> edgeTypes1 = testDir1.edgeTypes();
EnumSet<EDGETYPE> edgeTypes1 = EnumSet.copyOf(Arrays.asList(testDir1.edgeTypes()));
EnumSet<EDGETYPE> edgeExample1 = EnumSet.noneOf(EDGETYPE.class);
EnumSet<EDGETYPE> edgeTypes2 = testDir2.edgeTypes();
EnumSet<EDGETYPE> edgeTypes2 = EnumSet.copyOf(Arrays.asList(testDir2.edgeTypes()));
EnumSet<EDGETYPE> edgeExample2 = EnumSet.noneOf(EDGETYPE.class);
edgeExample1.add(EDGETYPE.FF);
edgeExample1.add(EDGETYPE.FR);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public void reduce(VKmer key, Iterator<Node> values, OutputCollector<VKmer, Node
Node curNode;
while (values.hasNext()) {
curNode = values.next();
for (EDGETYPE e : EDGETYPE.values()) {
for (EDGETYPE e : EDGETYPE.values) {
outputNode.getEdgeMap(e).unionUpdate(curNode.getEdgeMap(e));
}
outputNode.getUnflippedReadIds().addAll(curNode.getUnflippedReadIds());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ public void map(VKmer key, Node value, OutputCollector<Text, LongWritable> outpu

long totalEdgeReads = 0;
long totalSelf = 0;
for (EDGETYPE et : EDGETYPE.values()) {
for (EDGETYPE et : EDGETYPE.values) {
for (Entry<VKmer, ReadIdSet> e : value.getEdgeMap(et).entrySet()) {
totalEdgeReads += e.getValue().size();
if (e.getKey().equals(key)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ public void init(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, i
// }
// }

for (EDGETYPE e : EDGETYPE.values()) {
for (EDGETYPE e : EDGETYPE.values) {
localUniNode.getEdgeMap(e).unionUpdate((readNode.getEdgeMap(e)));
}
localUniNode.getUnflippedReadIds().addAll(readNode.getUnflippedReadIds());
Expand All @@ -121,7 +121,7 @@ public void aggregate(IFrameTupleAccessor accessor, int tIndex, IFrameTupleAcces
Node localUniNode = (Node) state.state;

readNode.setAsCopy(accessor.getBuffer().array(), getOffSet(accessor, tIndex, 1));
for (EDGETYPE e : EDGETYPE.values()) {
for (EDGETYPE e : EDGETYPE.values) {
localUniNode.getEdgeMap(e).unionUpdate(readNode.getEdgeMap(e));
}
localUniNode.getUnflippedReadIds().addAll(readNode.getUnflippedReadIds());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ public class ReadsKeyValueParserFactory implements IKeyValueParserFactory<LongWr
private final ConfFactory confFactory;

public static final RecordDescriptor readKmerOutputRec = new RecordDescriptor(new ISerializerDeserializer[2]);

private static final Pattern genePattern = Pattern.compile("[AGCT]+");

public ReadsKeyValueParserFactory(JobConf conf) throws HyracksDataException {
confFactory = new ConfFactory(conf);
Expand Down Expand Up @@ -131,7 +133,6 @@ public void parse(LongWritable key, Text value, IFrameWriter writer, String file
geneLine = rawLine[1];
}

Pattern genePattern = Pattern.compile("[AGCT]+");
Matcher geneMatcher = genePattern.matcher(geneLine);
if (geneMatcher.matches()) {
setReadInfo(mateId, readID, 0);
Expand Down Expand Up @@ -163,7 +164,7 @@ private void SplitReads(long readID, byte[] readLetters, IFrameWriter writer) {
DIR nextNodeDir = DIR.FORWARD;

/* middle kmer */
nextNode.reset();
nextNode = new Node();
nextNode.setAverageCoverage(1);
nextForwardKmer.setAsCopy(curForwardKmer);
for (int i = Kmer.getKmerLength(); i < readLetters.length; i++) {
Expand All @@ -176,9 +177,9 @@ private void SplitReads(long readID, byte[] readLetters, IFrameWriter writer) {

curForwardKmer.setAsCopy(nextForwardKmer);
curReverseKmer.setAsCopy(nextReverseKmer);
curNode.setAsCopy(nextNode);
curNode = nextNode;
curNodeDir = nextNodeDir;
nextNode.reset();
nextNode = new Node();
nextNode.setAverageCoverage(1);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ public VKmer getDestVertexId(DIR direction) {
+ getVertexValue().toString());

if (degree == 1) {
EnumSet<EDGETYPE> edgeTypes = direction.edgeTypes();
EDGETYPE[] edgeTypes = direction.edgeTypes();
for (EDGETYPE et : edgeTypes) {
if (getVertexValue().getEdgeMap(et).size() > 0)
return getVertexValue().getEdgeMap(et).firstKey();
Expand All @@ -205,7 +205,7 @@ public VKmer getDestVertexId(DIR direction) {
* check if I am a tandemRepeat
*/
public boolean isTandemRepeat(VertexValueWritable value) {
for (EDGETYPE et : EDGETYPE.values()) {
for (EDGETYPE et : EDGETYPE.values) {
for (VKmer kmerToCheck : value.getEdgeMap(et).keySet()) {
if (kmerToCheck.equals(getVertexId())) {
repeatEdgetype = et;
Expand All @@ -222,7 +222,7 @@ public boolean isTandemRepeat(VertexValueWritable value) {
*/
public void broadcastKillself() {
VertexValueWritable vertex = getVertexValue();
for (EDGETYPE et : EDGETYPE.values()) {
for (EDGETYPE et : EDGETYPE.values) {
for (VKmer dest : vertex.getEdgeMap(et).keySet()) {
outgoingMsg.reset();
outFlag &= EDGETYPE.CLEAR;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ public void receiveUpdates(Iterator<BubbleMergeMessage> msgIterator) {
deleteVertex(getVertexId());
break;
case ADD_READIDS:
for (EDGETYPE et : EDGETYPE.values()) {
for (EDGETYPE et : EDGETYPE.values) {
EdgeMap edgeMap = incomingMsg.getNode().getEdgeMap(et);
if (edgeMap.size() > 0) {
getVertexValue().getEdgeMap(et).unionUpdate(edgeMap);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,10 @@ public void updateNeighbors() {
}

DIR mergeDir = edgeType.dir();
EnumSet<EDGETYPE> mergeEdges = mergeDir.edgeTypes();
EDGETYPE[] mergeEdges = mergeDir.edgeTypes();

DIR updateDir = mergeDir.mirror();
EnumSet<EDGETYPE> updateEdges = updateDir.edgeTypes();
EDGETYPE[] updateEdges = updateDir.edgeTypes();

// prepare the update message s.t. the receiver can do a simple unionupdate
// that means we figure out any hops and place our merge-dir edges in the appropriate list of the outgoing msg
Expand Down Expand Up @@ -154,7 +154,7 @@ public void receiveUpdates(Iterator<M> msgIterator) {
// remove the edge to the node that will merge elsewhere
vertex.getEdgeMap(EDGETYPE.fromByte(incomingMsg.getFlag())).remove(incomingMsg.getSourceVertexId());
// add the node this neighbor will merge into
for (EDGETYPE edgeType : EDGETYPE.values()) {
for (EDGETYPE edgeType : EDGETYPE.values) {
vertex.getEdgeMap(edgeType).unionUpdate(incomingMsg.getEdgeList(edgeType));
}
updated = true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public boolean repeatCanBeMerged() {
tmpValue.getEdgeMap(repeatEdgetype).remove(repeatKmer);
boolean hasFlip = false;
// pick one edge and flip
for (EDGETYPE et : EDGETYPE.values()) {
for (EDGETYPE et : EDGETYPE.values) {
for (Entry<VKmer, ReadIdSet> edge : tmpValue.getEdgeMap(et).entrySet()) {
EDGETYPE flipEt = et.flipNeighbor();
tmpValue.getEdgeMap(flipEt).put(edge.getKey(), edge.getValue());
Expand Down Expand Up @@ -75,7 +75,7 @@ public void mergeTandemRepeat() {
getVertexValue().getEdgeMap(repeatEdgetype).remove(getVertexId());
boolean hasFlip = false;
/** pick one edge and flip **/
for (EDGETYPE et : EDGETYPE.values()) {
for (EDGETYPE et : EDGETYPE.values) {
for (Entry<VKmer, ReadIdSet> edge : getVertexValue().getEdgeMap(et).entrySet()) {
EDGETYPE flipDir = et.flipNeighbor();
getVertexValue().getEdgeMap(flipDir).put(edge.getKey(), edge.getValue());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ public void addEdgeToInsertedBubble(EDGETYPE meToNewBubbleDir, VKmer insertedBub
}

public void setupEdgeForInsertedBubble() {
for (EDGETYPE et : EDGETYPE.values()) {
for (EDGETYPE et : EDGETYPE.values) {
edges[et.get()] = new EdgeMap();
}
edges[newBubbleToMajorEdgetype.get()].put(majorVertexId, new ReadIdSet(Arrays.asList(new Long(readId))));
Expand Down