diff --git a/nbproject/build-impl.xml b/nbproject/build-impl.xml
index f09872d..d88f591 100644
--- a/nbproject/build-impl.xml
+++ b/nbproject/build-impl.xml
@@ -71,8 +71,8 @@ is divided into following sections:
-
-
+
+
@@ -101,7 +101,7 @@ is divided into following sections:
-
+
@@ -181,6 +181,7 @@ is divided into following sections:
+
@@ -216,7 +217,12 @@ is divided into following sections:
-
+
+
+
+
+
+
@@ -242,6 +248,7 @@ is divided into following sections:
+
@@ -718,7 +725,7 @@ is divided into following sections:
-
+
@@ -793,7 +800,7 @@ is divided into following sections:
-
+
@@ -820,7 +827,7 @@ is divided into following sections:
-
+
@@ -859,7 +866,7 @@ is divided into following sections:
-
+
@@ -871,7 +878,7 @@ is divided into following sections:
-
+
@@ -994,15 +1001,15 @@ is divided into following sections:
-
+
-
+
-
+
@@ -1010,7 +1017,7 @@ is divided into following sections:
-
+
@@ -1205,7 +1212,7 @@ is divided into following sections:
Must select one file in the IDE or set run.class
-
+
Must select one file in the IDE or set applet.url
diff --git a/nbproject/genfiles.properties b/nbproject/genfiles.properties
index e5b83ba..1293f1b 100644
--- a/nbproject/genfiles.properties
+++ b/nbproject/genfiles.properties
@@ -4,5 +4,5 @@ build.xml.stylesheet.CRC32=8064a381@1.75.2.48
# This file is used by a NetBeans-based IDE to track changes in generated files such as build-impl.xml.
# Do not edit this file. You may delete it but then the IDE will never regenerate such files for you.
nbproject/build-impl.xml.data.CRC32=91437f43
-nbproject/build-impl.xml.script.CRC32=097bff4d
-nbproject/build-impl.xml.stylesheet.CRC32=876e7a8f@1.75.2.48
+nbproject/build-impl.xml.script.CRC32=9ddad943
+nbproject/build-impl.xml.stylesheet.CRC32=830a3534@1.80.1.48
diff --git a/src/libra/common/hadoop/io/reader/sequence/RawReadReader.java b/src/libra/common/hadoop/io/reader/sequence/RawReadReader.java
index e362a7b..f3d5696 100644
--- a/src/libra/common/hadoop/io/reader/sequence/RawReadReader.java
+++ b/src/libra/common/hadoop/io/reader/sequence/RawReadReader.java
@@ -35,110 +35,286 @@ public class RawReadReader implements Closeable {
private static final Log LOG = LogFactory.getLog(RawReadReader.class);
+ private static final int LINE_BUFFERS = 4;
+
private SampleFormat format;
private LineReader in;
- private Text buffer = new Text();
- private int bufferConsumed;
- private char delimiter;
+ private Text[] buffers = new Text[LINE_BUFFERS];
+ private int[] bufferConsumed = new int[LINE_BUFFERS];
+ private boolean eof = false;
private boolean finished = false;
public RawReadReader(SampleFormat format, InputStream in) {
this.format = format;
this.in = new LineReader(in);
- this.bufferConsumed = 0;
+ for(int i=0;i= LINE_BUFFERS) {
+ for(int i=0;i 0) {
+ // fill buffer
+ int filled = 0;
+ for(int i=0;i 0) {
+ return true;
+ } else {
+ return false;
+ }
} else {
return true;
}
}
-
- public long skipIncompleteRead() throws IOException {
+
+ private long _skipIncompleteFASTARead() throws IOException { // Discards buffered lines until buffers[0] starts with Read.FASTA_READ_DESCRIPTION_IDENTIFIER; returns the summed bufferConsumed[0] of the discarded lines.
if(this.finished) {
return 0;
}
+ boolean hasBufferData = _fillBuffer(); // _fillBuffer() is defined outside this method; a false result is treated as end of input below
+ if(!hasBufferData) {
+ //EOF
+ this.finished = true;
+ return 0;
+ }
+
long bytesConsumed = 0;
+ boolean headerFound = false;
+ while(hasBufferData) {
+ if(this.buffers[0].getLength() > 0 && this.buffers[0].charAt(0) == Read.FASTA_READ_DESCRIPTION_IDENTIFIER) { // a non-empty first line beginning with the FASTA identifier ends the skip
+ headerFound = true;
+ break;
+ } else {
+ bytesConsumed += this.bufferConsumed[0]; // account for the line that is about to be dropped
+ // refill buffer
+ _shiftBuffer(1); // NOTE(review): presumably removes buffers[0] and moves the remaining lines forward - helper body not visible here, confirm
+ hasBufferData = _fillBuffer();
+ }
+ }
- boolean hasBufferData = _fillBuffer(false);
+ if(!headerFound) { // the loop ended because _fillBuffer() returned false, not because a header was seen
+ //EOF
+ this.finished = true;
+ }
+
+ return bytesConsumed;
+ }
+
+ private long _skipIncompleteFASTQRead() throws IOException { // Discards buffered lines until the buffered window looks like the start of a FASTQ record; returns the summed bufferConsumed[0] of the discarded lines.
+ if(this.finished) {
+ return 0;
+ }
+
+ boolean hasBufferData = _fillBuffer(); // _fillBuffer() is defined outside this method; a false result is treated as end of input below
if(!hasBufferData) {
//EOF
this.finished = true;
- return bytesConsumed;
+ return 0;
}
+ long bytesConsumed = 0;
boolean headerFound = false;
while(hasBufferData) {
- if(this.buffer.getLength() > 0 && this.buffer.charAt(0) == this.delimiter) {
+ int emptyBufferCount = _countEmptyBuffer(); // helper defined outside this method; any non-zero count rejects the candidate header below
+ if(this.buffers[0].getLength() > 0 && this.buffers[0].charAt(0) == Read.FASTQ_READ_DESCRIPTION_IDENTIFIER &&
+ this.buffers[2].getLength() > 0 && this.buffers[2].charAt(0) == Read.FASTQ_READ_DESCRIPTION2_IDENTIFIER &&
+ emptyBufferCount == 0) { // accepted only when buffers[0] starts with the first identifier, buffers[2] starts with the second identifier, and no buffered line is counted as empty
headerFound = true;
break;
} else {
- bytesConsumed += this.bufferConsumed;
+ bytesConsumed += this.bufferConsumed[0]; // account for the line that is about to be dropped
// refill buffer
- hasBufferData = _fillBuffer(true);
+ _shiftBuffer(1); // NOTE(review): presumably removes buffers[0] and moves the remaining lines forward - helper body not visible here, confirm
+ hasBufferData = _fillBuffer();
}
}
if(!headerFound) {
//EOF
- bytesConsumed += this.bufferConsumed;
this.finished = true;
- this.bufferConsumed = 0;
}
return bytesConsumed;
}
- public long readRead(Read read) throws IOException {
+ public long skipIncompleteRead() throws IOException { // Dispatches on this.format to the matching skip routine and returns that routine's result.
+ switch(this.format) {
+ case FASTA:
+ return _skipIncompleteFASTARead();
+ case FASTQ:
+ return _skipIncompleteFASTQRead();
+ default:
+ throw new IOException("Unknown format"); // any format value other than FASTA or FASTQ is rejected
+ }
+ }
+
+ private void _printBuffer() {
+ for(int i=0;i 0 && this.buffers[0].charAt(0) == Read.FASTA_READ_DESCRIPTION_IDENTIFIER) {
+ // GO!
+ // add header
+ List lines = new ArrayList();
+
+ String lineStr = this.buffers[0].toString();
+ if(lineStr.trim().length() > 0) {
+ lines.add(lineStr);
+ }
+
+ bytesConsumed += this.bufferConsumed[0];
+ _shiftBuffer(1);
+ hasBufferData = _fillBuffer();
+
+ boolean nextHeaderFound = false;
+ while(hasBufferData) {
+ if(this.buffers[0].getLength() > 0 && this.buffers[0].charAt(0) == Read.FASTA_READ_DESCRIPTION_IDENTIFIER) {
+ nextHeaderFound = true;
+ break;
+ } else {
+ lineStr = this.buffers[0].toString();
+ if(lineStr.trim().length() > 0) {
+ lines.add(lineStr);
+ }
+
+ bytesConsumed += this.bufferConsumed[0];
+ // refill buffer
+ _shiftBuffer(1);
+ hasBufferData = _fillBuffer();
+ }
+ }
+
+ if(!nextHeaderFound) {
+ //EOF
+ this.finished = true;
+ }
+
+ read.parse(lines);
+ } else {
+ throw new IOException(String.format("Unknown data for FASTA read - %s", this.buffers[0].toString()));
+ }
+
+ return bytesConsumed;
+ }
+
+ private long _readFASTQRead(Read read) throws IOException {
read.clear();
long bytesConsumed = 0;
@@ -149,40 +325,50 @@ public long readRead(Read read) throws IOException {
}
// check buffer has a header
- boolean hasBufferData = _fillBuffer(false);
+ boolean hasBufferData = _fillBuffer();
if(!hasBufferData) {
//EOF
this.finished = true;
return bytesConsumed;
}
- if(this.buffer.getLength() > 0 && this.buffer.charAt(0) == this.delimiter) {
+ int emptyBufferCount = _countEmptyBuffer();
+ if(this.buffers[0].getLength() > 0 && this.buffers[0].charAt(0) == Read.FASTQ_READ_DESCRIPTION_IDENTIFIER &&
+ this.buffers[2].getLength() > 0 && this.buffers[2].charAt(0) == Read.FASTQ_READ_DESCRIPTION2_IDENTIFIER &&
+ emptyBufferCount == 0) {
// GO!
// add header
List lines = new ArrayList();
- String lineStr = this.buffer.toString();
+ String lineStr = this.buffers[0].toString();
if(lineStr.trim().length() > 0) {
lines.add(lineStr);
}
- bytesConsumed += this.bufferConsumed;
- hasBufferData = _fillBuffer(true);
+ bytesConsumed += this.bufferConsumed[0];
+ _shiftBuffer(1);
+ hasBufferData = _fillBuffer();
boolean nextHeaderFound = false;
while(hasBufferData) {
- if(this.buffer.getLength() > 0 && this.buffer.charAt(0) == this.delimiter) {
+ emptyBufferCount = _countEmptyBuffer();
+ //_printBuffer();
+
+ if(this.buffers[0].getLength() > 0 && this.buffers[0].charAt(0) == Read.FASTQ_READ_DESCRIPTION_IDENTIFIER &&
+ this.buffers[2].getLength() > 0 && this.buffers[2].charAt(0) == Read.FASTQ_READ_DESCRIPTION2_IDENTIFIER &&
+ emptyBufferCount == 0) {
nextHeaderFound = true;
break;
} else {
- lineStr = this.buffer.toString();
+ lineStr = this.buffers[0].toString();
if(lineStr.trim().length() > 0) {
lines.add(lineStr);
}
- bytesConsumed += this.bufferConsumed;
+ bytesConsumed += this.bufferConsumed[0];
// refill buffer
- hasBufferData = _fillBuffer(true);
+ _shiftBuffer(1);
+ hasBufferData = _fillBuffer();
}
}
@@ -193,9 +379,20 @@ public long readRead(Read read) throws IOException {
read.parse(lines);
} else {
- throw new IOException(String.format("Unknown data - %s", this.buffer.toString()));
+ throw new IOException(String.format("Unknown data for FASTQ read - %s", this.buffers[0].toString()));
}
return bytesConsumed;
}
+
+ public long readRead(Read read) throws IOException { // Dispatches on this.format to the matching read routine, passing the caller's Read through, and returns that routine's result.
+ switch(this.format) {
+ case FASTA:
+ return _readFASTARead(read);
+ case FASTQ:
+ return _readFASTQRead(read);
+ default:
+ throw new IOException("Unknown format"); // any format value other than FASTA or FASTQ is rejected
+ }
+ }
}