This repository has been archived by the owner on Nov 28, 2020. It is now read-only.

[Bug] Hadoop-bam not closing seekable stream #147

Open · wants to merge 7 commits into master
4 changes: 2 additions & 2 deletions build.sbt
@@ -4,7 +4,7 @@ import scala.util.Properties

name := """bdg-sequila"""

version := "0.5.4-spark-2.4.1"
version := "0.5.5-spark-2.4.1-SNAPSHOT"

organization := "org.biodatageeks"

@@ -103,7 +103,7 @@ resolvers ++= Seq(
// fix for htsjdk patch in hadoop-bam and disq
assemblyShadeRules in assembly := Seq(
ShadeRule.rename("htsjdk.samtools.SAMRecordHelper" -> "htsjdk.samtools.SAMRecordHelperDisq").inLibrary("org.disq-bio" % "disq" % "0.3.0"),
-ShadeRule.rename("htsjdk.samtools.SAMRecordHelper" -> "htsjdk.samtools.SAMRecordHelperHadoopBAM").inLibrary("org.seqdoop" % "hadoop-bam" % "7.10.0")
+ShadeRule.rename("htsjdk.samtools.SAMRecordHelper" -> "htsjdk.samtools.SAMRecordHelperHadoopBAM").inLibrary("org.seqdoop" % "hadoop-bam" % "7.10.0").inProject

)
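A note on the .inProject addition above: in sbt-assembly, a ShadeRule's targets accumulate, so chaining both scopes applies the rename to the dependency jar and to this project's own compiled classes. A minimal sketch of those semantics, using sbt-assembly's standard API (not new code from this PR):

ShadeRule
  .rename("htsjdk.samtools.SAMRecordHelper" -> "htsjdk.samtools.SAMRecordHelperHadoopBAM")
  .inLibrary("org.seqdoop" % "hadoop-bam" % "7.10.0") // rewrite the class inside the hadoop-bam jar
  .inProject                                          // also rewrite references in this project's classes

Without .inProject, project code that references htsjdk.samtools.SAMRecordHelper would still point at the old name after the library class is renamed, and the shaded assembly would fail to link.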

@@ -125,6 +125,7 @@ trait BDGAlignFileReaderWriter [T <: BDGAlignInputFormat]{
logger.info(s"######## Reading ${resolvedPath} or ${path}")
val alignReadMethod = spark.sqlContext.getConf(BDGInternalParams.IOReadAlignmentMethod,"hadoopBAM").toLowerCase
logger.info(s"######## Using ${alignReadMethod} for reading alignment files.")
logger.info(s"######## Using inputformat: ${c.toString()}")

alignReadMethod match {
case "hadoopbam" => {
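The dispatch above is driven entirely by a Spark SQL conf entry, read case-insensitively with hadoopBAM as the default when the key is unset. A minimal usage sketch, assuming a live SparkSession and this project's BDGInternalParams on the classpath (the remaining match branches are truncated in this hunk):

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().getOrCreate()
// Route alignment reads through the Hadoop-BAM code path; other conf values
// select the other branches of the match expression above.
spark.sqlContext.setConf(BDGInternalParams.IOReadAlignmentMethod, "hadoopBAM")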
@@ -233,7 +233,7 @@ private int addProbabilisticSplits(
final SeekableStream sin =
WrapSeekable.openPath(path.getFileSystem(cfg), path);

-final BAMSplitGuesser guesser = new BAMSplitGuesser(sin, cfg);
+final org.seqdoop.hadoop_bam.BAMBDGSplitGuesser guesser = new org.seqdoop.hadoop_bam.BAMBDGSplitGuesser(sin, cfg);

FileVirtualSplit previousSplit = null;

@@ -247,6 +247,7 @@

long alignedBeg = guesser.guessNextBAMRecordStart(beg, end);


// As the guesser goes to the next BGZF block before looking for BAM
// records, the ending BGZF blocks have to always be traversed fully.
// Hence force the length to be 0xffff, the maximum possible.
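For context on the comment above: a BGZF virtual file offset packs the compressed block's byte offset in the file and the record's offset inside the uncompressed block into one 64-bit value, (coffset << 16) | uoffset. Forcing the low 16 bits to 0xffff therefore addresses the last possible byte of a block, which is why split ends are widened this way. A sketch with illustrative names (not Hadoop-BAM API):

def makeVirtualOffset(coffset: Long, uoffset: Int): Long =
  (coffset << 16) | (uoffset & 0xffffL)

// A split end that covers its final BGZF block completely:
val end = makeVirtualOffset(coffset = 4096L, uoffset = 0xffff)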
35 changes: 35 additions & 0 deletions src/main/scala/org/biodatageeks/inputformats/BAMBDGRecord.java
@@ -0,0 +1,35 @@
package htsjdk.samtools;


public class BAMBDGRecord extends BAMRecord {

private byte[] mRestOfBinaryData = null;
protected BAMBDGRecord(final SAMFileHeader header,
final int referenceID,
final int coordinate,
final short readNameLength,
final short mappingQuality,
final int indexingBin,
final int cigarLen,
final int flags,
final int readLen,
final int mateReferenceID,
final int mateCoordinate,
final int insertSize,
final byte[] restOfData) {
super(header, referenceID, coordinate, readNameLength, mappingQuality, indexingBin, cigarLen, flags, readLen, mateReferenceID, mateCoordinate, insertSize, restOfData);

}

// Touch every lazily decoded field up front so the record is fully
// materialized before the underlying stream or buffer is released, then
// drop the raw byte block.
@Override
protected void eagerDecode() {
getReadName();
getCigar();
getReadBases();
getBaseQualities();
//getBinaryAttributes();
super.eagerDecode();
mRestOfBinaryData = null;
}

}
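BAMRecord defers decoding of its variable-length fields, so a record can safely outlive the seekable stream it was read from only if those fields are materialized first; the eagerDecode override above forces exactly that. A hedged sketch of how such records could be produced, using htsjdk's SAMRecordFactory interface; the factory name is hypothetical and, like BAMBDGRecord, it sits in the htsjdk.samtools package so it can reach the protected constructor:

package htsjdk.samtools

// Hypothetical factory (not part of this PR) that makes codecs emit the
// eager-decoding record subclass instead of plain BAMRecord.
class BDGRecordFactory extends SAMRecordFactory {

  override def createSAMRecord(header: SAMFileHeader): SAMRecord =
    new SAMRecord(header)

  override def createBAMRecord(
      header: SAMFileHeader, referenceSequenceIndex: Int, alignmentStart: Int,
      readNameLength: Short, mappingQuality: Short, indexingBin: Int,
      cigarLen: Int, flags: Int, readLen: Int, mateReferenceSequenceIndex: Int,
      mateAlignmentStart: Int, insertSize: Int,
      variableLengthBlock: Array[Byte]): BAMRecord =
    new BAMBDGRecord(header, referenceSequenceIndex, alignmentStart,
      readNameLength, mappingQuality, indexingBin, cigarLen, flags, readLen,
      mateReferenceSequenceIndex, mateAlignmentStart, insertSize, variableLengthBlock)
}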
@@ -133,8 +133,10 @@ public static long getKey0(int refIdx, int alignmentStart0) {
// initialize() and Hadoop-BAM's own code that relies on
// {@link BAMInputFormat} to call initialize() when the reader is
// created. Therefore we add this check for the time being.
-if(isInitialized)
+if(isInitialized) {
+    if(in != null) in.close();
     close();
+}
isInitialized = true;
reachedEnd = false;

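The change above is the heart of the fix named in the PR title: initialize() can run more than once per reader, and before this patch the existing close() call evidently left the wrapped seekable stream (in) open, so re-initialization leaked a stream each time. A generic sketch of the defensive-close pattern being applied, with illustrative names (not Hadoop-BAM API):

import java.io.{Closeable, IOException}

// Close whatever the previous initialization left open before creating a
// replacement, so repeated initialize() calls cannot leak streams.
def reopen[A <: Closeable](previous: Option[A])(open: () => A): A = {
  previous.foreach { old =>
    try old.close()
    catch { case _: IOException => () } // best effort: the stream is being discarded
  }
  open()
}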