forked from deweylab/RSEM
-
Notifications
You must be signed in to change notification settings - Fork 0
/
convert-sam-for-rsem
executable file
·115 lines (65 loc) · 2.85 KB
/
convert-sam-for-rsem
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/env perl
use Getopt::Long;
use Pod::Usage;
use File::Basename;
use FindBin;
use lib $FindBin::RealBin;
use rsem_perl_utils qw(runCommand getSAMTOOLS);
use Env qw(@PATH);
@PATH = ($FindBin::RealBin, "$FindBin::RealBin/" . getSAMTOOLS(), @PATH);
use strict;
use warnings;

# Option defaults.
my $p    = 1;       # thread count: passed to 'samtools sort -@' and to rsem-scan-for-paired-end-reads
my $mem  = "1G";    # per-thread memory: passed to 'samtools sort -m'
my $help = 0;

# On an unknown or malformed option, print the full usage text and exit with status 2.
GetOptions("p|num-threads=i"     => \$p,
           "memory-per-thread=s" => \$mem,
           "h|help"              => \$help) or pod2usage(-exitval => 2, -verbose => 2);

pod2usage(-verbose => 2) if ($help == 1);

# Exactly two positional arguments are required: the input alignment file and the output prefix.
pod2usage(-msg => "Invalid number of arguments!", -exitval => 2, -verbose => 2) if (scalar(@ARGV) != 2);

my $command;

my $in_file  = $ARGV[0];
my $tmp_file = "$ARGV[1].tmp.bam";    # name-sorted intermediate file, removed after Phase II
my $out_file = "$ARGV[1].bam";

# NOTE(review): the file names below are interpolated into the command strings unquoted.
# If runCommand hands the string to a shell, paths containing spaces or shell
# metacharacters would be split or interpreted -- confirm against rsem_perl_utils.

# Phase I, sort entries by read name so that all entries of the same read are grouped together
$command = "samtools sort -n -@ $p -m $mem -o $tmp_file $in_file";
runCommand($command);

# Phase II, parse the temporary BAM file to make paired-end alignments' two mates adjacent to each other
$command = "rsem-scan-for-paired-end-reads $p $tmp_file $out_file";
runCommand($command);

# delete temporary file
unlink $tmp_file or die "Could not delete $tmp_file.";

print "Conversion is completed. $out_file will be checked by 'rsem-sam-validator'.\n";

# Phase III, validate if the resulting bam file is correct
$command = "rsem-sam-validator $out_file";
runCommand($command);
__END__
=head1 NAME
convert-sam-for-rsem
=head1 PURPOSE
Make an RSEM-compatible BAM file.
=head1 SYNOPSIS
convert-sam-for-rsem [options] <input.sam/input.bam/input.cram> output_file_name
=head1 ARGUMENTS
=over
=item B<input.sam/input.bam/input.cram>
The SAM/BAM/CRAM file generated by the user's aligner. This file must contain the header section.
=item B<output_file_name>
The output name for the converted file. 'convert-sam-for-rsem' will output a BAM with the name 'output_file_name.bam'.
=back
=head1 OPTIONS
=over
=item B<-p/--num-threads> <int>
Set the number of threads to be used for converting. (Default: 1)
=item B<--memory-per-thread> <string>
Set the maximum allowable memory per thread. <string> represents the memory and accepts suffixes 'K/M/G'. (Default: 1G)
=item B<-h/--help>
Show help information.
=back
=head1 DESCRIPTION
This program converts the SAM/BAM/CRAM file generated by user's aligner into a BAM file which RSEM can process. However, users should make sure their aligners use 'reference_name.idx.fa' generated by 'rsem-prepare-reference' as their references and output header sections. After the conversion, this program will call 'rsem-sam-validator' to validate the resulting BAM file.
Note: You do not need to run this script if `rsem-sam-validator' reports that your SAM/BAM/CRAM file is valid.
=head1 EXAMPLES
Suppose input is set to 'input.sam' and output file name is "output"
convert-sam-for-rsem input.sam output
We will get a file called 'output.bam' as output.
=cut