-
Notifications
You must be signed in to change notification settings - Fork 0
/
kaijuxReport.pl
98 lines (77 loc) · 2.09 KB
/
kaijuxReport.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/perl
##################################################################
#
# Author:
# Dimitar Kenanov - [email protected]
#
# Produce a KAIJUX REPORT
#
# INPUTS:
# 1. KAIJUX output file
# 2. File with all sequence headers
#
##################################################################
use strict;
use warnings;
use Getopt::Long;
use Storable;
my ($opts, $HEADERS, $KOUT);
$opts=GetOptions("h=s" => \$HEADERS, # HEADERS_HASH_FILE, input
"k=s" => \$KOUT # KAIJUX OUTPUT, input
);
if((!$HEADERS && !$KOUT) || ! $HEADERS || !$KOUT){
die "\n Usage: kaijuxReport.pl -h HEADERS_HASH_FILE -k KAIJUX_OUT > REPORT\n\n";
}
my ($fhi,@tmp,$sid,$sname,$spid);
my ($HEAD,%REP);
# read HEADERS in hash and store it in a file
print STDERR "\n READING IN HEADERS...\n";
$HEAD=retrieve($HEADERS);
# read KAIJUX in hash
print STDERR "\n CREATING KAIJUX REPORT...\n";
my ($clsfd,$uclsfd,$totr);
$clsfd=$uclsfd=0;
open($fhi,'<',$KOUT);
while(<$fhi>){
chomp;
if($_ =~ /^U\t/){
$uclsfd++;
next;
}elsif($_ =~ /^C\t/){
$clsfd++;
@tmp=split/\t/,$_;
for my $i(@tmp){
if ($i =~ /^gi\|/){
$i=~s/,$//;
my @stmp=split/,/,$i;
for my $s(@stmp){
$s=~/\|(\w{2}_\d+\.\d+)\|/;
$sid=$1;
if(!exists $REP{$sid}){
$REP{$sid}=[1,@{$$HEAD{$sid}}];
}elsif(exists $REP{$sid}){
$REP{$sid}[0]++;
}
}
}
}
}
}
close $fhi;
# print stat
$totr=$uclsfd+$clsfd;
for my $k(keys %REP){
my $perc=$REP{$k}[0]/$totr;
push @{$REP{$k}},sprintf("%.2f",$perc);
}
print join("\t",'%','reads','id','desc','species'),"\n";
print "-------------------------------------------------------\n";
for my $k(reverse sort {$REP{$a}[0] <=> $REP{$b}[0]} keys %REP){
print join("\t",@{$REP{$k}}[3,0],$k,@{$REP{$k}}[1,2]),"\n";
}
print "-------------------------------------------------------\n";
print "\nDET:S: ",scalar(keys %REP)," :\n"; # Detected seqs
print "TOT:R: $totr :\n"; # Total Num Read/Contigs
print "TOT:C: $clsfd :\n"; # Total Num Classifiled Reads/Contigs
print "TOT:U: $uclsfd :\n"; # Total Num Unclassified Reads/Contigs
print STDERR "\n DONE\n";