Skip to content

Commit

Permalink
checks for /1 and /2 format
Browse files Browse the repository at this point in the history
  • Loading branch information
lskatz committed Mar 21, 2018
1 parent fcefffb commit 0e8756f
Showing 1 changed file with 60 additions and 13 deletions.
73 changes: 60 additions & 13 deletions src/bin/friends_ung.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ extern crate ross;
extern crate regex;

use ross::ross_base_options;
use ross::logmsg;
use regex::Regex;

use std::fs::File;
Expand All @@ -12,35 +13,81 @@ use std::env;
// Entry point. Reads lines from /dev/stdin in groups of eight and checks that
// the lines at offsets 0 and 4 of each group (the two deflines of a pair) end
// in `/1` and `/2` respectively and share the same base name. On the first
// mismatch it logs a message and exits with status 1; otherwise it logs that
// the fastq input seems to be interleaved paired-end.
//
// NOTE(review): this listing looks like a commit diff rendered without +/-
// markers, so old and new lines appear to be interleaved (see the notes
// below). Confirm against the real file before trying to build it.
fn main() {
let args: Vec<String> = env::args().collect();
let mut opts = ross_base_options();
// NOTE(review): the "x"/"blah" flag is presumably the pre-commit line that
// the "check-first" option replaced -- TODO confirm.
opts.optflag("x","blah","blah!");
opts.optopt("c","check-first","How many deflines to check to make sure the input is paired-end","INT");
// args[0] is used as the program name in the usage text below, so only the
// remaining arguments are parsed as options.
let matches = opts.parse(&args[1..]).expect("ERROR: could not parse parameters");

// If there is a match on these, then mark invalid.
// In other words, we are looking for a pattern that
// is NOT the target seq or qual
// NOTE(review): seq_regex and qual_regex are not referenced anywhere else in
// the visible code.
let seq_regex = Regex::new(r"[^a-zA-Z]").expect("malformed seq regex");
let qual_regex= Regex::new(r"[^!-Z]").expect("malformed qual regex");
// Defline pattern: group 1 is everything before the final slash (the base
// name), group 2 is the trailing read number, 1 or 2.
// NOTE(review): the expect text says "qual regex" although this is the
// defline pattern -- looks like a copy-paste slip.
let slash_r1r2_regex = Regex::new(r"(.+)/([12])$").expect("malformed qual regex");

// Print the usage text and exit successfully when help is requested.
// NOTE(review): two println! openers follow; the first one (the
// "Convert a fastq file..." text) is presumably the removed pre-commit line.
if matches.opt_present("help") {
println!("Convert a fastq file to a standard 4-lines-per-entry format\n{}",
println!("Determine paired-end-ness in an interleaved file. Currently only checks deflines for the /1 and /2 format\n{}",
opts.usage(&opts.short_usage(&args[0]))
);
std::process::exit(0);
}

// NOTE(review): the `lines_per_read` block below is never closed and looks
// like removed pre-commit code; the loop further down hard-codes 8 instead.
let lines_per_read={
if matches.opt_present("paired-end") {
8
}else{
4
// Number of defline pairs to validate before stopping. Taken from
// --check-first when given (must parse as an integer), otherwise 200.
let check_first = {
if matches.opt_present("check-first") {
matches.opt_str("check-first")
.expect("Error reading the check-first option")
.parse()
.expect("ERROR converting the check-first parameter to an integer")
} else {
200
}
};

// id1/id2 hold the two deflines of the pair currently being checked;
// pairs_counter counts how many pairs have been examined so far.
let mut id1=String::new();
let mut id2=String::new();
let mut pairs_counter=0;

let my_file = File::open("/dev/stdin").expect("Could not open file");
let my_buffer=BufReader::new(my_file);
// `i` is the zero-based index of the line within the input.
for (i,line) in my_buffer.lines().enumerate() {
let line = line.expect("ERROR: could not read the next line in the input");
//match i%lines_per_read {
// Assumes every read occupies exactly four lines, so an interleaved pair
// spans eight: offset 0 is the first defline and offset 4 the second.
match i%8 {
0=>{
id1=line;
}
4=>{
id2=line;
pairs_counter+=1;

// captures() yields None when a defline does not end in /1 or /2, so
// these expect calls panic on such input rather than logging an error.
let caps1 = slash_r1r2_regex.captures(&id1).expect("ERROR: could not regex against id1");
let caps2 = slash_r1r2_regex.captures(&id2).expect("ERROR: could not regex against id2");

// Make sure the base name matches
if caps1[1] != caps2[1] {
// The reported number is `i`, the zero-based index of the second defline.
let mut msg = "ID1 does not match ID2 on line ".to_string();
msg.push_str(&i.to_string());
msg.push_str("\n");
msg.push_str(&id1);
msg.push_str(" vs ");
msg.push_str(&id2);
logmsg(&msg);
std::process::exit(1);
}
// Make sure there is a 1/2 combo
if &caps1[2] != "1" || &caps2[2] != "2" {
// The reported number is `i`, the zero-based index of the second defline.
let mut msg = "/1 is not followed by /2 on line ".to_string();
msg.push_str(&i.to_string());
msg.push_str("\n");
msg.push_str(&id1);
msg.push_str(" vs ");
msg.push_str(&id2);
logmsg(&msg);
std::process::exit(1);
}

// Stop reading once the requested number of pairs has been validated.
if pairs_counter >= check_first {
break;
}

}
// We can safely ignore the seq, plus, and qual lines
_=>{ }
}
}

// Reached when no mismatch was found, either because the loop broke after
// check_first pairs or because the input ran out first.
logmsg("The fastq input seems to be interleaved paired-end");
}

0 comments on commit 0e8756f

Please sign in to comment.