From 0e8756faae275cc7ac5a1832a4c7a5b858056e76 Mon Sep 17 00:00:00 2001
From: Lee Katz - Aspen
Date: Wed, 21 Mar 2018 13:41:04 -0400
Subject: [PATCH] checks for /1 and /2 format

---
Review notes (text between the three-dash line and the diff is not part of
the commit message). Amended relative to the original submission:
 * a defline without a trailing /1 or /2 is logged and exits 1 instead of
   panicking;
 * input containing no complete read pair is no longer reported as paired-end;
 * reported line numbers are 1-based;
 * the expect() text of the new regex no longer says "qual".

 src/bin/friends_ung.rs | 84 +++++++++++++++++++++++++++++++++++------
 1 file changed, 71 insertions(+), 13 deletions(-)

diff --git a/src/bin/friends_ung.rs b/src/bin/friends_ung.rs
index 24830d69..14210c22 100644
--- a/src/bin/friends_ung.rs
+++ b/src/bin/friends_ung.rs
@@ -2,6 +2,7 @@ extern crate ross;
 extern crate regex;
 
 use ross::ross_base_options;
+use ross::logmsg;
 use regex::Regex;
 
 use std::fs::File;
@@ -12,35 +13,92 @@ use std::env;
 fn main() {
     let args: Vec<String> = env::args().collect();
     let mut opts = ross_base_options();
-    opts.optflag("x","blah","blah!");
+    opts.optopt("c","check-first","How many deflines to check to make sure the input is paired-end","INT");
 
     let matches = opts.parse(&args[1..]).expect("ERROR: could not parse parameters");
 
-    // If there is a match on these, then mark invalid.
-    // In other words, we are looking for a pattern that
-    // is NOT the target seq or qual
-    let seq_regex = Regex::new(r"[^a-zA-Z]").expect("malformed seq regex");
-    let qual_regex= Regex::new(r"[^!-Z]").expect("malformed qual regex");
+    // Group 1 is the defline base name; group 2 is the trailing /1 or /2 read number.
+    let slash_r1r2_regex = Regex::new(r"(.+)/([12])$").expect("malformed /1 and /2 defline regex");
 
     if matches.opt_present("help") {
-        println!("Convert a fastq file to a standard 4-lines-per-entry format\n{}",
+        println!("Determine paired-end-ness in an interleaved file. \nCurrently only checks deflines for the /1 and /2 format\n{}",
             opts.usage(&opts.short_usage(&args[0]))
         );
         std::process::exit(0);
     }
 
-    let lines_per_read={
-        if matches.opt_present("paired-end") {
-            8
-        }else{
-            4
+    // How many read pairs to validate before reporting success.
+    let check_first: usize = {
+        if matches.opt_present("check-first") {
+            matches.opt_str("check-first")
+                .expect("Error reading the check-first option")
+                .parse()
+                .expect("ERROR converting the check-first parameter to an integer")
+        } else {
+            200
         }
     };
 
+    let mut id1=String::new();
+    let mut pairs_counter: usize = 0;
+
     let my_file = File::open("/dev/stdin").expect("Could not open file");
     let my_buffer=BufReader::new(my_file);
     for (i,line) in my_buffer.lines().enumerate() {
         let line = line.expect("ERROR: could not read the next line in the input");
-        //match i%lines_per_read {
+        // Each interleaved pair is treated as 8 lines: the first read's defline
+        // is at offset 0 and the second read's defline is at offset 4.
+        match i%8 {
+            0=>{
+                id1=line;
+            }
+            4=>{
+                let id2=line;
+                pairs_counter+=1;
+                // enumerate() counts from 0, so add 1 for human-readable line numbers
+                let line_number=i+1;
+
+                // A defline without a trailing /1 or /2 is a normal "not paired-end"
+                // outcome, so log it and exit 1 instead of panicking.
+                let caps1 = match slash_r1r2_regex.captures(&id1) {
+                    Some(caps)=>caps,
+                    None=>{
+                        logmsg(&format!("ID1 does not end in /1 or /2 on line {}\n{}", line_number-4, id1));
+                        std::process::exit(1);
+                    }
+                };
+                let caps2 = match slash_r1r2_regex.captures(&id2) {
+                    Some(caps)=>caps,
+                    None=>{
+                        logmsg(&format!("ID2 does not end in /1 or /2 on line {}\n{}", line_number, id2));
+                        std::process::exit(1);
+                    }
+                };
+
+                // Make sure the base name matches
+                if caps1[1] != caps2[1] {
+                    logmsg(&format!("ID1 does not match ID2 on line {}\n{} vs {}", line_number, id1, id2));
+                    std::process::exit(1);
+                }
+                // Make sure there is a 1/2 combo
+                if &caps1[2] != "1" || &caps2[2] != "2" {
+                    logmsg(&format!("/1 is not followed by /2 on line {}\n{} vs {}", line_number, id1, id2));
+                    std::process::exit(1);
+                }
+
+                if pairs_counter >= check_first {
+                    break;
+                }
+            }
+            // We can safely ignore the seq, plus, and qual lines
+            _=>{ }
+        }
     }
+
+    // With no second defline ever seen, nothing was validated, so do not claim success.
+    if pairs_counter == 0 {
+        logmsg("No complete read pairs were found in the input");
+        std::process::exit(1);
+    }
+    logmsg("The fastq input seems to be interleaved paired-end");
 }