Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add basic functionality to read indexed FASTA files #165

Closed
wants to merge 11 commits into from
1 change: 1 addition & 0 deletions hts-sys/wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "htslib/htslib/tbx.h"
#include "htslib/htslib/synced_bcf_reader.h"
#include "htslib/htslib/kbitset.h"
#include "htslib/htslib/faidx.h"


// The following functions have to be wrapped here because they are inline in htslib.
Expand Down
13 changes: 13 additions & 0 deletions src/faidx/errors.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
use snafu::Snafu;
use std::path::PathBuf;

pub type Result<T, E = Error> = std::result::Result<T, E>;

#[derive(Snafu, Debug, PartialEq)]
#[snafu(visibility = "pub")]
pub enum Error {
#[snafu(display("file not found: {}", path.display()))]
FileNotFound { path: PathBuf },
#[snafu(display("invalid (non-unique) characters in path"))]
dlaehnemann marked this conversation as resolved.
Show resolved Hide resolved
NonUnicodePath,
}
146 changes: 146 additions & 0 deletions src/faidx/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
// Copyright 2019 Manuel Landesfeind, Evotec International GmbH
// Licensed under the MIT license (http://opensource.org/licenses/MIT)
// This file may not be copied, modified, or distributed
// except according to those terms.

//!
//! Module for working with faidx-indexed FASTA files.
//!

use std::ffi;
use std::path::Path;
use url::Url;

use crate::htslib;

pub mod errors;
pub use errors::{Error, Result};

fn path_as_bytes<'a, P: 'a + AsRef<Path>>(path: P, must_exist: bool) -> Result<Vec<u8>> {
if path.as_ref().exists() || !must_exist {
Ok(path
.as_ref()
.to_str()
.ok_or(Error::NonUnicodePath)?
.as_bytes()
.to_owned())
} else {
Err(Error::FileNotFound {
path: path.as_ref().to_owned(),
})
}
}
dlaehnemann marked this conversation as resolved.
Show resolved Hide resolved
/// A Fasta reader.
#[derive(Debug)]
pub struct Reader {
inner: *mut htslib::faidx_t,
}

impl Reader {
/// Create a new Reader from path.
///
/// # Arguments
///
/// * `path` - the path to open.
pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self, Error> {
Self::new(&path_as_bytes(path, true)?)
}

/// Create a new Reader from URL.
dlaehnemann marked this conversation as resolved.
Show resolved Hide resolved
pub fn from_url(url: &Url) -> Result<Self, Error> {
Self::new(url.as_str().as_bytes())
}

/// Create a new Reader.
///
/// # Arguments
///
/// * `path` - the path to open
fn new(path: &[u8]) -> Result<Self, Error> {
let cpath = ffi::CString::new(path).unwrap();
let inner = unsafe { htslib::fai_load(cpath.as_ptr()) };
Ok(Self { inner })
}
dlaehnemann marked this conversation as resolved.
Show resolved Hide resolved

/// Fetches the sequence and returns it
///
/// # Arguments
///
/// * `name` - the name of the template sequence (e.g., "chr1")
/// * `begin` - the offset within the template sequence (starting with 0)
/// * `end` - the end position to return
pub fn fetch_seq<N: AsRef<str>>(&self, name: N, begin: usize, end: usize) -> String {
let cname = ffi::CString::new(name.as_ref().as_bytes()).unwrap();
let len_out: i32 = 0;
let cseq = unsafe {
let ptr = htslib::faidx_fetch_seq(
self.inner, //*const faidx_t,
cname.as_ptr(), // c_name
begin as ::std::os::raw::c_int, // p_beg_i
end as ::std::os::raw::c_int, // p_end_i
&mut (len_out as ::std::os::raw::c_int), //len
);
ffi::CStr::from_ptr(ptr)
};

cseq.to_str().unwrap().to_owned()
}
}


#[cfg(test)]
mod tests {
use super::*;

fn open_reader() -> Reader {
Reader::from_path(format!("{}/test/test_cram.fa", env!("CARGO_MANIFEST_DIR"))).ok().unwrap()
}

#[test]
fn faidx_open() {
}
dlaehnemann marked this conversation as resolved.
Show resolved Hide resolved

#[test]
fn faidx_read_chr_first_base() {
let r = open_reader();
let seq = r.fetch_seq("chr1", 0 , 0);
assert_eq!(seq.len(), 1);
assert_eq!(seq, "G");
}

#[test]
fn faidx_read_chr_start() {
let r = open_reader();
let seq = r.fetch_seq("chr1", 0 , 9);
assert_eq!(seq.len(), 10);
assert_eq!(seq, "GGGCACAGCC");
}

#[test]
fn faidx_read_chr_between() {
let r = open_reader();
let seq = r.fetch_seq("chr1", 4 , 14);
assert_eq!(seq.len(), 11);
assert_eq!(seq, "ACAGCCTCACC");
}

#[test]
fn faidx_read_chr_end() {
let r = open_reader();
let seq = r.fetch_seq("chr1", 110, 120);
assert_eq!(seq.len(), 10);
assert_eq!(seq, "CCCCTCCGTG");
}

#[test]
fn faidx_read_twice() {
let r = open_reader();
let seq = r.fetch_seq("chr1", 110, 120);
assert_eq!(seq.len(), 10);
assert_eq!(seq, "CCCCTCCGTG");

let seq = r.fetch_seq("chr1", 5, 9);
assert_eq!(seq.len(), 5);
assert_eq!(seq, "CAGCC");
}
}
3 changes: 2 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ extern crate snafu;

pub mod bam;
pub mod bcf;
pub mod faidx;
pub mod htslib;
pub mod tbx;
pub mod utils;
mod utils;