Skip to content

Commit

Permalink
Test case for --range
Browse files Browse the repository at this point in the history
Closes #26
  • Loading branch information
welchr committed May 9, 2020
1 parent 7d2994e commit 5af74fb
Show file tree
Hide file tree
Showing 7 changed files with 252 additions and 27 deletions.
3 changes: 3 additions & 0 deletions raremetal/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
add_executable(tests-raremetal
tests.cpp
reader_util.h
RMWScoreReader.cpp
RMWScoreReader.h
RMSingleVariantReader.cpp
RMSingleVariantReader.h
RMGroupTestReader.cpp
RMGroupTestReader.h
../src/GroupFromAnnotation.cpp
../src/GroupFromAnnotation.h
../src/Meta.cpp
Expand Down
97 changes: 97 additions & 0 deletions raremetal/tests/RMGroupTestReader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#include "RMGroupTestReader.h"

using namespace std;

uint64_t RMGroupTestReader::get_num_records() {
return records.size();
}

void RMGroupTestReader::load(const string &file) {
ifstream input_file(file);
string line;
auto line_separator = regex("[ \t]");

// Line regexes
auto regex_samples = regex("##TotalSampleSize=(\\d+)");
auto regex_studies = regex("##STUDY_NUM=(\\d+)");
auto regex_method = regex("##Method=(\\w+)");
auto regex_header = regex("#GROUPNAME\tNUM_VAR.*");

bool header_done = false;
vector<string> header;
while (getline(input_file, line)) {
smatch match;
if (!header_done) {
if (regex_search(line, match, regex_samples) && match.size() > 1) {
this->nsamples = stoul(match.str(1));
}
else if (regex_search(line, match, regex_studies) && match.size() > 1) {
this->nstudies = stoul(match.str(1));
}
else if (regex_search(line, match, regex_method) && match.size() > 1) {
this->group_test = match.str(1);
}
else if (regex_search(line, match, regex_header)) {
header_done = true;
copy(sregex_token_iterator(line.begin(), line.end(), line_separator, -1), sregex_token_iterator(), back_inserter(header));
}
}
else {
// Begin parsing record row
vector<string> tokens;
copy(sregex_token_iterator(line.begin(), line.end(), line_separator, -1), sregex_token_iterator(), back_inserter(tokens));

// For some reason RAREMETAL puts a genomic control line all the way at the end of the file...
if (line.substr(0,1) == "#") { continue; }

// Create record
auto rec = make_shared<RMGroupTestRecord>();
for (int i = 0; i < tokens.size(); i++) {
string col(header[i]);
if (col == "#GROUPNAME") {
rec->group = tokens.at(i);
}
else if (col == "NUM_VAR") {
rec->num_var = stoul(tokens.at(i));
}
else if (col == "VARs") {
auto semi = regex(";");
copy(sregex_token_iterator(tokens.at(i).begin(), tokens.at(i).end(), semi, -1), sregex_token_iterator(), back_inserter(rec->variants));
}
else if (col == "PVALUE_LIU") {
rec->pvalue_liu = extract_fp<double>(tokens.at(i));
}
else if (col == "PVALUE_DAVIES") {
rec->pvalue_davies = extract_fp<double>(tokens.at(i));
}
}

// Insert
records.push_back(rec);
this->index.emplace(rec->group, rec);
}
}
}

RMGroupTestReader::RMGroupTestReader(const string &file) {
load(file);
}

double RMGroupTestReader::get_nsamples() {
return nsamples;
}

uint64_t RMGroupTestReader::get_nstudies() {
return nstudies;
}

shared_ptr<RMGroupTestRecord> RMGroupTestReader::get_record(const string &i) {
auto it = this->index.find(i);
if (it != this->index.end()) {
return it->second;
}
else {
auto null = shared_ptr<RMGroupTestRecord>();
return null;
}
}
46 changes: 46 additions & 0 deletions raremetal/tests/RMGroupTestReader.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#ifndef RAREMETAL_RMGROUPTESTREADER_H
#define RAREMETAL_RMGROUPTESTREADER_H

#include <string>
#include <map>
#include <vector>
#include <memory>
#include <fstream>
#include <string>
#include <iostream>
#include <regex>
#include <exception>
#include <limits>
#include "reader_util.h"

struct RMGroupTestRecord {
std::string group;
uint64_t num_var;
std::vector<std::string> variants;
double avg_af;
double min_af;
double max_af;
double stat;
double pvalue_davies;
double pvalue_liu;
};

class RMGroupTestReader {
protected:
uint64_t nsamples;
uint64_t nstudies;
std::string group_test;
std::string trait_name;
std::vector<std::shared_ptr<RMGroupTestRecord>> records;
std::map<std::string, std::shared_ptr<RMGroupTestRecord>> index;
public:
RMGroupTestReader(const std::string &file);
void load(const std::string &file);
double get_nsamples();
uint64_t get_nstudies();
std::string get_group_test();
uint64_t get_num_records();
std::shared_ptr<RMGroupTestRecord> get_record(const std::string &i);
};

#endif //RAREMETAL_RMGROUPTESTREADER_H
36 changes: 10 additions & 26 deletions raremetal/tests/RMSingleVariantReader.cpp
Original file line number Diff line number Diff line change
@@ -1,33 +1,17 @@
#include "RMSingleVariantReader.h"
#include <fstream>
#include <string>
#include <iostream>
#include <regex>
#include <exception>
#include <limits>

using namespace std;

template <typename T> T extract_fp(const std::string &s) {
T d;
try {
if (std::is_same<T, long double>::value) {
d = stold(s);
}
else if (std::is_same<T, double>::value) {
d = stod(s);
}
else if (std::is_same<T, float>::value) {
d = stof(s);
}
else {
throw std::invalid_argument("Invalid return type when extracting floating point type number from string");
}
}
catch (const std::exception &e) {
d = numeric_limits<T>::quiet_NaN();
}
return d;
uint64_t RMSingleVariantReader::get_num_records() {
return records.size();
}

RMSingleVariantReader::record_iterator RMSingleVariantReader::begin() const {
return records.begin();
}

RMSingleVariantReader::record_iterator RMSingleVariantReader::end() const {
return records.end();
}

void RMSingleVariantReader::load(const string &file) {
Expand Down
13 changes: 13 additions & 0 deletions raremetal/tests/RMSingleVariantReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@
#include <map>
#include <vector>
#include <memory>
#include <fstream>
#include <string>
#include <iostream>
#include <regex>
#include <exception>
#include <limits>
#include "reader_util.h"

struct RMSingleVariantRecord {
std::string chrom;
Expand Down Expand Up @@ -32,11 +39,17 @@ class RMSingleVariantReader {
std::vector<std::shared_ptr<RMSingleVariantRecord>> records;
std::map<std::string, std::shared_ptr<RMSingleVariantRecord>> index;
public:
using record_iterator = std::vector<std::shared_ptr<RMSingleVariantRecord>>::const_iterator;

RMSingleVariantReader(const std::string &file);
void load(const std::string &file);
double get_nsamples();
uint64_t get_nstudies();
uint64_t get_num_records();
std::shared_ptr<RMSingleVariantRecord> get_record(const std::string &i);

record_iterator begin() const;
record_iterator end() const;
};

#endif //RAREMETAL_RMSINGLEVARIANTREADER_H
29 changes: 29 additions & 0 deletions raremetal/tests/reader_util.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#ifndef RAREMETAL_READER_UTIL_H
#define RAREMETAL_READER_UTIL_H

#include <string>
#include <limits>
#include <exception>

template <typename T> T extract_fp(const std::string &s) {
T d;
try {
if (std::is_same<T, long double>::value) {
d = stold(s);
}
else if (std::is_same<T, double>::value) {
d = stod(s);
}
else if (std::is_same<T, float>::value) {
d = stof(s);
}
else {
throw std::invalid_argument("Invalid return type when extracting floating point type number from string");
}
}
catch (const std::exception &e) {
d = std::numeric_limits<T>::quiet_NaN();
}
return d;
}
#endif //RAREMETAL_READER_UTIL_H
55 changes: 54 additions & 1 deletion raremetal/tests/tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,67 @@
#include "../src/GroupFromAnnotation.h"
#include "../src/Meta.h"
#include "RMSingleVariantReader.h"

#include "RMGroupTestReader.h"
using namespace std;

bool file_exists(const std::string &name) {
ifstream f(name.c_str());
return f.good();
}

TEST_CASE("Program Arguments") {
SECTION("--range") {
Meta meta;
meta.prefix = "test.range";
meta.setLogFile();
meta.Region = "1:1-87";
meta.SKAT = true;
meta.scorefile.Add("tests/datasets/simulated/region/test.smallchunk.MetaScore.assoc.gz");
meta.covfile.Add("tests/datasets/simulated/region/test.smallchunk.MetaCov.assoc.gz");

GroupFromAnnotation group;
group.groupFile = "tests/datasets/simulated/region/test.smallchunk.mask.tab";

meta.Prepare();
group.Run("", meta.log);
meta.PoolSummaryStat(group);

// This is pretty wild. If you don't run the printing routine, the single variant p-values aren't stored
// internally, so group-based tests can't look them up. TODO: refactor this...
meta.WriteSingleVariantResults(group);
meta.Run(group);

// Given the range above, only ZSYH2 should be tested and not the other gene.
auto group_reader = RMGroupTestReader("test.range.meta.SKAT_.results");
auto group_rec = group_reader.get_record("ZSYH2");
auto num_group_rec = group_reader.get_num_records();

REQUIRE(num_group_rec == 1);
REQUIRE(group_rec->pvalue_liu == Approx(1.28628e-09));

// Given the range above, the single variant results should only contain records from position 1:2 to 1:87.
auto sv_reader = RMSingleVariantReader("test.range.meta.singlevar.results");
auto num_sv_rec = sv_reader.get_num_records();
auto sv_rec_first = *sv_reader.begin();
auto sv_rec_last = *(--sv_reader.end());

REQUIRE(num_sv_rec == 86);

REQUIRE(sv_rec_first->chrom == "1");
REQUIRE(sv_rec_first->pos == 2);
REQUIRE(sv_rec_first->pvalue == Approx(0.1487579));

REQUIRE(sv_rec_last->chrom == "1");
REQUIRE(sv_rec_last->pos == 87);
REQUIRE(sv_rec_last->pvalue == Approx(0.7183580));

remove("test.range.raremetal.log");
remove("test.range.meta.plots.pdf");
remove("test.range.meta.singlevar.results");
remove("test.range.meta.SKAT_.results");
}
}

TEST_CASE("P-value precision") {
SECTION("Score statistic resulting in very small p-value") {
Meta meta;
Expand Down

0 comments on commit 5af74fb

Please sign in to comment.