From 5af74fb8a546ae746bb8867da521e416f338f449 Mon Sep 17 00:00:00 2001 From: Ryan Welch Date: Fri, 8 May 2020 21:54:50 -0400 Subject: [PATCH] Test case for --range Closes #26 --- raremetal/tests/CMakeLists.txt | 3 + raremetal/tests/RMGroupTestReader.cpp | 97 +++++++++++++++++++++++ raremetal/tests/RMGroupTestReader.h | 46 +++++++++++ raremetal/tests/RMSingleVariantReader.cpp | 36 +++------ raremetal/tests/RMSingleVariantReader.h | 13 +++ raremetal/tests/reader_util.h | 29 +++++++ raremetal/tests/tests.cpp | 55 ++++++++++++- 7 files changed, 252 insertions(+), 27 deletions(-) create mode 100644 raremetal/tests/RMGroupTestReader.cpp create mode 100644 raremetal/tests/RMGroupTestReader.h create mode 100644 raremetal/tests/reader_util.h diff --git a/raremetal/tests/CMakeLists.txt b/raremetal/tests/CMakeLists.txt index 1d89fb5..1725d7f 100644 --- a/raremetal/tests/CMakeLists.txt +++ b/raremetal/tests/CMakeLists.txt @@ -1,9 +1,12 @@ add_executable(tests-raremetal tests.cpp + reader_util.h RMWScoreReader.cpp RMWScoreReader.h RMSingleVariantReader.cpp RMSingleVariantReader.h + RMGroupTestReader.cpp + RMGroupTestReader.h ../src/GroupFromAnnotation.cpp ../src/GroupFromAnnotation.h ../src/Meta.cpp diff --git a/raremetal/tests/RMGroupTestReader.cpp b/raremetal/tests/RMGroupTestReader.cpp new file mode 100644 index 0000000..9ff5b46 --- /dev/null +++ b/raremetal/tests/RMGroupTestReader.cpp @@ -0,0 +1,97 @@ +#include "RMGroupTestReader.h" + +using namespace std; + +uint64_t RMGroupTestReader::get_num_records() { + return records.size(); +} + +void RMGroupTestReader::load(const string &file) { + ifstream input_file(file); + string line; + auto line_separator = regex("[ \t]"); + + // Line regexes + auto regex_samples = regex("##TotalSampleSize=(\\d+)"); + auto regex_studies = regex("##STUDY_NUM=(\\d+)"); + auto regex_method = regex("##Method=(\\w+)"); + auto regex_header = regex("#GROUPNAME\tNUM_VAR.*"); + + bool header_done = false; + vector header; + while (getline(input_file, line)) { + smatch match; + if (!header_done) { + if (regex_search(line, match, regex_samples) && match.size() > 1) { + this->nsamples = stoul(match.str(1)); + } + else if (regex_search(line, match, regex_studies) && match.size() > 1) { + this->nstudies = stoul(match.str(1)); + } + else if (regex_search(line, match, regex_method) && match.size() > 1) { + this->group_test = match.str(1); + } + else if (regex_search(line, match, regex_header)) { + header_done = true; + copy(sregex_token_iterator(line.begin(), line.end(), line_separator, -1), sregex_token_iterator(), back_inserter(header)); + } + } + else { + // Begin parsing record row + vector tokens; + copy(sregex_token_iterator(line.begin(), line.end(), line_separator, -1), sregex_token_iterator(), back_inserter(tokens)); + + // For some reason RAREMETAL puts a genomic control line all the way at the end of the file... + if (line.substr(0,1) == "#") { continue; } + + // Create record + auto rec = make_shared(); + for (int i = 0; i < tokens.size(); i++) { + string col(header[i]); + if (col == "#GROUPNAME") { + rec->group = tokens.at(i); + } + else if (col == "NUM_VAR") { + rec->num_var = stoul(tokens.at(i)); + } + else if (col == "VARs") { + auto semi = regex(";"); + copy(sregex_token_iterator(tokens.at(i).begin(), tokens.at(i).end(), semi, -1), sregex_token_iterator(), back_inserter(rec->variants)); + } + else if (col == "PVALUE_LIU") { + rec->pvalue_liu = extract_fp(tokens.at(i)); + } + else if (col == "PVALUE_DAVIES") { + rec->pvalue_davies = extract_fp(tokens.at(i)); + } + } + + // Insert + records.push_back(rec); + this->index.emplace(rec->group, rec); + } + } +} + +RMGroupTestReader::RMGroupTestReader(const string &file) { + load(file); +} + +double RMGroupTestReader::get_nsamples() { + return nsamples; +} + +uint64_t RMGroupTestReader::get_nstudies() { + return nstudies; +} + +shared_ptr RMGroupTestReader::get_record(const string &i) { + auto it = this->index.find(i); + if (it != this->index.end()) { + return it->second; + } + else { + auto null = shared_ptr(); + return null; + } +} \ No newline at end of file diff --git a/raremetal/tests/RMGroupTestReader.h b/raremetal/tests/RMGroupTestReader.h new file mode 100644 index 0000000..805d57a --- /dev/null +++ b/raremetal/tests/RMGroupTestReader.h @@ -0,0 +1,46 @@ +#ifndef RAREMETAL_RMGROUPTESTREADER_H +#define RAREMETAL_RMGROUPTESTREADER_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "reader_util.h" + +struct RMGroupTestRecord { + std::string group; + uint64_t num_var; + std::vector variants; + double avg_af; + double min_af; + double max_af; + double stat; + double pvalue_davies; + double pvalue_liu; +}; + +class RMGroupTestReader { +protected: + uint64_t nsamples; + uint64_t nstudies; + std::string group_test; + std::string trait_name; + std::vector> records; + std::map> index; +public: + RMGroupTestReader(const std::string &file); + void load(const std::string &file); + double get_nsamples(); + uint64_t get_nstudies(); + std::string get_group_test(); + uint64_t get_num_records(); + std::shared_ptr get_record(const std::string &i); +}; + +#endif //RAREMETAL_RMGROUPTESTREADER_H diff --git a/raremetal/tests/RMSingleVariantReader.cpp b/raremetal/tests/RMSingleVariantReader.cpp index 0cb86c6..3c6ec5d 100644 --- a/raremetal/tests/RMSingleVariantReader.cpp +++ b/raremetal/tests/RMSingleVariantReader.cpp @@ -1,33 +1,17 @@ #include "RMSingleVariantReader.h" -#include -#include -#include -#include -#include -#include using namespace std; -template T extract_fp(const std::string &s) { - T d; - try { - if (std::is_same::value) { - d = stold(s); - } - else if (std::is_same::value) { - d = stod(s); - } - else if (std::is_same::value) { - d = stof(s); - } - else { - throw std::invalid_argument("Invalid return type when extracting floating point type number from string"); - } - } - catch (const std::exception &e) { - d = numeric_limits::quiet_NaN(); - } - return d; +uint64_t RMSingleVariantReader::get_num_records() { + return records.size(); +} + +RMSingleVariantReader::record_iterator RMSingleVariantReader::begin() const { + return records.begin(); +} + +RMSingleVariantReader::record_iterator RMSingleVariantReader::end() const { + return records.end(); } void RMSingleVariantReader::load(const string &file) { diff --git a/raremetal/tests/RMSingleVariantReader.h b/raremetal/tests/RMSingleVariantReader.h index 5ebcf28..bbf669e 100644 --- a/raremetal/tests/RMSingleVariantReader.h +++ b/raremetal/tests/RMSingleVariantReader.h @@ -5,6 +5,13 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include "reader_util.h" struct RMSingleVariantRecord { std::string chrom; @@ -32,11 +39,17 @@ class RMSingleVariantReader { std::vector> records; std::map> index; public: + using record_iterator = std::vector>::const_iterator; + RMSingleVariantReader(const std::string &file); void load(const std::string &file); double get_nsamples(); uint64_t get_nstudies(); + uint64_t get_num_records(); std::shared_ptr get_record(const std::string &i); + + record_iterator begin() const; + record_iterator end() const; }; #endif //RAREMETAL_RMSINGLEVARIANTREADER_H diff --git a/raremetal/tests/reader_util.h b/raremetal/tests/reader_util.h new file mode 100644 index 0000000..b75949b --- /dev/null +++ b/raremetal/tests/reader_util.h @@ -0,0 +1,29 @@ +#ifndef RAREMETAL_READER_UTIL_H +#define RAREMETAL_READER_UTIL_H + +#include +#include +#include + +template T extract_fp(const std::string &s) { + T d; + try { + if (std::is_same::value) { + d = stold(s); + } + else if (std::is_same::value) { + d = stod(s); + } + else if (std::is_same::value) { + d = stof(s); + } + else { + throw std::invalid_argument("Invalid return type when extracting floating point type number from string"); + } + } + catch (const std::exception &e) { + d = std::numeric_limits::quiet_NaN(); + } + return d; +} +#endif //RAREMETAL_READER_UTIL_H diff --git a/raremetal/tests/tests.cpp b/raremetal/tests/tests.cpp index 657f1a2..1bfbb48 100644 --- a/raremetal/tests/tests.cpp +++ b/raremetal/tests/tests.cpp @@ -6,7 +6,7 @@ #include "../src/GroupFromAnnotation.h" #include "../src/Meta.h" #include "RMSingleVariantReader.h" - +#include "RMGroupTestReader.h" using namespace std; bool file_exists(const std::string &name) { @@ -14,6 +14,59 @@ bool file_exists(const std::string &name) { return f.good(); } +TEST_CASE("Program Arguments") { + SECTION("--range") { + Meta meta; + meta.prefix = "test.range"; + meta.setLogFile(); + meta.Region = "1:1-87"; + meta.SKAT = true; + meta.scorefile.Add("tests/datasets/simulated/region/test.smallchunk.MetaScore.assoc.gz"); + meta.covfile.Add("tests/datasets/simulated/region/test.smallchunk.MetaCov.assoc.gz"); + + GroupFromAnnotation group; + group.groupFile = "tests/datasets/simulated/region/test.smallchunk.mask.tab"; + + meta.Prepare(); + group.Run("", meta.log); + meta.PoolSummaryStat(group); + + // This is pretty wild. If you don't run the printing routine, the single variant p-values aren't stored + // internally, so group-based tests can't look them up. TODO: refactor this... + meta.WriteSingleVariantResults(group); + meta.Run(group); + + // Given the range above, only ZSYH2 should be tested and not the other gene. + auto group_reader = RMGroupTestReader("test.range.meta.SKAT_.results"); + auto group_rec = group_reader.get_record("ZSYH2"); + auto num_group_rec = group_reader.get_num_records(); + + REQUIRE(num_group_rec == 1); + REQUIRE(group_rec->pvalue_liu == Approx(1.28628e-09)); + + // Given the range above, the single variant results should only contain records from position 1:2 to 1:87. + auto sv_reader = RMSingleVariantReader("test.range.meta.singlevar.results"); + auto num_sv_rec = sv_reader.get_num_records(); + auto sv_rec_first = *sv_reader.begin(); + auto sv_rec_last = *(--sv_reader.end()); + + REQUIRE(num_sv_rec == 86); + + REQUIRE(sv_rec_first->chrom == "1"); + REQUIRE(sv_rec_first->pos == 2); + REQUIRE(sv_rec_first->pvalue == Approx(0.1487579)); + + REQUIRE(sv_rec_last->chrom == "1"); + REQUIRE(sv_rec_last->pos == 87); + REQUIRE(sv_rec_last->pvalue == Approx(0.7183580)); + + remove("test.range.raremetal.log"); + remove("test.range.meta.plots.pdf"); + remove("test.range.meta.singlevar.results"); + remove("test.range.meta.SKAT_.results"); + } +} + TEST_CASE("P-value precision") { SECTION("Score statistic resulting in very small p-value") { Meta meta;