diff --git a/docs/conf.py b/docs/conf.py index 67eed9b3c..085169234 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -69,7 +69,7 @@ # The short X.Y version. version = '1.5' # The full version, including alpha/beta/rc tags. -release = '1.5.2' +release = '1.5.3' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/index.rst b/docs/index.rst index d1bbea50c..837f08874 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -52,9 +52,21 @@ New in release |release| This is a hotfix release. It fixes the following issues: -- AGFusion exon files may be comma-delimited. Previously, the file parser - assumed the files were tab-delimited. This release now allows AGFusion - inputs that are comma- or tab-delimited. +- pVACbind would previously throw an error if a peptide sequence in the input + fasta was shorter than one of the chosen epitope lengths. This issue has + been fixed by first parsing the input fasta and creating individual fasta + files for each epitope length that enforce a minimum length of the peptide + sequences matching the respective epitope length. +- Previous versions of pVACtools resolved an issue where IEDB would output a + warning line if one of the epitope sequences only contained A, C, G, or T + amino acids, since those sequences could also be nucleotide sequences. + However, this issue was only fixed in pVACseq, not pVACbind or pVACvector. + This release fixes this issue for all tools. +- The wrappers for NetChop or NetMHCstabpan split the set of input epitopes + into chunks of 100 before processing. Due to a bug in the file splitting + logic, one epitope for each chunk over 100 would be erroneously dropped. This + effectively would result in fewer epitopes being returned in the filtered + report than if running the pipelines without NetChop or NetMHCstabpan. 
New in version |version| ------------------------ diff --git a/lib/net_chop.py b/lib/net_chop.py index b59b2b63f..ac8a532d4 100644 --- a/lib/net_chop.py +++ b/lib/net_chop.py @@ -7,22 +7,11 @@ import os from time import sleep import collections +import lib.utils cycle = ['|', '/', '-', '\\'] methods = ['cterm', '20s'] -def split_file(reader, lines=400): - from itertools import islice, chain - for tmp in reader: - if tmp != "": - yield chain([tmp], islice(reader, lines-1)) - try: - tmp = next(reader) - except StopIteration: - return - else: - break - def main(args_input = sys.argv[1:]): parser = argparse.ArgumentParser("pvacseq net_chop", formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument( @@ -64,7 +53,7 @@ def main(args_input = sys.argv[1:]): i=1 print("Waiting for results from NetChop... |", end='') sys.stdout.flush() - for chunk in split_file(reader, 100): + for chunk in lib.utils.split_file(reader, 100): staging_file = tempfile.NamedTemporaryFile(mode='w+') current_buffer = {} for line in chunk: diff --git a/lib/netmhc_stab.py b/lib/netmhc_stab.py index b08f6ea8f..95b7c5a1d 100644 --- a/lib/netmhc_stab.py +++ b/lib/netmhc_stab.py @@ -6,22 +6,11 @@ import re import os from time import sleep +import lib.utils cycle = ['|', '/', '-', '\\'] methods = ['cterm', '20s'] -def split_file(reader, lines=400): - from itertools import islice, chain - for tmp in reader: - if tmp != "": - yield chain([tmp], islice(reader, lines-1)) - try: - tmp = next(reader) - except StopIteration: - return - else: - break - def main(args_input = sys.argv[1:]): parser = argparse.ArgumentParser("pvacseq net_chop", formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument( @@ -51,7 +40,7 @@ def main(args_input = sys.argv[1:]): i=1 print("Waiting for results from NetMHCStabPan... 
|", end='') sys.stdout.flush() - for chunk in split_file(reader, 100): + for chunk in lib.utils.split_file(reader, 100): peptide_lengths = set() staging_file = tempfile.NamedTemporaryFile(mode='w+') current_buffer = {} diff --git a/lib/pipeline.py b/lib/pipeline.py index 11748a68a..2a3ba7485 100644 --- a/lib/pipeline.py +++ b/lib/pipeline.py @@ -480,8 +480,11 @@ def fasta_entry_count(self): row_count += 1 return row_count - def split_fasta_basename(self): - return os.path.join(self.tmp_dir, self.sample_name + ".fa.split") + def fasta_basename(self, length): + return os.path.join(self.tmp_dir, "{}.{}.fa".format(self.sample_name, length)) + + def split_fasta_basename(self, length): + return "{}.split".format(self.fasta_basename(length)) def uniquify_records(self, records): fasta_sequences = OrderedDict() @@ -497,7 +500,14 @@ def uniquify_records(self, records): count += 1 return (uniq_records, keys) - def split_fasta_file(self): + def create_per_length_fasta(self, length): + records = [] + for record in SeqIO.parse(self.input_file, "fasta"): + if len(str(record.seq)) >= length: + records.append(record) + SeqIO.write(records, self.fasta_basename(length), "fasta") + + def split_fasta_file(self, length): fasta_entry_count = self.fasta_entry_count() status_message("Splitting FASTA into smaller chunks") chunks = [] @@ -508,7 +518,7 @@ def split_fasta_file(self): if split_end > fasta_entry_count: split_end = fasta_entry_count status_message("Splitting FASTA into smaller chunks - Entries %d-%d" % (split_start, split_end)) - split_fasta_file_path = "%s_%d-%d" % (self.split_fasta_basename(), split_start, split_end) + split_fasta_file_path = "%s_%d-%d" % (self.split_fasta_basename(length), split_start, split_end) split_fasta_key_file_path = "{}.key".format(split_fasta_file_path) chunks.append([split_start, split_end]) if os.path.exists(split_fasta_file_path): @@ -517,7 +527,7 @@ def split_fasta_file(self): else: split_fasta_records = [] skip = 0 - for record in 
SeqIO.parse(self.input_file, "fasta"): + for record in SeqIO.parse(self.fasta_basename(length), "fasta"): if skip == 0: split_fasta_records.append(record) if row_count == fasta_entry_count: @@ -535,7 +545,7 @@ def split_fasta_file(self): if split_end > fasta_entry_count: split_end = fasta_entry_count status_message("Splitting FASTA into smaller chunks - Entries %d-%d" % (split_start, split_end)) - split_fasta_file_path = "%s_%d-%d" % (self.split_fasta_basename(), split_start, split_end) + split_fasta_file_path = "%s_%d-%d" % (self.split_fasta_basename(length), split_start, split_end) split_fasta_key_file_path = "{}.key".format(split_fasta_file_path) chunks.append([split_start, split_end]) if os.path.exists(split_fasta_file_path): @@ -554,12 +564,145 @@ def split_fasta_file(self): status_message("Completed") return chunks + def call_iedb(self, chunks, length): + alleles = self.alleles + prediction_algorithms = self.prediction_algorithms + argument_sets = [] + warning_messages = [] + for (split_start, split_end) in chunks: + tsv_chunk = "%d-%d" % (split_start, split_end) + if self.input_file_type == 'fasta': + fasta_chunk = tsv_chunk + else: + fasta_chunk = "%d-%d" % (split_start*2-1, split_end*2) + for a in alleles: + split_fasta_file_path = "%s_%s"%(self.split_fasta_basename(length), fasta_chunk) + if os.path.getsize(split_fasta_file_path) == 0: + msg = "Fasta file {} is empty. Skipping".format(split_fasta_file_path) + if msg not in warning_messages: + warning_messages.append(msg) + continue + #begin of per-algorithm processing + for method in prediction_algorithms: + prediction_class = globals()[method] + prediction = prediction_class() + if hasattr(prediction, 'iedb_prediction_method'): + iedb_method = prediction.iedb_prediction_method + else: + iedb_method = method + valid_alleles = prediction.valid_allele_names() + if a not in valid_alleles: + msg = "Allele %s not valid for Method %s. Skipping." 
% (a, method) + if msg not in warning_messages: + warning_messages.append(msg) + continue + valid_lengths = prediction.valid_lengths_for_allele(a) + if length not in valid_lengths: + msg = "Epitope Length %s is not valid for Method %s and Allele %s. Skipping." % (length, method, a) + if msg not in warning_messages: + warning_messages.append(msg) + continue + + split_iedb_out = os.path.join(self.tmp_dir, ".".join([self.sample_name, iedb_method, a, str(length), "tsv_%s" % fasta_chunk])) + if os.path.exists(split_iedb_out): + msg = "Prediction file for Allele %s and Epitope Length %s with Method %s (Entries %s) already exists. Skipping." % (a, length, method, fasta_chunk) + if msg not in warning_messages: + warning_messages.append(msg) + continue + arguments = [ + split_fasta_file_path, + split_iedb_out, + method, + a, + '-r', str(self.iedb_retries), + '-e', self.iedb_executable, + ] + if not isinstance(prediction, IEDBMHCII): + arguments.extend(['-l', str(length),]) + argument_sets.append(arguments) + + for msg in warning_messages: + status_message(msg) + + with pymp.Parallel(self.n_threads) as p: + for index in p.range(len(argument_sets)): + arguments = argument_sets[index] + a = arguments[3] + method = arguments[2] + filename = arguments[1] + if len(arguments) == 10: + epl = arguments[9] + else: + epl = 15 + p.print("Making binding predictions on Allele %s and Epitope Length %s with Method %s - File %s" % (a, epl, method, filename)) + lib.call_iedb.main(arguments) + p.print("Making binding predictions on Allele %s and Epitope Length %s with Method %s - File %s - Completed" % (a, epl, method, filename)) + + def parse_outputs(self, chunks, length): + split_parsed_output_files = [] + for (split_start, split_end) in chunks: + tsv_chunk = "%d-%d" % (split_start, split_end) + if self.input_file_type == 'fasta': + fasta_chunk = tsv_chunk + else: + fasta_chunk = "%d-%d" % (split_start*2-1, split_end*2) + for a in self.alleles: + split_iedb_output_files = [] + 
status_message("Parsing binding predictions for Allele %s and Epitope Length %s - Entries %s" % (a, length, fasta_chunk)) + for method in self.prediction_algorithms: + prediction_class = globals()[method] + prediction = prediction_class() + if hasattr(prediction, 'iedb_prediction_method'): + iedb_method = prediction.iedb_prediction_method + else: + iedb_method = method + valid_alleles = prediction.valid_allele_names() + if a not in valid_alleles: + continue + valid_lengths = prediction.valid_lengths_for_allele(a) + if length not in valid_lengths: + continue + split_iedb_out = os.path.join(self.tmp_dir, ".".join([self.sample_name, iedb_method, a, str(length), "tsv_%s" % fasta_chunk])) + if os.path.exists(split_iedb_out): + split_iedb_output_files.append(split_iedb_out) + + split_parsed_file_path = os.path.join(self.tmp_dir, ".".join([self.sample_name, a, str(length), "parsed", "tsv_%s" % fasta_chunk])) + if os.path.exists(split_parsed_file_path): + status_message("Parsed Output File for Allele %s and Epitope Length %s (Entries %s) already exists. 
Skipping" % (a, length, fasta_chunk)) + split_parsed_output_files.append(split_parsed_file_path) + continue + split_fasta_file_path = "%s_%s"%(self.split_fasta_basename(length), fasta_chunk) + split_fasta_key_file_path = split_fasta_file_path + '.key' + + if len(split_iedb_output_files) > 0: + status_message("Parsing prediction file for Allele %s and Epitope Length %s - Entries %s" % (a, length, fasta_chunk)) + split_tsv_file_path = "%s_%s" % (self.tsv_file_path(), tsv_chunk) + params = { + 'input_iedb_files' : split_iedb_output_files, + 'input_tsv_file' : split_tsv_file_path, + 'key_file' : split_fasta_key_file_path, + 'output_file' : split_parsed_file_path, + } + if self.additional_report_columns and 'sample_name' in self.additional_report_columns: + params['sample_name'] = self.sample_name + else: + params['sample_name'] = None + parser = self.output_parser(params) + parser.execute() + status_message("Parsing prediction file for Allele %s and Epitope Length %s - Entries %s - Completed" % (a, length, fasta_chunk)) + + split_parsed_output_files.append(split_parsed_file_path) + return split_parsed_output_files + def execute(self): self.print_log() - chunks = self.split_fasta_file() - self.call_iedb(chunks) - split_parsed_output_files = self.parse_outputs(chunks) + split_parsed_output_files = [] + for length in self.epitope_lengths: + self.create_per_length_fasta(length) + chunks = self.split_fasta_file(length) + self.call_iedb(chunks, length) + split_parsed_output_files.extend(self.parse_outputs(chunks, length)) if len(split_parsed_output_files) == 0: status_message("No output files were created. 
Aborting.") diff --git a/lib/utils.py b/lib/utils.py index b49cafe72..a6e00cea3 100644 --- a/lib/utils.py +++ b/lib/utils.py @@ -1,5 +1,13 @@ import binascii +from itertools import islice def is_gz_file(filepath): with open(filepath, 'rb') as test_f: return binascii.hexlify(test_f.read(2)) == b'1f8b' + +def split_file(reader, lines): + i = iter(reader) + piece = list(islice(i, lines)) + while piece: + yield piece + piece = list(islice(i, lines)) diff --git a/setup.py b/setup.py index 1f902bec4..83d6757c2 100644 --- a/setup.py +++ b/setup.py @@ -61,7 +61,7 @@ setup( name="pvactools", - version="1.5.2", + version="1.5.3", packages=[ "tools", "tools.pvacbind", diff --git a/tests/test_data/pvacbind/MHC_Class_I/tmp/Test.fa.split_1-48 b/tests/test_data/pvacbind/MHC_Class_I/tmp/Test.10.fa.split_1-48 similarity index 100% rename from tests/test_data/pvacbind/MHC_Class_I/tmp/Test.fa.split_1-48 rename to tests/test_data/pvacbind/MHC_Class_I/tmp/Test.10.fa.split_1-48 diff --git a/tests/test_data/pvacbind/MHC_Class_I/tmp/Test.fa.split_1-48.key b/tests/test_data/pvacbind/MHC_Class_I/tmp/Test.10.fa.split_1-48.key similarity index 100% rename from tests/test_data/pvacbind/MHC_Class_I/tmp/Test.fa.split_1-48.key rename to tests/test_data/pvacbind/MHC_Class_I/tmp/Test.10.fa.split_1-48.key diff --git a/tests/test_data/pvacbind/MHC_Class_II/tmp/Test.fa.split_1-48 b/tests/test_data/pvacbind/MHC_Class_I/tmp/Test.9.fa.split_1-48 similarity index 100% rename from tests/test_data/pvacbind/MHC_Class_II/tmp/Test.fa.split_1-48 rename to tests/test_data/pvacbind/MHC_Class_I/tmp/Test.9.fa.split_1-48 diff --git a/tests/test_data/pvacbind/MHC_Class_II/tmp/Test.fa.split_1-48.key b/tests/test_data/pvacbind/MHC_Class_I/tmp/Test.9.fa.split_1-48.key similarity index 100% rename from tests/test_data/pvacbind/MHC_Class_II/tmp/Test.fa.split_1-48.key rename to tests/test_data/pvacbind/MHC_Class_I/tmp/Test.9.fa.split_1-48.key diff --git 
a/tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test.fa.split_1-48 b/tests/test_data/pvacbind/MHC_Class_II/tmp/Test.15.fa.split_1-48 similarity index 100% rename from tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test.fa.split_1-48 rename to tests/test_data/pvacbind/MHC_Class_II/tmp/Test.15.fa.split_1-48 diff --git a/tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test.fa.split_1-48.key b/tests/test_data/pvacbind/MHC_Class_II/tmp/Test.15.fa.split_1-48.key similarity index 100% rename from tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test.fa.split_1-48.key rename to tests/test_data/pvacbind/MHC_Class_II/tmp/Test.15.fa.split_1-48.key diff --git a/tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test.10.fa b/tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test.10.fa new file mode 100644 index 000000000..f559bef16 --- /dev/null +++ b/tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test.10.fa @@ -0,0 +1,132 @@ +>1 +LPLPPPPLLPLLLLLLGASGG +>1_duplicate +LPLPPPPLLPLLLLLLGASGG +>2 +LPLPPPPLLPLLPLLLLLGASGG +>3 +DPASAAAAAAAAAAAAAAVIPTVSTPPP +>4 +DPASAAAAAAAVIPTVSTPPP +>5 +VNSATLSRTLLAAAGGSSLQ +>6 +VNSATLSRTLLLAAAGGSSLQ +>7 +GRSGGSHVWTRSRDPEGSSRK +>8 +GRSGGSHVWTHSRDPEGSSRK +>9 +RQKAVRPLELAYCLQKCNVPL +>10 +RQKAVRPLELVYCLQKCNVPL +>11 +NNKSVNEALNHLLTEEEDYQG +>12 +NNKSVNEALNNLLTEEEDYQG +>13 +KEDAVQGIANEDAAQGIAKED +>14 +KEDAVQGIANQDAAQGIAKED +>15 +PGKTVTISCTRSSGSIASNYV +>16 +PGKTVTISCTGSSGSIASNYV +>17 +VQWYQQRPGSSPTTVIYEDNQ +>18 +VQWYQQRPGSAPTTVIYEDNQ +>19 +VLHHEDLIGKPGGVSLSKIER +>20 +VLHHEDLIGKHGGVSLSKIER +>21 +VKSPVKEEEKPQEVKVKEPPK +>22 +VKSPVKEEEKTQEVKVKEPPK +>23 +SLMVCELAGNPFNCECDLFGF +>24 +SLMVCELAGNLFNCECDLFGF +>25 +SHLSYLSVRGGFNMSSFKLKE +>26 +SHLSYLSVRGGFNMSSFKLKQ +>27 +SLLHTVSPEPPRPPQQPVPTE +>28 +SLLHTVSPEPARPPQQPVPTE +>29 +EDTGGGGRSAGQHWARLRGE +>30 +EDTGGGGRSAQHWARLRGESGLSLERHRSTLTQASSMTPHSGPRSTTSQASPAQRDTAQA +ASTREIPRASSPHRITQRDTSRASSTQQEISRASSTQQETSRASSTQEDTPRASSTQEDT 
+PRASSTQWNTPRASSPSRSTQLDNPRTSSTQQDNPQTSFPTCTPQRENPRTPCVQQDDPR +ASSPNRTTQRENSRTSCAQRDNPKASRTSSPNRATRDNPRTSCAQRDNPRASSPSRATRD +NPTTSCAQRDNPRASRTSSPNRATRDNPRTSCAQRDNPRASSPSRATRDNPTTSCAQRDN +PRASRTSSPNRATRDNPRTSCAQRDNPRASSPNRAARDNPTTSCAQRDNPRASRTSSPNR +ATRDNPRTSCAQRDNPRASSPNRATRDNPTTSCAQRDNPRASRTSSPNRATRDNPRTSCA +QRDNPRASSPNRTTQQDSPRTSCARRDDPRASSPNRTIQQENPRTSCALRDNPRASSPSR +TIQQENPRTSCAQRDDPRASSPNRTTQQENPRTSCARRDNPRASSRNRTIQRDNPRTSCA +QRDNPRASSPNRTIQQENLRTSCTRQDNPRTSSPNRATRDNPRTSCAQRDNLRASSPIRA +TQQDNPRTCIQQNIPRSSSTQQDNPKTSCTKRDNLRPTCTQRDRTQSFSFQRDNPGTSSS +QCCTQKENLRPSSPHRSTQWNNPRNSSPHRTNKDIPWASFPLRPTQSDGPRTSSPSRSKQ +SEVPWASIALRPTQGDRPQTSSPSRPAQHDPPQSSFGPTQYNLPSRATSSSHNPGHQSTS +RTSSPVYPAAYGAPLTSPEPSQPPCAVCIGHRDAPRASSPPRYLQHDPFPFFPEPRAPES +EPPHHEPPYIPPAVCIGHRDAPRASSPPRHTQFDPFPFLPDTSDAEHQCQSPQHEPLQLP +APVCIGYRDAPRASSPPRQAPEPSLLFQDLPRASTESLVPSMDSLHECPHIPTPVCIGHR +DAPSFSSPPRQAPEPSLFFQDPPGTSMESLAPSTDSLHGSPVLIPQVCIGHRDAPRASSP +PRHPPSDLAFLAPSPSPGSSGGSRGSAPPGETRHNLEREEYTVLADLPPPRRLAQRQPGP +QAQCSSGGRTHSPGRAEVERLFGQERRKSEAAGAFQAQDEGRSQQPSQGQSQLLRRQSSP +APSRQVTMLPAKQAELTRRSQAEPPHPWSPEKRPEGDRQLQGSPLPPRTSARTPERELRT +QRPLESGQAGPRQPLGVWQSQEEPPGSQGPHRHLERSWSSQEGGLGPGGWWGCGEPSLGA +AKAPEGAWGGTSREYKESWGQPEAWEEKPTHELPRELGKRSPLTSPPENWGGPAESSQSW +HSGTPTAVGWGAEGACPYPRGSERRPELDWRDLLGLLRAPGEGVWARVPSLDWEGLLELL +QARLPRKDPAGHRDDLARALGPELGPPGTNDVPEQESHSQPEGWAEATPVNGHSPALQSQ +SPVQLPSPACTSTQWPKIKVTRGPATATLAGLEQTGPLGSRSTAKGPSLPELQFQPEEPE +ESEPSRGQDPLTDQKQADSADKRPAEGKAGSPLKGRLVTSWRMPGDRPTLFNPFLLSLGV +LRWRRPDLLNFKKGWMSILDEPGEPPSPSLTTTSTSQWKKHWFVLTDSSLKYYRDSTAEE +ADELDGEIDLRSCTDVTEYAVQRNYGFQIHTKDAVYTLSAMTSGIRRNWIEALRKTVRPT +SAPDVTKLSDSNKENALHSYSTQKGPLKAGEQRAGSEVISRGGPRKADGQRQALDYVELS +PLTQASPQRARTPARTPDRLAKQEELERDLAQRSEERRKWFEATDSRTPEVPAGEGPRRG +LGAPLTEDQQNRLSEEIEKKWQELEKLPLRENKRVPLTALLNQSRGERRGPPSDGHEALE +KEVQALRAQLEAWRLQGEAPQSALRSQEDGHIPPGYISQEACERSLAEMESSHQQVMEEL +QRHHERELQRLQQEKEWLLAEETAATASAIEAMKKAYQEELSRELSKTRSLQQGPDGLRK +QHQSDVEALKRELQVLSEQYSQKCLEIGALMRQAEEREHTLRRCQQEGQELLRHNQELHG 
+RLSEEIDQLRGFIASQGMGNGCGRSNERSSCELEVLLRVKENELQYLKKEVQCLRDELQM +MQKDKRFTSGKYQDVYVELSHIKTRSEREIEQLKEHLRLAMAALQEKESMRNSLAEYSTG +QGSGEKAGCPWSGTGQH +>31 +GMYQPCDDMDCLSDRCKILQV +>32 +GMYQPCDDMDYLSDRCKILQV +>33 +ASVLCQRAKVAMSHFEPNEYI +>34 +ASVLCQRAKVEMSHFEPNEYI +>35 +AIAGTLKFNPETDYLTGTDGK +>36 +AIAGTLKFNPQTDYLTGTDGK +>37 +WLYYSYGLLHTYGSGGYALYF +>38 +WLYYSYGLLHIYGSGGYALYF +>39 +TSAKTTVVVTAQKRNSRRQLP +>40 +TSAKTTVVVTTQKRNSRRQLP +>41 +VPALGWEFLASTRLTSELNFL +>42 +VPALGWEFLAFTRLTSELNFL +>43 +TRVSLFGALVHSRTYDMDVRL +>44 +TRVSLFGALVRSRTYDMDVRL +>44_duplicate +TRVSLFGALVRSRTYDMDVRL +>45 +AREERTEASGSESRVEPPHEN +>46 +AREERTEASGYESRVEPPHEN diff --git a/tests/test_data/pvacbind/combine_and_condense/MHC_Class_II/tmp/Test.fa.split_1-48 b/tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test.10.fa.split_1-48 similarity index 100% rename from tests/test_data/pvacbind/combine_and_condense/MHC_Class_II/tmp/Test.fa.split_1-48 rename to tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test.10.fa.split_1-48 diff --git a/tests/test_data/pvacbind/combine_and_condense/MHC_Class_II/tmp/Test.fa.split_1-48.key b/tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test.10.fa.split_1-48.key similarity index 100% rename from tests/test_data/pvacbind/combine_and_condense/MHC_Class_II/tmp/Test.fa.split_1-48.key rename to tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test.10.fa.split_1-48.key diff --git a/tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test.9.fa b/tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test.9.fa new file mode 100644 index 000000000..f559bef16 --- /dev/null +++ b/tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test.9.fa @@ -0,0 +1,132 @@ +>1 +LPLPPPPLLPLLLLLLGASGG +>1_duplicate +LPLPPPPLLPLLLLLLGASGG +>2 +LPLPPPPLLPLLPLLLLLGASGG +>3 +DPASAAAAAAAAAAAAAAVIPTVSTPPP +>4 +DPASAAAAAAAVIPTVSTPPP +>5 +VNSATLSRTLLAAAGGSSLQ +>6 +VNSATLSRTLLLAAAGGSSLQ +>7 +GRSGGSHVWTRSRDPEGSSRK +>8 
+GRSGGSHVWTHSRDPEGSSRK +>9 +RQKAVRPLELAYCLQKCNVPL +>10 +RQKAVRPLELVYCLQKCNVPL +>11 +NNKSVNEALNHLLTEEEDYQG +>12 +NNKSVNEALNNLLTEEEDYQG +>13 +KEDAVQGIANEDAAQGIAKED +>14 +KEDAVQGIANQDAAQGIAKED +>15 +PGKTVTISCTRSSGSIASNYV +>16 +PGKTVTISCTGSSGSIASNYV +>17 +VQWYQQRPGSSPTTVIYEDNQ +>18 +VQWYQQRPGSAPTTVIYEDNQ +>19 +VLHHEDLIGKPGGVSLSKIER +>20 +VLHHEDLIGKHGGVSLSKIER +>21 +VKSPVKEEEKPQEVKVKEPPK +>22 +VKSPVKEEEKTQEVKVKEPPK +>23 +SLMVCELAGNPFNCECDLFGF +>24 +SLMVCELAGNLFNCECDLFGF +>25 +SHLSYLSVRGGFNMSSFKLKE +>26 +SHLSYLSVRGGFNMSSFKLKQ +>27 +SLLHTVSPEPPRPPQQPVPTE +>28 +SLLHTVSPEPARPPQQPVPTE +>29 +EDTGGGGRSAGQHWARLRGE +>30 +EDTGGGGRSAQHWARLRGESGLSLERHRSTLTQASSMTPHSGPRSTTSQASPAQRDTAQA +ASTREIPRASSPHRITQRDTSRASSTQQEISRASSTQQETSRASSTQEDTPRASSTQEDT +PRASSTQWNTPRASSPSRSTQLDNPRTSSTQQDNPQTSFPTCTPQRENPRTPCVQQDDPR +ASSPNRTTQRENSRTSCAQRDNPKASRTSSPNRATRDNPRTSCAQRDNPRASSPSRATRD +NPTTSCAQRDNPRASRTSSPNRATRDNPRTSCAQRDNPRASSPSRATRDNPTTSCAQRDN +PRASRTSSPNRATRDNPRTSCAQRDNPRASSPNRAARDNPTTSCAQRDNPRASRTSSPNR +ATRDNPRTSCAQRDNPRASSPNRATRDNPTTSCAQRDNPRASRTSSPNRATRDNPRTSCA +QRDNPRASSPNRTTQQDSPRTSCARRDDPRASSPNRTIQQENPRTSCALRDNPRASSPSR +TIQQENPRTSCAQRDDPRASSPNRTTQQENPRTSCARRDNPRASSRNRTIQRDNPRTSCA +QRDNPRASSPNRTIQQENLRTSCTRQDNPRTSSPNRATRDNPRTSCAQRDNLRASSPIRA +TQQDNPRTCIQQNIPRSSSTQQDNPKTSCTKRDNLRPTCTQRDRTQSFSFQRDNPGTSSS +QCCTQKENLRPSSPHRSTQWNNPRNSSPHRTNKDIPWASFPLRPTQSDGPRTSSPSRSKQ +SEVPWASIALRPTQGDRPQTSSPSRPAQHDPPQSSFGPTQYNLPSRATSSSHNPGHQSTS +RTSSPVYPAAYGAPLTSPEPSQPPCAVCIGHRDAPRASSPPRYLQHDPFPFFPEPRAPES +EPPHHEPPYIPPAVCIGHRDAPRASSPPRHTQFDPFPFLPDTSDAEHQCQSPQHEPLQLP +APVCIGYRDAPRASSPPRQAPEPSLLFQDLPRASTESLVPSMDSLHECPHIPTPVCIGHR +DAPSFSSPPRQAPEPSLFFQDPPGTSMESLAPSTDSLHGSPVLIPQVCIGHRDAPRASSP +PRHPPSDLAFLAPSPSPGSSGGSRGSAPPGETRHNLEREEYTVLADLPPPRRLAQRQPGP +QAQCSSGGRTHSPGRAEVERLFGQERRKSEAAGAFQAQDEGRSQQPSQGQSQLLRRQSSP +APSRQVTMLPAKQAELTRRSQAEPPHPWSPEKRPEGDRQLQGSPLPPRTSARTPERELRT +QRPLESGQAGPRQPLGVWQSQEEPPGSQGPHRHLERSWSSQEGGLGPGGWWGCGEPSLGA +AKAPEGAWGGTSREYKESWGQPEAWEEKPTHELPRELGKRSPLTSPPENWGGPAESSQSW 
+HSGTPTAVGWGAEGACPYPRGSERRPELDWRDLLGLLRAPGEGVWARVPSLDWEGLLELL +QARLPRKDPAGHRDDLARALGPELGPPGTNDVPEQESHSQPEGWAEATPVNGHSPALQSQ +SPVQLPSPACTSTQWPKIKVTRGPATATLAGLEQTGPLGSRSTAKGPSLPELQFQPEEPE +ESEPSRGQDPLTDQKQADSADKRPAEGKAGSPLKGRLVTSWRMPGDRPTLFNPFLLSLGV +LRWRRPDLLNFKKGWMSILDEPGEPPSPSLTTTSTSQWKKHWFVLTDSSLKYYRDSTAEE +ADELDGEIDLRSCTDVTEYAVQRNYGFQIHTKDAVYTLSAMTSGIRRNWIEALRKTVRPT +SAPDVTKLSDSNKENALHSYSTQKGPLKAGEQRAGSEVISRGGPRKADGQRQALDYVELS +PLTQASPQRARTPARTPDRLAKQEELERDLAQRSEERRKWFEATDSRTPEVPAGEGPRRG +LGAPLTEDQQNRLSEEIEKKWQELEKLPLRENKRVPLTALLNQSRGERRGPPSDGHEALE +KEVQALRAQLEAWRLQGEAPQSALRSQEDGHIPPGYISQEACERSLAEMESSHQQVMEEL +QRHHERELQRLQQEKEWLLAEETAATASAIEAMKKAYQEELSRELSKTRSLQQGPDGLRK +QHQSDVEALKRELQVLSEQYSQKCLEIGALMRQAEEREHTLRRCQQEGQELLRHNQELHG +RLSEEIDQLRGFIASQGMGNGCGRSNERSSCELEVLLRVKENELQYLKKEVQCLRDELQM +MQKDKRFTSGKYQDVYVELSHIKTRSEREIEQLKEHLRLAMAALQEKESMRNSLAEYSTG +QGSGEKAGCPWSGTGQH +>31 +GMYQPCDDMDCLSDRCKILQV +>32 +GMYQPCDDMDYLSDRCKILQV +>33 +ASVLCQRAKVAMSHFEPNEYI +>34 +ASVLCQRAKVEMSHFEPNEYI +>35 +AIAGTLKFNPETDYLTGTDGK +>36 +AIAGTLKFNPQTDYLTGTDGK +>37 +WLYYSYGLLHTYGSGGYALYF +>38 +WLYYSYGLLHIYGSGGYALYF +>39 +TSAKTTVVVTAQKRNSRRQLP +>40 +TSAKTTVVVTTQKRNSRRQLP +>41 +VPALGWEFLASTRLTSELNFL +>42 +VPALGWEFLAFTRLTSELNFL +>43 +TRVSLFGALVHSRTYDMDVRL +>44 +TRVSLFGALVRSRTYDMDVRL +>44_duplicate +TRVSLFGALVRSRTYDMDVRL +>45 +AREERTEASGSESRVEPPHEN +>46 +AREERTEASGYESRVEPPHEN diff --git a/tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test.9.fa.split_1-48 b/tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test.9.fa.split_1-48 new file mode 100644 index 000000000..b82be2879 --- /dev/null +++ b/tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test.9.fa.split_1-48 @@ -0,0 +1,128 @@ +>1 +LPLPPPPLLPLLLLLLGASGG +>2 +LPLPPPPLLPLLPLLLLLGASGG +>3 +DPASAAAAAAAAAAAAAAVIPTVSTPPP +>4 +DPASAAAAAAAVIPTVSTPPP +>5 +VNSATLSRTLLAAAGGSSLQ +>6 +VNSATLSRTLLLAAAGGSSLQ +>7 +GRSGGSHVWTRSRDPEGSSRK +>8 +GRSGGSHVWTHSRDPEGSSRK +>9 +RQKAVRPLELAYCLQKCNVPL +>10 
+RQKAVRPLELVYCLQKCNVPL +>11 +NNKSVNEALNHLLTEEEDYQG +>12 +NNKSVNEALNNLLTEEEDYQG +>13 +KEDAVQGIANEDAAQGIAKED +>14 +KEDAVQGIANQDAAQGIAKED +>15 +PGKTVTISCTRSSGSIASNYV +>16 +PGKTVTISCTGSSGSIASNYV +>17 +VQWYQQRPGSSPTTVIYEDNQ +>18 +VQWYQQRPGSAPTTVIYEDNQ +>19 +VLHHEDLIGKPGGVSLSKIER +>20 +VLHHEDLIGKHGGVSLSKIER +>21 +VKSPVKEEEKPQEVKVKEPPK +>22 +VKSPVKEEEKTQEVKVKEPPK +>23 +SLMVCELAGNPFNCECDLFGF +>24 +SLMVCELAGNLFNCECDLFGF +>25 +SHLSYLSVRGGFNMSSFKLKE +>26 +SHLSYLSVRGGFNMSSFKLKQ +>27 +SLLHTVSPEPPRPPQQPVPTE +>28 +SLLHTVSPEPARPPQQPVPTE +>29 +EDTGGGGRSAGQHWARLRGE +>30 +EDTGGGGRSAQHWARLRGESGLSLERHRSTLTQASSMTPHSGPRSTTSQASPAQRDTAQA +ASTREIPRASSPHRITQRDTSRASSTQQEISRASSTQQETSRASSTQEDTPRASSTQEDT +PRASSTQWNTPRASSPSRSTQLDNPRTSSTQQDNPQTSFPTCTPQRENPRTPCVQQDDPR +ASSPNRTTQRENSRTSCAQRDNPKASRTSSPNRATRDNPRTSCAQRDNPRASSPSRATRD +NPTTSCAQRDNPRASRTSSPNRATRDNPRTSCAQRDNPRASSPSRATRDNPTTSCAQRDN +PRASRTSSPNRATRDNPRTSCAQRDNPRASSPNRAARDNPTTSCAQRDNPRASRTSSPNR +ATRDNPRTSCAQRDNPRASSPNRATRDNPTTSCAQRDNPRASRTSSPNRATRDNPRTSCA +QRDNPRASSPNRTTQQDSPRTSCARRDDPRASSPNRTIQQENPRTSCALRDNPRASSPSR +TIQQENPRTSCAQRDDPRASSPNRTTQQENPRTSCARRDNPRASSRNRTIQRDNPRTSCA +QRDNPRASSPNRTIQQENLRTSCTRQDNPRTSSPNRATRDNPRTSCAQRDNLRASSPIRA +TQQDNPRTCIQQNIPRSSSTQQDNPKTSCTKRDNLRPTCTQRDRTQSFSFQRDNPGTSSS +QCCTQKENLRPSSPHRSTQWNNPRNSSPHRTNKDIPWASFPLRPTQSDGPRTSSPSRSKQ +SEVPWASIALRPTQGDRPQTSSPSRPAQHDPPQSSFGPTQYNLPSRATSSSHNPGHQSTS +RTSSPVYPAAYGAPLTSPEPSQPPCAVCIGHRDAPRASSPPRYLQHDPFPFFPEPRAPES +EPPHHEPPYIPPAVCIGHRDAPRASSPPRHTQFDPFPFLPDTSDAEHQCQSPQHEPLQLP +APVCIGYRDAPRASSPPRQAPEPSLLFQDLPRASTESLVPSMDSLHECPHIPTPVCIGHR +DAPSFSSPPRQAPEPSLFFQDPPGTSMESLAPSTDSLHGSPVLIPQVCIGHRDAPRASSP +PRHPPSDLAFLAPSPSPGSSGGSRGSAPPGETRHNLEREEYTVLADLPPPRRLAQRQPGP +QAQCSSGGRTHSPGRAEVERLFGQERRKSEAAGAFQAQDEGRSQQPSQGQSQLLRRQSSP +APSRQVTMLPAKQAELTRRSQAEPPHPWSPEKRPEGDRQLQGSPLPPRTSARTPERELRT +QRPLESGQAGPRQPLGVWQSQEEPPGSQGPHRHLERSWSSQEGGLGPGGWWGCGEPSLGA +AKAPEGAWGGTSREYKESWGQPEAWEEKPTHELPRELGKRSPLTSPPENWGGPAESSQSW +HSGTPTAVGWGAEGACPYPRGSERRPELDWRDLLGLLRAPGEGVWARVPSLDWEGLLELL 
+QARLPRKDPAGHRDDLARALGPELGPPGTNDVPEQESHSQPEGWAEATPVNGHSPALQSQ +SPVQLPSPACTSTQWPKIKVTRGPATATLAGLEQTGPLGSRSTAKGPSLPELQFQPEEPE +ESEPSRGQDPLTDQKQADSADKRPAEGKAGSPLKGRLVTSWRMPGDRPTLFNPFLLSLGV +LRWRRPDLLNFKKGWMSILDEPGEPPSPSLTTTSTSQWKKHWFVLTDSSLKYYRDSTAEE +ADELDGEIDLRSCTDVTEYAVQRNYGFQIHTKDAVYTLSAMTSGIRRNWIEALRKTVRPT +SAPDVTKLSDSNKENALHSYSTQKGPLKAGEQRAGSEVISRGGPRKADGQRQALDYVELS +PLTQASPQRARTPARTPDRLAKQEELERDLAQRSEERRKWFEATDSRTPEVPAGEGPRRG +LGAPLTEDQQNRLSEEIEKKWQELEKLPLRENKRVPLTALLNQSRGERRGPPSDGHEALE +KEVQALRAQLEAWRLQGEAPQSALRSQEDGHIPPGYISQEACERSLAEMESSHQQVMEEL +QRHHERELQRLQQEKEWLLAEETAATASAIEAMKKAYQEELSRELSKTRSLQQGPDGLRK +QHQSDVEALKRELQVLSEQYSQKCLEIGALMRQAEEREHTLRRCQQEGQELLRHNQELHG +RLSEEIDQLRGFIASQGMGNGCGRSNERSSCELEVLLRVKENELQYLKKEVQCLRDELQM +MQKDKRFTSGKYQDVYVELSHIKTRSEREIEQLKEHLRLAMAALQEKESMRNSLAEYSTG +QGSGEKAGCPWSGTGQH +>31 +GMYQPCDDMDCLSDRCKILQV +>32 +GMYQPCDDMDYLSDRCKILQV +>33 +ASVLCQRAKVAMSHFEPNEYI +>34 +ASVLCQRAKVEMSHFEPNEYI +>35 +AIAGTLKFNPETDYLTGTDGK +>36 +AIAGTLKFNPQTDYLTGTDGK +>37 +WLYYSYGLLHTYGSGGYALYF +>38 +WLYYSYGLLHIYGSGGYALYF +>39 +TSAKTTVVVTAQKRNSRRQLP +>40 +TSAKTTVVVTTQKRNSRRQLP +>41 +VPALGWEFLASTRLTSELNFL +>42 +VPALGWEFLAFTRLTSELNFL +>43 +TRVSLFGALVHSRTYDMDVRL +>44 +TRVSLFGALVRSRTYDMDVRL +>45 +AREERTEASGSESRVEPPHEN +>46 +AREERTEASGYESRVEPPHEN diff --git a/tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test.9.fa.split_1-48.key b/tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test.9.fa.split_1-48.key new file mode 100644 index 000000000..4ce275668 --- /dev/null +++ b/tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test.9.fa.split_1-48.key @@ -0,0 +1,94 @@ +1: +- '1' +- 1_duplicate +2: +- '2' +3: +- '3' +4: +- '4' +5: +- '5' +6: +- '6' +7: +- '7' +8: +- '8' +9: +- '9' +10: +- '10' +11: +- '11' +12: +- '12' +13: +- '13' +14: +- '14' +15: +- '15' +16: +- '16' +17: +- '17' +18: +- '18' +19: +- '19' +20: +- '20' +21: +- '21' +22: +- '22' +23: +- '23' +24: +- '24' +25: +- '25' +26: +- '26' +27: +- '27' +28: +- '28' +29: 
+- '29' +30: +- '30' +31: +- '31' +32: +- '32' +33: +- '33' +34: +- '34' +35: +- '35' +36: +- '36' +37: +- '37' +38: +- '38' +39: +- '39' +40: +- '40' +41: +- '41' +42: +- '42' +43: +- '43' +44: +- '44' +- 44_duplicate +45: +- '45' +46: +- '46' diff --git a/tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test.fa.split_1-46 b/tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test.fa.split_1-46 deleted file mode 100644 index ec0bb1cf9..000000000 --- a/tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test.fa.split_1-46 +++ /dev/null @@ -1,92 +0,0 @@ ->1 -LPLPPPPLLPLLLLLLGASGG ->2 -LPLPPPPLLPLLPLLLLLGASGG ->3 -DPASAAAAAAAAAAAAAAVIPTVSTPPP ->4 -DPASAAAAAAAVIPTVSTPPP ->5 -VNSATLSRTLLAAAGGSSLQ ->6 -VNSATLSRTLLLAAAGGSSLQ ->7 -GRSGGSHVWTRSRDPEGSSRK ->8 -GRSGGSHVWTHSRDPEGSSRK ->9 -RQKAVRPLELAYCLQKCNVPL ->10 -RQKAVRPLELVYCLQKCNVPL ->11 -NNKSVNEALNHLLTEEEDYQG ->12 -NNKSVNEALNNLLTEEEDYQG ->13 -KEDAVQGIANEDAAQGIAKED ->14 -KEDAVQGIANQDAAQGIAKED ->15 -PGKTVTISCTRSSGSIASNYV ->16 -PGKTVTISCTGSSGSIASNYV ->17 -VQWYQQRPGSSPTTVIYEDNQ ->18 -VQWYQQRPGSAPTTVIYEDNQ ->19 -VLHHEDLIGKPGGVSLSKIER ->20 -VLHHEDLIGKHGGVSLSKIER ->21 -VKSPVKEEEKPQEVKVKEPPK ->22 -VKSPVKEEEKTQEVKVKEPPK ->23 -SLMVCELAGNPFNCECDLFGF ->24 -SLMVCELAGNLFNCECDLFGF ->25 -SHLSYLSVRGGFNMSSFKLKE ->26 -SHLSYLSVRGGFNMSSFKLKQ ->27 -SLLHTVSPEPPRPPQQPVPTE ->28 -SLLHTVSPEPARPPQQPVPTE ->29 -EDTGGGGRSAGQHWARLRGE ->30 
-EDTGGGGRSAQHWARLRGESGLSLERHRSTLTQASSMTPHSGPRSTTSQASPAQRDTAQAASTREIPRASSPHRITQRDTSRASSTQQEISRASSTQQETSRASSTQEDTPRASSTQEDTPRASSTQWNTPRASSPSRSTQLDNPRTSSTQQDNPQTSFPTCTPQRENPRTPCVQQDDPRASSPNRTTQRENSRTSCAQRDNPKASRTSSPNRATRDNPRTSCAQRDNPRASSPSRATRDNPTTSCAQRDNPRASRTSSPNRATRDNPRTSCAQRDNPRASSPSRATRDNPTTSCAQRDNPRASRTSSPNRATRDNPRTSCAQRDNPRASSPNRAARDNPTTSCAQRDNPRASRTSSPNRATRDNPRTSCAQRDNPRASSPNRATRDNPTTSCAQRDNPRASRTSSPNRATRDNPRTSCAQRDNPRASSPNRTTQQDSPRTSCARRDDPRASSPNRTIQQENPRTSCALRDNPRASSPSRTIQQENPRTSCAQRDDPRASSPNRTTQQENPRTSCARRDNPRASSRNRTIQRDNPRTSCAQRDNPRASSPNRTIQQENLRTSCTRQDNPRTSSPNRATRDNPRTSCAQRDNLRASSPIRATQQDNPRTCIQQNIPRSSSTQQDNPKTSCTKRDNLRPTCTQRDRTQSFSFQRDNPGTSSSQCCTQKENLRPSSPHRSTQWNNPRNSSPHRTNKDIPWASFPLRPTQSDGPRTSSPSRSKQSEVPWASIALRPTQGDRPQTSSPSRPAQHDPPQSSFGPTQYNLPSRATSSSHNPGHQSTSRTSSPVYPAAYGAPLTSPEPSQPPCAVCIGHRDAPRASSPPRYLQHDPFPFFPEPRAPESEPPHHEPPYIPPAVCIGHRDAPRASSPPRHTQFDPFPFLPDTSDAEHQCQSPQHEPLQLPAPVCIGYRDAPRASSPPRQAPEPSLLFQDLPRASTESLVPSMDSLHECPHIPTPVCIGHRDAPSFSSPPRQAPEPSLFFQDPPGTSMESLAPSTDSLHGSPVLIPQVCIGHRDAPRASSPPRHPPSDLAFLAPSPSPGSSGGSRGSAPPGETRHNLEREEYTVLADLPPPRRLAQRQPGPQAQCSSGGRTHSPGRAEVERLFGQERRKSEAAGAFQAQDEGRSQQPSQGQSQLLRRQSSPAPSRQVTMLPAKQAELTRRSQAEPPHPWSPEKRPEGDRQLQGSPLPPRTSARTPERELRTQRPLESGQAGPRQPLGVWQSQEEPPGSQGPHRHLERSWSSQEGGLGPGGWWGCGEPSLGAAKAPEGAWGGTSREYKESWGQPEAWEEKPTHELPRELGKRSPLTSPPENWGGPAESSQSWHSGTPTAVGWGAEGACPYPRGSERRPELDWRDLLGLLRAPGEGVWARVPSLDWEGLLELLQARLPRKDPAGHRDDLARALGPELGPPGTNDVPEQESHSQPEGWAEATPVNGHSPALQSQSPVQLPSPACTSTQWPKIKVTRGPATATLAGLEQTGPLGSRSTAKGPSLPELQFQPEEPEESEPSRGQDPLTDQKQADSADKRPAEGKAGSPLKGRLVTSWRMPGDRPTLFNPFLLSLGVLRWRRPDLLNFKKGWMSILDEPGEPPSPSLTTTSTSQWKKHWFVLTDSSLKYYRDSTAEEADELDGEIDLRSCTDVTEYAVQRNYGFQIHTKDAVYTLSAMTSGIRRNWIEALRKTVRPTSAPDVTKLSDSNKENALHSYSTQKGPLKAGEQRAGSEVISRGGPRKADGQRQALDYVELSPLTQASPQRARTPARTPDRLAKQEELERDLAQRSEERRKWFEATDSRTPEVPAGEGPRRGLGAPLTEDQQNRLSEEIEKKWQELEKLPLRENKRVPLTALLNQSRGERRGPPSDGHEALEKEVQALRAQLEAWRLQGEAPQSALRSQEDGHIPPGYISQEACERSLAEMESSHQQVMEELQRHHERELQRLQQEKEWLLAEETAATASAIEAMKKAYQEELSRELSKTRSLQQGPDGLRKQHQSDVEALKRELQVLSEQ
YSQKCLEIGALMRQAEEREHTLRRCQQEGQELLRHNQELHGRLSEEIDQLRGFIASQGMGNGCGRSNERSSCELEVLLRVKENELQYLKKEVQCLRDELQMMQKDKRFTSGKYQDVYVELSHIKTRSEREIEQLKEHLRLAMAALQEKESMRNSLAEYSTGQGSGEKAGCPWSGTGQH ->31 -GMYQPCDDMDCLSDRCKILQV ->32 -GMYQPCDDMDYLSDRCKILQV ->33 -ASVLCQRAKVAMSHFEPNEYI ->34 -ASVLCQRAKVEMSHFEPNEYI ->35 -AIAGTLKFNPETDYLTGTDGK ->36 -AIAGTLKFNPQTDYLTGTDGK ->37 -WLYYSYGLLHTYGSGGYALYF ->38 -WLYYSYGLLHIYGSGGYALYF ->39 -TSAKTTVVVTAQKRNSRRQLP ->40 -TSAKTTVVVTTQKRNSRRQLP ->41 -VPALGWEFLASTRLTSELNFL ->42 -VPALGWEFLAFTRLTSELNFL ->43 -TRVSLFGALVHSRTYDMDVRL ->44 -TRVSLFGALVRSRTYDMDVRL ->45 -AREERTEASGSESRVEPPHEN ->46 -AREERTEASGYESRVEPPHEN diff --git a/tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test_21.fa.split_1-48 b/tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test_21.fa.split_1-48 deleted file mode 100644 index ec0bb1cf9..000000000 --- a/tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test_21.fa.split_1-48 +++ /dev/null @@ -1,92 +0,0 @@ ->1 -LPLPPPPLLPLLLLLLGASGG ->2 -LPLPPPPLLPLLPLLLLLGASGG ->3 -DPASAAAAAAAAAAAAAAVIPTVSTPPP ->4 -DPASAAAAAAAVIPTVSTPPP ->5 -VNSATLSRTLLAAAGGSSLQ ->6 -VNSATLSRTLLLAAAGGSSLQ ->7 -GRSGGSHVWTRSRDPEGSSRK ->8 -GRSGGSHVWTHSRDPEGSSRK ->9 -RQKAVRPLELAYCLQKCNVPL ->10 -RQKAVRPLELVYCLQKCNVPL ->11 -NNKSVNEALNHLLTEEEDYQG ->12 -NNKSVNEALNNLLTEEEDYQG ->13 -KEDAVQGIANEDAAQGIAKED ->14 -KEDAVQGIANQDAAQGIAKED ->15 -PGKTVTISCTRSSGSIASNYV ->16 -PGKTVTISCTGSSGSIASNYV ->17 -VQWYQQRPGSSPTTVIYEDNQ ->18 -VQWYQQRPGSAPTTVIYEDNQ ->19 -VLHHEDLIGKPGGVSLSKIER ->20 -VLHHEDLIGKHGGVSLSKIER ->21 -VKSPVKEEEKPQEVKVKEPPK ->22 -VKSPVKEEEKTQEVKVKEPPK ->23 -SLMVCELAGNPFNCECDLFGF ->24 -SLMVCELAGNLFNCECDLFGF ->25 -SHLSYLSVRGGFNMSSFKLKE ->26 -SHLSYLSVRGGFNMSSFKLKQ ->27 -SLLHTVSPEPPRPPQQPVPTE ->28 -SLLHTVSPEPARPPQQPVPTE ->29 -EDTGGGGRSAGQHWARLRGE ->30 
-EDTGGGGRSAQHWARLRGESGLSLERHRSTLTQASSMTPHSGPRSTTSQASPAQRDTAQAASTREIPRASSPHRITQRDTSRASSTQQEISRASSTQQETSRASSTQEDTPRASSTQEDTPRASSTQWNTPRASSPSRSTQLDNPRTSSTQQDNPQTSFPTCTPQRENPRTPCVQQDDPRASSPNRTTQRENSRTSCAQRDNPKASRTSSPNRATRDNPRTSCAQRDNPRASSPSRATRDNPTTSCAQRDNPRASRTSSPNRATRDNPRTSCAQRDNPRASSPSRATRDNPTTSCAQRDNPRASRTSSPNRATRDNPRTSCAQRDNPRASSPNRAARDNPTTSCAQRDNPRASRTSSPNRATRDNPRTSCAQRDNPRASSPNRATRDNPTTSCAQRDNPRASRTSSPNRATRDNPRTSCAQRDNPRASSPNRTTQQDSPRTSCARRDDPRASSPNRTIQQENPRTSCALRDNPRASSPSRTIQQENPRTSCAQRDDPRASSPNRTTQQENPRTSCARRDNPRASSRNRTIQRDNPRTSCAQRDNPRASSPNRTIQQENLRTSCTRQDNPRTSSPNRATRDNPRTSCAQRDNLRASSPIRATQQDNPRTCIQQNIPRSSSTQQDNPKTSCTKRDNLRPTCTQRDRTQSFSFQRDNPGTSSSQCCTQKENLRPSSPHRSTQWNNPRNSSPHRTNKDIPWASFPLRPTQSDGPRTSSPSRSKQSEVPWASIALRPTQGDRPQTSSPSRPAQHDPPQSSFGPTQYNLPSRATSSSHNPGHQSTSRTSSPVYPAAYGAPLTSPEPSQPPCAVCIGHRDAPRASSPPRYLQHDPFPFFPEPRAPESEPPHHEPPYIPPAVCIGHRDAPRASSPPRHTQFDPFPFLPDTSDAEHQCQSPQHEPLQLPAPVCIGYRDAPRASSPPRQAPEPSLLFQDLPRASTESLVPSMDSLHECPHIPTPVCIGHRDAPSFSSPPRQAPEPSLFFQDPPGTSMESLAPSTDSLHGSPVLIPQVCIGHRDAPRASSPPRHPPSDLAFLAPSPSPGSSGGSRGSAPPGETRHNLEREEYTVLADLPPPRRLAQRQPGPQAQCSSGGRTHSPGRAEVERLFGQERRKSEAAGAFQAQDEGRSQQPSQGQSQLLRRQSSPAPSRQVTMLPAKQAELTRRSQAEPPHPWSPEKRPEGDRQLQGSPLPPRTSARTPERELRTQRPLESGQAGPRQPLGVWQSQEEPPGSQGPHRHLERSWSSQEGGLGPGGWWGCGEPSLGAAKAPEGAWGGTSREYKESWGQPEAWEEKPTHELPRELGKRSPLTSPPENWGGPAESSQSWHSGTPTAVGWGAEGACPYPRGSERRPELDWRDLLGLLRAPGEGVWARVPSLDWEGLLELLQARLPRKDPAGHRDDLARALGPELGPPGTNDVPEQESHSQPEGWAEATPVNGHSPALQSQSPVQLPSPACTSTQWPKIKVTRGPATATLAGLEQTGPLGSRSTAKGPSLPELQFQPEEPEESEPSRGQDPLTDQKQADSADKRPAEGKAGSPLKGRLVTSWRMPGDRPTLFNPFLLSLGVLRWRRPDLLNFKKGWMSILDEPGEPPSPSLTTTSTSQWKKHWFVLTDSSLKYYRDSTAEEADELDGEIDLRSCTDVTEYAVQRNYGFQIHTKDAVYTLSAMTSGIRRNWIEALRKTVRPTSAPDVTKLSDSNKENALHSYSTQKGPLKAGEQRAGSEVISRGGPRKADGQRQALDYVELSPLTQASPQRARTPARTPDRLAKQEELERDLAQRSEERRKWFEATDSRTPEVPAGEGPRRGLGAPLTEDQQNRLSEEIEKKWQELEKLPLRENKRVPLTALLNQSRGERRGPPSDGHEALEKEVQALRAQLEAWRLQGEAPQSALRSQEDGHIPPGYISQEACERSLAEMESSHQQVMEELQRHHERELQRLQQEKEWLLAEETAATASAIEAMKKAYQEELSRELSKTRSLQQGPDGLRKQHQSDVEALKRELQVLSEQ
YSQKCLEIGALMRQAEEREHTLRRCQQEGQELLRHNQELHGRLSEEIDQLRGFIASQGMGNGCGRSNERSSCELEVLLRVKENELQYLKKEVQCLRDELQMMQKDKRFTSGKYQDVYVELSHIKTRSEREIEQLKEHLRLAMAALQEKESMRNSLAEYSTGQGSGEKAGCPWSGTGQH ->31 -GMYQPCDDMDCLSDRCKILQV ->32 -GMYQPCDDMDYLSDRCKILQV ->33 -ASVLCQRAKVAMSHFEPNEYI ->34 -ASVLCQRAKVEMSHFEPNEYI ->35 -AIAGTLKFNPETDYLTGTDGK ->36 -AIAGTLKFNPQTDYLTGTDGK ->37 -WLYYSYGLLHTYGSGGYALYF ->38 -WLYYSYGLLHIYGSGGYALYF ->39 -TSAKTTVVVTAQKRNSRRQLP ->40 -TSAKTTVVVTTQKRNSRRQLP ->41 -VPALGWEFLASTRLTSELNFL ->42 -VPALGWEFLAFTRLTSELNFL ->43 -TRVSLFGALVHSRTYDMDVRL ->44 -TRVSLFGALVRSRTYDMDVRL ->45 -AREERTEASGSESRVEPPHEN ->46 -AREERTEASGYESRVEPPHEN diff --git a/tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test_21.fa.split_1-48.key b/tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test_21.fa.split_1-48.key deleted file mode 100644 index 96108ebca..000000000 --- a/tests/test_data/pvacbind/combine_and_condense/MHC_Class_I/tmp/Test_21.fa.split_1-48.key +++ /dev/null @@ -1,92 +0,0 @@ -1: -- WT.1.IGFBP2.ENST00000233809.inframe_ins.20L/LLP -2: -- MT.1.IGFBP2.ENST00000233809.inframe_ins.20L/LLP -3: -- WT.2.RBM47.ENST00000381793.inframe_del.495-502AAAAAAAA/A -4: -- MT.2.RBM47.ENST00000381793.inframe_del.495-502AAAAAAAA/A -5: -- WT.3.PRICKLE4.ENST00000458694.inframe_ins.287-288-/L -6: -- MT.3.PRICKLE4.ENST00000458694.inframe_ins.287-288-/L -7: -- WT.5.CECR2.ENST00000262608.missense.535R/H -8: -- MT.5.CECR2.ENST00000262608.missense.535R/H -9: -- WT.6.USP18.ENST00000215794.missense.124A/V -10: -- MT.6.USP18.ENST00000215794.missense.124A/V -11: -- WT.7.CLTCL1.ENST00000263200.missense.1469H/N -12: -- MT.7.CLTCL1.ENST00000263200.missense.1469H/N -13: -- WT.8.FAM230A.ENST00000434783.missense.322E/Q -14: -- MT.8.FAM230A.ENST00000434783.missense.322E/Q -15: -- WT.9.IGLV6-57.ENST00000390285.missense.43R/G -16: -- MT.9.IGLV6-57.ENST00000390285.missense.43R/G -17: -- WT.10.IGLV6-57.ENST00000390285.missense.63S/A -18: -- MT.10.IGLV6-57.ENST00000390285.missense.63S/A -19: -- 
WT.11.TPST2.ENST00000338754.missense.274P/H -20: -- MT.11.TPST2.ENST00000338754.missense.274P/H -21: -- WT.12.NEFH.ENST00000310624.missense.830P/T -22: -- MT.12.NEFH.ENST00000310624.missense.830P/T -23: -- WT.13.ELFN2.ENST00000402918.missense.186P/L -24: -- MT.13.ELFN2.ENST00000402918.missense.186P/L -25: -- WT.14.LGALS2.ENST00000215886.missense.132E/Q -26: -- MT.14.LGALS2.ENST00000215886.missense.132E/Q -27: -- WT.15.GGA1.ENST00000343632.missense.484P/A -28: -- MT.15.GGA1.ENST00000343632.missense.484P/A -29: -- WT.16.TRIOBP.ENST00000406386.FS.219GA/G -30: -- MT.16.TRIOBP.ENST00000406386.FS.219GA/G -31: -- WT.17.CACNA1I.ENST00000402142.missense.107C/Y -32: -- MT.17.CACNA1I.ENST00000402142.missense.107C/Y -33: -- WT.18.ACO2.ENST00000216254.missense.33A/E -34: -- MT.18.ACO2.ENST00000216254.missense.33A/E -35: -- WT.19.ACO2.ENST00000216254.missense.510E/Q -36: -- MT.19.ACO2.ENST00000216254.missense.510E/Q -37: -- WT.20.PKDREJ.ENST00000253255.missense.1875T/I -38: -- MT.20.PKDREJ.ENST00000253255.missense.1875T/I -39: -- WT.21.MOV10L1.ENST00000262794.missense.482A/T -40: -- MT.21.MOV10L1.ENST00000262794.missense.482A/T -41: -- WT.22.PANX2.ENST00000395842.missense.147S/F -42: -- MT.22.PANX2.ENST00000395842.missense.147S/F -43: -- WT.23.TUBGCP6.ENST00000248846.missense.220H/R -44: -- MT.23.TUBGCP6.ENST00000248846.missense.220H/R -45: -- WT.24.PPP6R2.ENST00000395741.missense.414S/Y -46: -- MT.24.PPP6R2.ENST00000395741.missense.414S/Y diff --git a/tests/test_data/pvacbind/combine_and_condense/MHC_Class_II/tmp/Test.15.fa b/tests/test_data/pvacbind/combine_and_condense/MHC_Class_II/tmp/Test.15.fa new file mode 100644 index 000000000..f559bef16 --- /dev/null +++ b/tests/test_data/pvacbind/combine_and_condense/MHC_Class_II/tmp/Test.15.fa @@ -0,0 +1,132 @@ +>1 +LPLPPPPLLPLLLLLLGASGG +>1_duplicate +LPLPPPPLLPLLLLLLGASGG +>2 +LPLPPPPLLPLLPLLLLLGASGG +>3 +DPASAAAAAAAAAAAAAAVIPTVSTPPP +>4 +DPASAAAAAAAVIPTVSTPPP +>5 +VNSATLSRTLLAAAGGSSLQ +>6 +VNSATLSRTLLLAAAGGSSLQ +>7 
+GRSGGSHVWTRSRDPEGSSRK +>8 +GRSGGSHVWTHSRDPEGSSRK +>9 +RQKAVRPLELAYCLQKCNVPL +>10 +RQKAVRPLELVYCLQKCNVPL +>11 +NNKSVNEALNHLLTEEEDYQG +>12 +NNKSVNEALNNLLTEEEDYQG +>13 +KEDAVQGIANEDAAQGIAKED +>14 +KEDAVQGIANQDAAQGIAKED +>15 +PGKTVTISCTRSSGSIASNYV +>16 +PGKTVTISCTGSSGSIASNYV +>17 +VQWYQQRPGSSPTTVIYEDNQ +>18 +VQWYQQRPGSAPTTVIYEDNQ +>19 +VLHHEDLIGKPGGVSLSKIER +>20 +VLHHEDLIGKHGGVSLSKIER +>21 +VKSPVKEEEKPQEVKVKEPPK +>22 +VKSPVKEEEKTQEVKVKEPPK +>23 +SLMVCELAGNPFNCECDLFGF +>24 +SLMVCELAGNLFNCECDLFGF +>25 +SHLSYLSVRGGFNMSSFKLKE +>26 +SHLSYLSVRGGFNMSSFKLKQ +>27 +SLLHTVSPEPPRPPQQPVPTE +>28 +SLLHTVSPEPARPPQQPVPTE +>29 +EDTGGGGRSAGQHWARLRGE +>30 +EDTGGGGRSAQHWARLRGESGLSLERHRSTLTQASSMTPHSGPRSTTSQASPAQRDTAQA +ASTREIPRASSPHRITQRDTSRASSTQQEISRASSTQQETSRASSTQEDTPRASSTQEDT +PRASSTQWNTPRASSPSRSTQLDNPRTSSTQQDNPQTSFPTCTPQRENPRTPCVQQDDPR +ASSPNRTTQRENSRTSCAQRDNPKASRTSSPNRATRDNPRTSCAQRDNPRASSPSRATRD +NPTTSCAQRDNPRASRTSSPNRATRDNPRTSCAQRDNPRASSPSRATRDNPTTSCAQRDN +PRASRTSSPNRATRDNPRTSCAQRDNPRASSPNRAARDNPTTSCAQRDNPRASRTSSPNR +ATRDNPRTSCAQRDNPRASSPNRATRDNPTTSCAQRDNPRASRTSSPNRATRDNPRTSCA +QRDNPRASSPNRTTQQDSPRTSCARRDDPRASSPNRTIQQENPRTSCALRDNPRASSPSR +TIQQENPRTSCAQRDDPRASSPNRTTQQENPRTSCARRDNPRASSRNRTIQRDNPRTSCA +QRDNPRASSPNRTIQQENLRTSCTRQDNPRTSSPNRATRDNPRTSCAQRDNLRASSPIRA +TQQDNPRTCIQQNIPRSSSTQQDNPKTSCTKRDNLRPTCTQRDRTQSFSFQRDNPGTSSS +QCCTQKENLRPSSPHRSTQWNNPRNSSPHRTNKDIPWASFPLRPTQSDGPRTSSPSRSKQ +SEVPWASIALRPTQGDRPQTSSPSRPAQHDPPQSSFGPTQYNLPSRATSSSHNPGHQSTS +RTSSPVYPAAYGAPLTSPEPSQPPCAVCIGHRDAPRASSPPRYLQHDPFPFFPEPRAPES +EPPHHEPPYIPPAVCIGHRDAPRASSPPRHTQFDPFPFLPDTSDAEHQCQSPQHEPLQLP +APVCIGYRDAPRASSPPRQAPEPSLLFQDLPRASTESLVPSMDSLHECPHIPTPVCIGHR +DAPSFSSPPRQAPEPSLFFQDPPGTSMESLAPSTDSLHGSPVLIPQVCIGHRDAPRASSP +PRHPPSDLAFLAPSPSPGSSGGSRGSAPPGETRHNLEREEYTVLADLPPPRRLAQRQPGP +QAQCSSGGRTHSPGRAEVERLFGQERRKSEAAGAFQAQDEGRSQQPSQGQSQLLRRQSSP +APSRQVTMLPAKQAELTRRSQAEPPHPWSPEKRPEGDRQLQGSPLPPRTSARTPERELRT +QRPLESGQAGPRQPLGVWQSQEEPPGSQGPHRHLERSWSSQEGGLGPGGWWGCGEPSLGA 
+AKAPEGAWGGTSREYKESWGQPEAWEEKPTHELPRELGKRSPLTSPPENWGGPAESSQSW +HSGTPTAVGWGAEGACPYPRGSERRPELDWRDLLGLLRAPGEGVWARVPSLDWEGLLELL +QARLPRKDPAGHRDDLARALGPELGPPGTNDVPEQESHSQPEGWAEATPVNGHSPALQSQ +SPVQLPSPACTSTQWPKIKVTRGPATATLAGLEQTGPLGSRSTAKGPSLPELQFQPEEPE +ESEPSRGQDPLTDQKQADSADKRPAEGKAGSPLKGRLVTSWRMPGDRPTLFNPFLLSLGV +LRWRRPDLLNFKKGWMSILDEPGEPPSPSLTTTSTSQWKKHWFVLTDSSLKYYRDSTAEE +ADELDGEIDLRSCTDVTEYAVQRNYGFQIHTKDAVYTLSAMTSGIRRNWIEALRKTVRPT +SAPDVTKLSDSNKENALHSYSTQKGPLKAGEQRAGSEVISRGGPRKADGQRQALDYVELS +PLTQASPQRARTPARTPDRLAKQEELERDLAQRSEERRKWFEATDSRTPEVPAGEGPRRG +LGAPLTEDQQNRLSEEIEKKWQELEKLPLRENKRVPLTALLNQSRGERRGPPSDGHEALE +KEVQALRAQLEAWRLQGEAPQSALRSQEDGHIPPGYISQEACERSLAEMESSHQQVMEEL +QRHHERELQRLQQEKEWLLAEETAATASAIEAMKKAYQEELSRELSKTRSLQQGPDGLRK +QHQSDVEALKRELQVLSEQYSQKCLEIGALMRQAEEREHTLRRCQQEGQELLRHNQELHG +RLSEEIDQLRGFIASQGMGNGCGRSNERSSCELEVLLRVKENELQYLKKEVQCLRDELQM +MQKDKRFTSGKYQDVYVELSHIKTRSEREIEQLKEHLRLAMAALQEKESMRNSLAEYSTG +QGSGEKAGCPWSGTGQH +>31 +GMYQPCDDMDCLSDRCKILQV +>32 +GMYQPCDDMDYLSDRCKILQV +>33 +ASVLCQRAKVAMSHFEPNEYI +>34 +ASVLCQRAKVEMSHFEPNEYI +>35 +AIAGTLKFNPETDYLTGTDGK +>36 +AIAGTLKFNPQTDYLTGTDGK +>37 +WLYYSYGLLHTYGSGGYALYF +>38 +WLYYSYGLLHIYGSGGYALYF +>39 +TSAKTTVVVTAQKRNSRRQLP +>40 +TSAKTTVVVTTQKRNSRRQLP +>41 +VPALGWEFLASTRLTSELNFL +>42 +VPALGWEFLAFTRLTSELNFL +>43 +TRVSLFGALVHSRTYDMDVRL +>44 +TRVSLFGALVRSRTYDMDVRL +>44_duplicate +TRVSLFGALVRSRTYDMDVRL +>45 +AREERTEASGSESRVEPPHEN +>46 +AREERTEASGYESRVEPPHEN diff --git a/tests/test_data/pvacbind/combine_and_condense/MHC_Class_II/tmp/Test.15.fa.split_1-48 b/tests/test_data/pvacbind/combine_and_condense/MHC_Class_II/tmp/Test.15.fa.split_1-48 new file mode 100644 index 000000000..b82be2879 --- /dev/null +++ b/tests/test_data/pvacbind/combine_and_condense/MHC_Class_II/tmp/Test.15.fa.split_1-48 @@ -0,0 +1,128 @@ +>1 +LPLPPPPLLPLLLLLLGASGG +>2 +LPLPPPPLLPLLPLLLLLGASGG +>3 +DPASAAAAAAAAAAAAAAVIPTVSTPPP +>4 +DPASAAAAAAAVIPTVSTPPP +>5 +VNSATLSRTLLAAAGGSSLQ +>6 +VNSATLSRTLLLAAAGGSSLQ +>7 +GRSGGSHVWTRSRDPEGSSRK +>8 
+GRSGGSHVWTHSRDPEGSSRK +>9 +RQKAVRPLELAYCLQKCNVPL +>10 +RQKAVRPLELVYCLQKCNVPL +>11 +NNKSVNEALNHLLTEEEDYQG +>12 +NNKSVNEALNNLLTEEEDYQG +>13 +KEDAVQGIANEDAAQGIAKED +>14 +KEDAVQGIANQDAAQGIAKED +>15 +PGKTVTISCTRSSGSIASNYV +>16 +PGKTVTISCTGSSGSIASNYV +>17 +VQWYQQRPGSSPTTVIYEDNQ +>18 +VQWYQQRPGSAPTTVIYEDNQ +>19 +VLHHEDLIGKPGGVSLSKIER +>20 +VLHHEDLIGKHGGVSLSKIER +>21 +VKSPVKEEEKPQEVKVKEPPK +>22 +VKSPVKEEEKTQEVKVKEPPK +>23 +SLMVCELAGNPFNCECDLFGF +>24 +SLMVCELAGNLFNCECDLFGF +>25 +SHLSYLSVRGGFNMSSFKLKE +>26 +SHLSYLSVRGGFNMSSFKLKQ +>27 +SLLHTVSPEPPRPPQQPVPTE +>28 +SLLHTVSPEPARPPQQPVPTE +>29 +EDTGGGGRSAGQHWARLRGE +>30 +EDTGGGGRSAQHWARLRGESGLSLERHRSTLTQASSMTPHSGPRSTTSQASPAQRDTAQA +ASTREIPRASSPHRITQRDTSRASSTQQEISRASSTQQETSRASSTQEDTPRASSTQEDT +PRASSTQWNTPRASSPSRSTQLDNPRTSSTQQDNPQTSFPTCTPQRENPRTPCVQQDDPR +ASSPNRTTQRENSRTSCAQRDNPKASRTSSPNRATRDNPRTSCAQRDNPRASSPSRATRD +NPTTSCAQRDNPRASRTSSPNRATRDNPRTSCAQRDNPRASSPSRATRDNPTTSCAQRDN +PRASRTSSPNRATRDNPRTSCAQRDNPRASSPNRAARDNPTTSCAQRDNPRASRTSSPNR +ATRDNPRTSCAQRDNPRASSPNRATRDNPTTSCAQRDNPRASRTSSPNRATRDNPRTSCA +QRDNPRASSPNRTTQQDSPRTSCARRDDPRASSPNRTIQQENPRTSCALRDNPRASSPSR +TIQQENPRTSCAQRDDPRASSPNRTTQQENPRTSCARRDNPRASSRNRTIQRDNPRTSCA +QRDNPRASSPNRTIQQENLRTSCTRQDNPRTSSPNRATRDNPRTSCAQRDNLRASSPIRA +TQQDNPRTCIQQNIPRSSSTQQDNPKTSCTKRDNLRPTCTQRDRTQSFSFQRDNPGTSSS +QCCTQKENLRPSSPHRSTQWNNPRNSSPHRTNKDIPWASFPLRPTQSDGPRTSSPSRSKQ +SEVPWASIALRPTQGDRPQTSSPSRPAQHDPPQSSFGPTQYNLPSRATSSSHNPGHQSTS +RTSSPVYPAAYGAPLTSPEPSQPPCAVCIGHRDAPRASSPPRYLQHDPFPFFPEPRAPES +EPPHHEPPYIPPAVCIGHRDAPRASSPPRHTQFDPFPFLPDTSDAEHQCQSPQHEPLQLP +APVCIGYRDAPRASSPPRQAPEPSLLFQDLPRASTESLVPSMDSLHECPHIPTPVCIGHR +DAPSFSSPPRQAPEPSLFFQDPPGTSMESLAPSTDSLHGSPVLIPQVCIGHRDAPRASSP +PRHPPSDLAFLAPSPSPGSSGGSRGSAPPGETRHNLEREEYTVLADLPPPRRLAQRQPGP +QAQCSSGGRTHSPGRAEVERLFGQERRKSEAAGAFQAQDEGRSQQPSQGQSQLLRRQSSP +APSRQVTMLPAKQAELTRRSQAEPPHPWSPEKRPEGDRQLQGSPLPPRTSARTPERELRT +QRPLESGQAGPRQPLGVWQSQEEPPGSQGPHRHLERSWSSQEGGLGPGGWWGCGEPSLGA +AKAPEGAWGGTSREYKESWGQPEAWEEKPTHELPRELGKRSPLTSPPENWGGPAESSQSW 
+HSGTPTAVGWGAEGACPYPRGSERRPELDWRDLLGLLRAPGEGVWARVPSLDWEGLLELL +QARLPRKDPAGHRDDLARALGPELGPPGTNDVPEQESHSQPEGWAEATPVNGHSPALQSQ +SPVQLPSPACTSTQWPKIKVTRGPATATLAGLEQTGPLGSRSTAKGPSLPELQFQPEEPE +ESEPSRGQDPLTDQKQADSADKRPAEGKAGSPLKGRLVTSWRMPGDRPTLFNPFLLSLGV +LRWRRPDLLNFKKGWMSILDEPGEPPSPSLTTTSTSQWKKHWFVLTDSSLKYYRDSTAEE +ADELDGEIDLRSCTDVTEYAVQRNYGFQIHTKDAVYTLSAMTSGIRRNWIEALRKTVRPT +SAPDVTKLSDSNKENALHSYSTQKGPLKAGEQRAGSEVISRGGPRKADGQRQALDYVELS +PLTQASPQRARTPARTPDRLAKQEELERDLAQRSEERRKWFEATDSRTPEVPAGEGPRRG +LGAPLTEDQQNRLSEEIEKKWQELEKLPLRENKRVPLTALLNQSRGERRGPPSDGHEALE +KEVQALRAQLEAWRLQGEAPQSALRSQEDGHIPPGYISQEACERSLAEMESSHQQVMEEL +QRHHERELQRLQQEKEWLLAEETAATASAIEAMKKAYQEELSRELSKTRSLQQGPDGLRK +QHQSDVEALKRELQVLSEQYSQKCLEIGALMRQAEEREHTLRRCQQEGQELLRHNQELHG +RLSEEIDQLRGFIASQGMGNGCGRSNERSSCELEVLLRVKENELQYLKKEVQCLRDELQM +MQKDKRFTSGKYQDVYVELSHIKTRSEREIEQLKEHLRLAMAALQEKESMRNSLAEYSTG +QGSGEKAGCPWSGTGQH +>31 +GMYQPCDDMDCLSDRCKILQV +>32 +GMYQPCDDMDYLSDRCKILQV +>33 +ASVLCQRAKVAMSHFEPNEYI +>34 +ASVLCQRAKVEMSHFEPNEYI +>35 +AIAGTLKFNPETDYLTGTDGK +>36 +AIAGTLKFNPQTDYLTGTDGK +>37 +WLYYSYGLLHTYGSGGYALYF +>38 +WLYYSYGLLHIYGSGGYALYF +>39 +TSAKTTVVVTAQKRNSRRQLP +>40 +TSAKTTVVVTTQKRNSRRQLP +>41 +VPALGWEFLASTRLTSELNFL +>42 +VPALGWEFLAFTRLTSELNFL +>43 +TRVSLFGALVHSRTYDMDVRL +>44 +TRVSLFGALVRSRTYDMDVRL +>45 +AREERTEASGSESRVEPPHEN +>46 +AREERTEASGYESRVEPPHEN diff --git a/tests/test_data/pvacbind/combine_and_condense/MHC_Class_II/tmp/Test.15.fa.split_1-48.key b/tests/test_data/pvacbind/combine_and_condense/MHC_Class_II/tmp/Test.15.fa.split_1-48.key new file mode 100644 index 000000000..4ce275668 --- /dev/null +++ b/tests/test_data/pvacbind/combine_and_condense/MHC_Class_II/tmp/Test.15.fa.split_1-48.key @@ -0,0 +1,94 @@ +1: +- '1' +- 1_duplicate +2: +- '2' +3: +- '3' +4: +- '4' +5: +- '5' +6: +- '6' +7: +- '7' +8: +- '8' +9: +- '9' +10: +- '10' +11: +- '11' +12: +- '12' +13: +- '13' +14: +- '14' +15: +- '15' +16: +- '16' +17: +- '17' +18: +- '18' +19: +- '19' +20: +- '20' +21: +- '21' +22: +- '22' +23: +- '23' 
+24: +- '24' +25: +- '25' +26: +- '26' +27: +- '27' +28: +- '28' +29: +- '29' +30: +- '30' +31: +- '31' +32: +- '32' +33: +- '33' +34: +- '34' +35: +- '35' +36: +- '36' +37: +- '37' +38: +- '38' +39: +- '39' +40: +- '40' +41: +- '41' +42: +- '42' +43: +- '43' +44: +- '44' +- 44_duplicate +45: +- '45' +46: +- '46' diff --git a/tests/test_data/pvacbind/combine_and_condense/MHC_Class_II/tmp/Test_31.fa.split_1-48 b/tests/test_data/pvacbind/combine_and_condense/MHC_Class_II/tmp/Test_31.fa.split_1-48 deleted file mode 100644 index 758f07eb7..000000000 --- a/tests/test_data/pvacbind/combine_and_condense/MHC_Class_II/tmp/Test_31.fa.split_1-48 +++ /dev/null @@ -1,92 +0,0 @@ ->1 -VGCPALPLPPPPLLPLLLLLLGASGGGGGAR ->2 -VGCPALPLPPPPLLPLLPLLLLLGASGGGGGAR ->3 -IAVQPDPASAAAAAAAAAAAAAAVIPTVSTPPPFQGRP ->4 -IAVQPDPASAAAAAAAVIPTVSTPPPFQGRP ->5 -RDQTSVNSATLSRTLLAAAGGSSLQTQRGL ->6 -RDQTSVNSATLSRTLLLAAAGGSSLQTQRGL ->7 -RRSRAGRSGGSHVWTRSRDPEGSSRKQQPME ->8 -RRSRAGRSGGSHVWTHSRDPEGSSRKQQPME ->9 -KMQDSRQKAVRPLELAYCLQKCNVPLFVQHD ->10 -KMQDSRQKAVRPLELVYCLQKCNVPLFVQHD ->11 -SVQSHNNKSVNEALNHLLTEEEDYQGLRASI ->12 -SVQSHNNKSVNEALNNLLTEEEDYQGLRASI ->13 -AQGISKEDAVQGIANEDAAQGIAKEDPVQGV ->14 -AQGISKEDAVQGIANQDAAQGIAKEDPVQGV ->15 -SVSESPGKTVTISCTRSSGSIASNYVQWYQQ ->16 -SVSESPGKTVTISCTGSSGSIASNYVQWYQQ ->17 -IASNYVQWYQQRPGSSPTTVIYEDNQRPSGV ->18 -IASNYVQWYQQRPGSAPTTVIYEDNQRPSGV ->19 -AWSDAVLHHEDLIGKPGGVSLSKIERSTDQV ->20 -AWSDAVLHHEDLIGKHGGVSLSKIERSTDQV ->21 -PKKEEVKSPVKEEEKPQEVKVKEPPKKAEEE ->22 -PKKEEVKSPVKEEEKTQEVKVKEPPKKAEEE ->23 -FASLASLMVCELAGNPFNCECDLFGFLAWLV ->24 -FASLASLMVCELAGNLFNCECDLFGFLAWLV ->25 -ELTFPNRLGHSHLSYLSVRGGFNMSSFKLKE ->26 -ELTFPNRLGHSHLSYLSVRGGFNMSSFKLKQ ->27 -SSSATSLLHTVSPEPPRPPQQPVPTELSLAS ->28 -SSSATSLLHTVSPEPARPPQQPVPTELSLAS ->29 -AGQKKEDTGGGGRSAGQHWARLRGESGLSL ->30 
-AGQKKEDTGGGGRSAQHWARLRGESGLSLERHRSTLTQASSMTPHSGPRSTTSQASPAQRDTAQAASTREIPRASSPHRITQRDTSRASSTQQEISRASSTQQETSRASSTQEDTPRASSTQEDTPRASSTQWNTPRASSPSRSTQLDNPRTSSTQQDNPQTSFPTCTPQRENPRTPCVQQDDPRASSPNRTTQRENSRTSCAQRDNPKASRTSSPNRATRDNPRTSCAQRDNPRASSPSRATRDNPTTSCAQRDNPRASRTSSPNRATRDNPRTSCAQRDNPRASSPSRATRDNPTTSCAQRDNPRASRTSSPNRATRDNPRTSCAQRDNPRASSPNRAARDNPTTSCAQRDNPRASRTSSPNRATRDNPRTSCAQRDNPRASSPNRATRDNPTTSCAQRDNPRASRTSSPNRATRDNPRTSCAQRDNPRASSPNRTTQQDSPRTSCARRDDPRASSPNRTIQQENPRTSCALRDNPRASSPSRTIQQENPRTSCAQRDDPRASSPNRTTQQENPRTSCARRDNPRASSRNRTIQRDNPRTSCAQRDNPRASSPNRTIQQENLRTSCTRQDNPRTSSPNRATRDNPRTSCAQRDNLRASSPIRATQQDNPRTCIQQNIPRSSSTQQDNPKTSCTKRDNLRPTCTQRDRTQSFSFQRDNPGTSSSQCCTQKENLRPSSPHRSTQWNNPRNSSPHRTNKDIPWASFPLRPTQSDGPRTSSPSRSKQSEVPWASIALRPTQGDRPQTSSPSRPAQHDPPQSSFGPTQYNLPSRATSSSHNPGHQSTSRTSSPVYPAAYGAPLTSPEPSQPPCAVCIGHRDAPRASSPPRYLQHDPFPFFPEPRAPESEPPHHEPPYIPPAVCIGHRDAPRASSPPRHTQFDPFPFLPDTSDAEHQCQSPQHEPLQLPAPVCIGYRDAPRASSPPRQAPEPSLLFQDLPRASTESLVPSMDSLHECPHIPTPVCIGHRDAPSFSSPPRQAPEPSLFFQDPPGTSMESLAPSTDSLHGSPVLIPQVCIGHRDAPRASSPPRHPPSDLAFLAPSPSPGSSGGSRGSAPPGETRHNLEREEYTVLADLPPPRRLAQRQPGPQAQCSSGGRTHSPGRAEVERLFGQERRKSEAAGAFQAQDEGRSQQPSQGQSQLLRRQSSPAPSRQVTMLPAKQAELTRRSQAEPPHPWSPEKRPEGDRQLQGSPLPPRTSARTPERELRTQRPLESGQAGPRQPLGVWQSQEEPPGSQGPHRHLERSWSSQEGGLGPGGWWGCGEPSLGAAKAPEGAWGGTSREYKESWGQPEAWEEKPTHELPRELGKRSPLTSPPENWGGPAESSQSWHSGTPTAVGWGAEGACPYPRGSERRPELDWRDLLGLLRAPGEGVWARVPSLDWEGLLELLQARLPRKDPAGHRDDLARALGPELGPPGTNDVPEQESHSQPEGWAEATPVNGHSPALQSQSPVQLPSPACTSTQWPKIKVTRGPATATLAGLEQTGPLGSRSTAKGPSLPELQFQPEEPEESEPSRGQDPLTDQKQADSADKRPAEGKAGSPLKGRLVTSWRMPGDRPTLFNPFLLSLGVLRWRRPDLLNFKKGWMSILDEPGEPPSPSLTTTSTSQWKKHWFVLTDSSLKYYRDSTAEEADELDGEIDLRSCTDVTEYAVQRNYGFQIHTKDAVYTLSAMTSGIRRNWIEALRKTVRPTSAPDVTKLSDSNKENALHSYSTQKGPLKAGEQRAGSEVISRGGPRKADGQRQALDYVELSPLTQASPQRARTPARTPDRLAKQEELERDLAQRSEERRKWFEATDSRTPEVPAGEGPRRGLGAPLTEDQQNRLSEEIEKKWQELEKLPLRENKRVPLTALLNQSRGERRGPPSDGHEALEKEVQALRAQLEAWRLQGEAPQSALRSQEDGHIPPGYISQEACERSLAEMESSHQQVMEELQRHHERELQRLQQEKEWLLAEETAATASAIEAMKKAYQEELSRELSKTRSLQQGPDGLRKQHQSDVEALKRELQ
VLSEQYSQKCLEIGALMRQAEEREHTLRRCQQEGQELLRHNQELHGRLSEEIDQLRGFIASQGMGNGCGRSNERSSCELEVLLRVKENELQYLKKEVQCLRDELQMMQKDKRFTSGKYQDVYVELSHIKTRSEREIEQLKEHLRLAMAALQEKESMRNSLAEYSTGQGSGEKAGCPWSGTGQH ->31 -NCVTLGMYQPCDDMDCLSDRCKILQVFDDFI ->32 -NCVTLGMYQPCDDMDYLSDRCKILQVFDDFI ->33 -RQYHVASVLCQRAKVAMSHFEPNEYIHYDLL ->34 -RQYHVASVLCQRAKVEMSHFEPNEYIHYDLL ->35 -IVTALAIAGTLKFNPETDYLTGTDGKKFRLE ->36 -IVTALAIAGTLKFNPQTDYLTGTDGKKFRLE ->37 -PQGTQWLYYSYGLLHTYGSGGYALYFFPEQQ ->38 -PQGTQWLYYSYGLLHIYGSGGYALYFFPEQQ ->39 -SSQALTSAKTTVVVTAQKRNSRRQLPSFLPQ ->40 -SSQALTSAKTTVVVTTQKRNSRRQLPSFLPQ ->41 -AAIMYVPALGWEFLASTRLTSELNFLLQEID ->42 -AAIMYVPALGWEFLAFTRLTSELNFLLQEID ->43 -RFERDTRVSLFGALVHSRTYDMDVRLGLPPV ->44 -RFERDTRVSLFGALVRSRTYDMDVRLGLPPV ->45 -ILSHAAREERTEASGSESRVEPPHENGNRSL ->46 -ILSHAAREERTEASGYESRVEPPHENGNRSL diff --git a/tests/test_data/pvacbind/combine_and_condense/MHC_Class_II/tmp/Test_31.fa.split_1-48.key b/tests/test_data/pvacbind/combine_and_condense/MHC_Class_II/tmp/Test_31.fa.split_1-48.key deleted file mode 100644 index 96108ebca..000000000 --- a/tests/test_data/pvacbind/combine_and_condense/MHC_Class_II/tmp/Test_31.fa.split_1-48.key +++ /dev/null @@ -1,92 +0,0 @@ -1: -- WT.1.IGFBP2.ENST00000233809.inframe_ins.20L/LLP -2: -- MT.1.IGFBP2.ENST00000233809.inframe_ins.20L/LLP -3: -- WT.2.RBM47.ENST00000381793.inframe_del.495-502AAAAAAAA/A -4: -- MT.2.RBM47.ENST00000381793.inframe_del.495-502AAAAAAAA/A -5: -- WT.3.PRICKLE4.ENST00000458694.inframe_ins.287-288-/L -6: -- MT.3.PRICKLE4.ENST00000458694.inframe_ins.287-288-/L -7: -- WT.5.CECR2.ENST00000262608.missense.535R/H -8: -- MT.5.CECR2.ENST00000262608.missense.535R/H -9: -- WT.6.USP18.ENST00000215794.missense.124A/V -10: -- MT.6.USP18.ENST00000215794.missense.124A/V -11: -- WT.7.CLTCL1.ENST00000263200.missense.1469H/N -12: -- MT.7.CLTCL1.ENST00000263200.missense.1469H/N -13: -- WT.8.FAM230A.ENST00000434783.missense.322E/Q -14: -- MT.8.FAM230A.ENST00000434783.missense.322E/Q -15: -- WT.9.IGLV6-57.ENST00000390285.missense.43R/G -16: -- 
MT.9.IGLV6-57.ENST00000390285.missense.43R/G -17: -- WT.10.IGLV6-57.ENST00000390285.missense.63S/A -18: -- MT.10.IGLV6-57.ENST00000390285.missense.63S/A -19: -- WT.11.TPST2.ENST00000338754.missense.274P/H -20: -- MT.11.TPST2.ENST00000338754.missense.274P/H -21: -- WT.12.NEFH.ENST00000310624.missense.830P/T -22: -- MT.12.NEFH.ENST00000310624.missense.830P/T -23: -- WT.13.ELFN2.ENST00000402918.missense.186P/L -24: -- MT.13.ELFN2.ENST00000402918.missense.186P/L -25: -- WT.14.LGALS2.ENST00000215886.missense.132E/Q -26: -- MT.14.LGALS2.ENST00000215886.missense.132E/Q -27: -- WT.15.GGA1.ENST00000343632.missense.484P/A -28: -- MT.15.GGA1.ENST00000343632.missense.484P/A -29: -- WT.16.TRIOBP.ENST00000406386.FS.219GA/G -30: -- MT.16.TRIOBP.ENST00000406386.FS.219GA/G -31: -- WT.17.CACNA1I.ENST00000402142.missense.107C/Y -32: -- MT.17.CACNA1I.ENST00000402142.missense.107C/Y -33: -- WT.18.ACO2.ENST00000216254.missense.33A/E -34: -- MT.18.ACO2.ENST00000216254.missense.33A/E -35: -- WT.19.ACO2.ENST00000216254.missense.510E/Q -36: -- MT.19.ACO2.ENST00000216254.missense.510E/Q -37: -- WT.20.PKDREJ.ENST00000253255.missense.1875T/I -38: -- MT.20.PKDREJ.ENST00000253255.missense.1875T/I -39: -- WT.21.MOV10L1.ENST00000262794.missense.482A/T -40: -- MT.21.MOV10L1.ENST00000262794.missense.482A/T -41: -- WT.22.PANX2.ENST00000395842.missense.147S/F -42: -- MT.22.PANX2.ENST00000395842.missense.147S/F -43: -- WT.23.TUBGCP6.ENST00000248846.missense.220H/R -44: -- MT.23.TUBGCP6.ENST00000248846.missense.220H/R -45: -- WT.24.PPP6R2.ENST00000395741.missense.414S/Y -46: -- MT.24.PPP6R2.ENST00000395741.missense.414S/Y diff --git a/tests/test_pvacbind.py b/tests/test_pvacbind.py index c21b8cdb1..39db908f8 100644 --- a/tests/test_pvacbind.py +++ b/tests/test_pvacbind.py @@ -60,7 +60,7 @@ def generate_class_ii_call(method, allele, path, input_path): input_path, "MHC_Class_II", "tmp", - "Test.fa.split_1-48" + "Test.15.fa.split_1-48" ), mode='r') text = reader.read() reader.close() @@ -181,8 +181,10 @@ 
def test_pvacbind_pipeline(self): self.assertTrue(compare(output_file, expected_file)) for file_name in ( - 'Test.fa.split_1-48', - 'Test.fa.split_1-48.key', + 'Test.9.fa.split_1-48', + 'Test.9.fa.split_1-48.key', + 'Test.10.fa.split_1-48', + 'Test.10.fa.split_1-48.key', ): output_file = os.path.join(output_dir.name, 'MHC_Class_I', 'tmp', file_name) expected_file = os.path.join(self.test_data_directory, 'MHC_Class_I', 'tmp', file_name) @@ -210,7 +212,7 @@ def test_pvacbind_pipeline(self): for allele in methods[method].keys(): for length in methods[method][allele]: mock_request.assert_has_calls([ - generate_class_i_call(method, allele, length, os.path.join(output_dir.name, "MHC_Class_I", "tmp", "Test.fa.split_1-48")) + generate_class_i_call(method, allele, length, os.path.join(output_dir.name, "MHC_Class_I", "tmp", "Test.{}.fa.split_1-48".format(length))) ]) output_file = os.path.join(output_dir.name, "MHC_Class_I", "tmp", 'Test.%s.%s.%s.tsv_1-48' % (method, allele, length)) expected_file = os.path.join(self.test_data_directory, "MHC_Class_I", "tmp", 'Test.%s.%s.%s.tsv_1-48' % (method, allele, length)) @@ -226,8 +228,8 @@ def test_pvacbind_pipeline(self): self.assertTrue(compare(output_file, expected_file)) for file_name in ( - 'Test.fa.split_1-48', - 'Test.fa.split_1-48.key', + 'Test.15.fa.split_1-48', + 'Test.15.fa.split_1-48.key', 'Test.nn_align.H2-IAb.15.tsv_1-48', ): output_file = os.path.join(output_dir.name, 'MHC_Class_II', 'tmp', file_name)