Merge branch 'griffithlab:master' into master
Layth17 authored Oct 27, 2023
2 parents 23c0908 + 60dcec2 commit 8ce356e
Showing 11 changed files with 2,862 additions and 2,836 deletions.
2 changes: 1 addition & 1 deletion docs/conf.py
@@ -70,7 +70,7 @@
# The short X.Y version.
version = '4.0'
# The full version, including alpha/beta/rc tags.
release = '4.0.3'
release = '4.0.5'


# The language for content autogenerated by Sphinx. Refer to documentation
11 changes: 9 additions & 2 deletions docs/index.rst
@@ -56,8 +56,15 @@ New in Release |release|

This is a bugfix release. It fixes the following problem(s):

- This fixes an issue in the reference proteome similarity step in pVACseq
  where running with non-human data would cause an error.
- In recent releases, users have noticed that at some point during pipeline
  runs, MHCflurry prediction jobs would hang or get killed. We were able to
  determine that the cause was related to
  `PR 988 <https://github.com/griffithlab/pVACtools/pull/988>`_.
  That PR updated the calls to MHCflurry to instantiate its predictor within
  Python instead of invoking it on the command line. However, we suspect that
  this caused a substantial increase in memory usage, resulting in the
  observed behavior. This release reverts the change from PR 988.

New in Version |version|
------------------------
23 changes: 23 additions & 0 deletions docs/releases/4_0.rst
@@ -106,3 +106,26 @@ This is a bugfix release. It fixes the following problem(s):

- This fixes an issue in the reference proteome similarity step in pVACseq
  where running with non-human data would cause an error.

New in Version 4.0.4
--------------------

This is a bugfix release. It fixes the following problem(s):

- This release makes various fixes to allow pVACtools to run with non-human
data.

New in Version 4.0.5
--------------------

This is a bugfix release. It fixes the following problem(s):

- In recent releases, users have noticed that at some point during pipeline
  runs, MHCflurry prediction jobs would hang or get killed. We were able to
  determine that the cause was related to
  `PR 988 <https://github.com/griffithlab/pVACtools/pull/988>`_.
  That PR updated the calls to MHCflurry to instantiate its predictor within
  Python instead of invoking it on the command line. However, we suspect that
  this caused a substantial increase in memory usage, resulting in the
  observed behavior. This release reverts the change from PR 988.
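For context, the difference between the two invocation styles described in this note can be sketched roughly as follows. This is an illustrative sketch, not the exact pVACtools code; the function name predict_with_cli and its arguments are placeholders, and only command-line flags that appear in the diff below (--alleles, --out, --peptides) are used:

    # Reverted approach (PR 988): load the MHCflurry predictor in-process, which
    # keeps the presentation models resident in the pipeline's own memory:
    #   from mhcflurry.class1_presentation_predictor import Class1PresentationPredictor
    #   predictor = Class1PresentationPredictor.load(models_dir)
    #   df = predictor.predict(peptides=epitopes, alleles={allele: [allele]})

    # Restored approach: shell out to the mhcflurry-predict command-line tool so
    # that model loading and prediction happen in a separate process.
    import subprocess
    import tempfile

    import pandas as pd

    def predict_with_cli(epitopes, allele):
        # Write predictions to a temporary CSV and read them back as a DataFrame.
        with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as out:
            out_name = out.name
        cmd = ["mhcflurry-predict", "--alleles", allele,
               "--out", out_name, "--peptides"] + list(epitopes)
        subprocess.run(cmd, check=True)
        return pd.read_csv(out_name)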
8 changes: 1 addition & 7 deletions pvactools/lib/calculate_reference_proteome_similarity.py
@@ -269,13 +269,7 @@ def _input_tsv_type(self, line):
def _get_full_peptide(self, line, mt_records_dict, wt_records_dict):
for record_id in mt_records_dict.keys():
(rest_record_id, variant_type, aa_change) = record_id.rsplit(".", 2)
transcript_regex = '^.*(ENS[0-9|A-Z|.]+)$'
transcript_p = re.compile(transcript_regex)
m = transcript_p.match(rest_record_id)
if m:
transcript = m.group(1)
else:
raise Exception("Unexpected record_id format: {}".format(record_id))
(count, gene, transcript) = rest_record_id.split(".", 2)
(parsed_aa_change, pos, wt_aa, mt_aa) = index_to_aggregate_report_aa_change(aa_change, variant_type)
if line['Best Transcript'] == transcript and line['AA Change'] == parsed_aa_change:
return (mt_records_dict[record_id], wt_records_dict[record_id], variant_type, mt_aa, wt_aa)
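The replacement line assumes the record ID follows a fixed dot-separated layout (count.gene.transcript.variant_type.aa_change) rather than matching an Ensembl-style transcript regex. A minimal illustration, using the record naming visible in the updated test FASTA at the end of this commit (the exact keys of mt_records_dict may differ):

    record_id = "1.Rp1.ENSMUST00000027032.missense.1453N/S"  # hypothetical dict key
    rest_record_id, variant_type, aa_change = record_id.rsplit(".", 2)
    # rest_record_id == "1.Rp1.ENSMUST00000027032", variant_type == "missense", aa_change == "1453N/S"
    count, gene, transcript = rest_record_id.split(".", 2)
    # count == "1", gene == "Rp1", transcript == "ENSMUST00000027032"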
46 changes: 21 additions & 25 deletions pvactools/lib/prediction_class.py
@@ -13,11 +6,6 @@
from Bio import SeqIO
import random
import uuid
from mhcflurry.downloads import get_default_class1_presentation_models_dir
from mhcflurry.class1_presentation_predictor import Class1PresentationPredictor
import numpy

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

class IEDB(metaclass=ABCMeta):
@classmethod
@@ -323,28 +318,29 @@ def predict(self, input_file, allele, epitope_length, iedb_executable_path, iedb

all_epitopes = list(set(all_epitopes))
if len(all_epitopes) > 0:
models_dir = get_default_class1_presentation_models_dir(test_exists=True)
predictor = Class1PresentationPredictor.load(models_dir)
df = predictor.predict(
peptides=numpy.array(all_epitopes, dtype='object'),
n_flanks=None,
c_flanks=None,
alleles={allele: [allele]},
throw=True,
include_affinity_percentile=True,
verbose=0
)
tmp_output_file = tempfile.NamedTemporaryFile('r', dir=tmp_dir, delete=False)
arguments = ["mhcflurry-predict", "--alleles", allele, "--out", tmp_output_file.name, "--peptides"]
arguments.extend(all_epitopes)
stderr_fh = tempfile.NamedTemporaryFile('w', dir=tmp_dir, delete=False)
try:
response = run(arguments, check=True, stdout=DEVNULL, stderr=stderr_fh)
except:
stderr_fh.close()
with open(stderr_fh.name, 'r') as fh:
err = fh.read()
os.unlink(stderr_fh.name)
raise Exception("An error occurred while calling MHCflurry:\n{}".format(err))
stderr_fh.close()
os.unlink(stderr_fh.name)
tmp_output_file.close()
df = pd.read_csv(tmp_output_file.name)
os.unlink(tmp_output_file.name)
df.rename(columns={
'prediction': 'ic50',
'affinity': 'ic50',
'prediction_percentile': 'percentile',
'affinity_percentile': 'percentile',
'processing_score': 'mhcflurry_processing_score',
'presentation_score': 'mhcflurry_presentation_score',
'presentation_percentile': 'mhcflurry_presentation_percentile',
'best_allele': 'allele',
'mhcflurry_prediction': 'ic50',
'mhcflurry_affinity': 'ic50',
'mhcflurry_prediction_percentile': 'percentile',
'mhcflurry_affinity_percentile': 'percentile'
}, inplace=True)
df.drop(labels='peptide_num', axis=1, inplace=True)
for record in SeqIO.parse(input_file, "fasta"):
seq_num = record.id
peptide = str(record.seq)
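With the command-line call restored, the DataFrame read back from mhcflurry-predict carries mhcflurry_-prefixed column names, which is why the rename keys above changed. A toy sketch of the normalization; the sample peptide and values are made up, and only column names that appear in the diff are used:

    import pandas as pd

    # Pretend mhcflurry-predict output (only a few columns shown).
    df = pd.DataFrame({
        "peptide": ["SIINFEKL"],
        "mhcflurry_affinity": [25.3],
        "mhcflurry_affinity_percentile": [0.12],
    })
    df.rename(columns={
        "mhcflurry_prediction": "ic50",
        "mhcflurry_affinity": "ic50",
        "mhcflurry_prediction_percentile": "percentile",
        "mhcflurry_affinity_percentile": "percentile",
    }, inplace=True)
    print(df.columns.tolist())  # ['peptide', 'ic50', 'percentile']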
8 changes: 1 addition & 7 deletions pvactools/tools/pvacseq/generate_protein_fasta.py
@@ -166,13 +166,7 @@ def parse_files(output_file, temp_dir, mutant_only, input_tsv, aggregate_report_
continue
else:
(rest_record_id, variant_type, aa_change) = record_id.rsplit(".", 2)
transcript_regex = '^.*(ENST[0-9|.]+)$'
transcript_p = re.compile(transcript_regex)
m = transcript_p.match(rest_record_id)
if m:
transcript = m.group(1)
else:
raise Exception("Unexpected record_id format: {}".format(record_id))
(peptide_type, count, gene, transcript) = rest_record_id.split(".", 3)
(parsed_aa_change, _, _, _) = index_to_aggregate_report_aa_change(aa_change, variant_type)
matches = [i for i in tsv_indexes if i['Best Transcript'] == transcript and i['AA Change'] == parsed_aa_change and i['Evaluation'] in aggregate_report_evaluation]
if len(matches) == 0:
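Here the record IDs carry an additional leading peptide type (WT/MT), matching the updated FASTA headers at the end of this commit, so the split keeps one more field. A minimal illustration:

    record_id = "MT.1.Rp1.ENSMUST00000027032.missense.1453N/S"  # header format from the updated test FASTA
    rest_record_id, variant_type, aa_change = record_id.rsplit(".", 2)
    peptide_type, count, gene, transcript = rest_record_id.split(".", 3)
    # peptide_type == "MT", count == "1", gene == "Rp1", transcript == "ENSMUST00000027032"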
16 changes: 14 additions & 2 deletions pvactools/tools/pvacview/server.R
@@ -96,7 +96,13 @@ server <- shinyServer(function(input, output, session) {
df$allele_specific_anchors <- df$metricsData$`allele_specific_anchors`
df$anchor_contribution <- df$metricsData$`anchor_contribution_threshold`
hla <- df$metricsData$alleles
converted_hla_names <- unlist(lapply(hla, function(x) {strsplit(x, "HLA-")[[1]][2]}))
converted_hla_names <- unlist(lapply(hla, function(x) {
if (grepl("HLA-", x)) {
strsplit(x, "HLA-")[[1]][2]
} else {
x
}
}))
if (!("Ref Match" %in% colnames(df$mainTable))) {
df$mainTable$`Ref Match` <- "Not Run"
}
@@ -172,7 +178,13 @@
df$allele_specific_anchors <- df$metricsData$`allele_specific_anchors`
df$anchor_contribution <- df$metricsData$`anchor_contribution_threshold`
hla <- df$metricsData$alleles
converted_hla_names <- unlist(lapply(hla, function(x) {strsplit(x, "HLA-")[[1]][2]}))
converted_hla_names <- unlist(lapply(hla, function(x) {
if (grepl("HLA-", x)) {
strsplit(x, "HLA-")[[1]][2]
} else {
x
}
}))
if (!("Ref Match" %in% colnames(df$mainTable))) {
df$mainTable$`Ref Match` <- "Not Run"
}
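The guarded version above keeps allele names that lack the HLA- prefix (e.g. non-human MHC alleles) intact instead of producing NA. The same idea expressed in Python for illustration; the mouse allele name is an assumed example:

    def convert_hla_name(name):
        # Strip a leading "HLA-" prefix when present; otherwise return the name unchanged.
        return name.split("HLA-", 1)[1] if "HLA-" in name else name

    print(convert_hla_name("HLA-A*02:01"))  # A*02:01
    print(convert_hla_name("H-2-Kb"))       # H-2-Kb passes through unchanged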
2 changes: 1 addition & 1 deletion setup.py
@@ -51,7 +51,7 @@

setup(
name="pvactools",
version="4.0.3",
version="4.0.5",
packages=[
"pvactools.tools",
"pvactools.tools.pvacbind",
4 changes: 2 additions & 2 deletions tests/test_call_iedb.py
@@ -96,8 +96,8 @@ def test_mhcflurry_method_generates_expected_files(self):
])
if sys.platform == 'darwin':
expected_output_file = os.path.join(self.test_data_dir, 'output_mhcflurry_osx.tsv')
expected_df = pd.read_csv(expected_output_file, sep="\t", index_col=[0,8,9])
actual_df = pd.read_csv(call_iedb_output_file.name, sep="\t", index_col=[0,8,9])
expected_df = pd.read_csv(expected_output_file, sep="\t", index_col=[1,7,8])
actual_df = pd.read_csv(call_iedb_output_file.name, sep="\t", index_col=[1,7,8])
pd.testing.assert_frame_equal(expected_df, actual_df, check_like=True, check_exact=False, rtol=0.05)

def test_mhcnuggetsi_method_generates_expected_files(self):
@@ -1,4 +1,4 @@
>WT.Rp1.ENSMUST00000027032.missense.1453N/S
>WT.1.Rp1.ENSMUST00000027032.missense.1453N/S
IAGTLKFNPETDYLTGTDG
>MT.Rp1.ENSMUST00000027032.missense.1453N/S
>MT.1.Rp1.ENSMUST00000027032.missense.1453N/S
IAGTLKFNPQTDYLTGTDG