diff --git a/CHANGELOG.md b/CHANGELOG.md index 80ce8cd..9150189 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,8 @@ # lehtiolab/ddamsproteomics: Changelog +## Version 2.14 [2023-08-22] +- Fixed off-by-one error in PTM site nr reporting on proteins (#17) + + ## Version 2.13 [2023-07-14] - Fixed PTMs/TMT18 bug which crashed pipeline diff --git a/Dockerfile b/Dockerfile index aaeeaae..86c73b9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,4 +5,4 @@ RUN apt update && apt install -y fontconfig && apt clean -y COPY environment.yml / RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/ddamsproteomics-2.13/bin:$PATH +ENV PATH /opt/conda/envs/ddamsproteomics-2.14/bin:$PATH diff --git a/Singularity b/Singularity index 2a3bdd6..3538757 100644 --- a/Singularity +++ b/Singularity @@ -3,10 +3,10 @@ Bootstrap:docker %labels DESCRIPTION Singularity image containing all requirements for the lehtiolab/ddamsproteomics pipeline - VERSION 2.13 + VERSION 2.14 %environment - PATH=/opt/conda/envs/ddamsproteomics-2.13/bin:$PATH + PATH=/opt/conda/envs/ddamsproteomics-2.14/bin:$PATH export PATH %files diff --git a/bin/luciphor_parse.py b/bin/luciphor_parse.py index a719088..4caeaec 100755 --- a/bin/luciphor_parse.py +++ b/bin/luciphor_parse.py @@ -139,9 +139,9 @@ def annotate_protein_and_flanks(psm, ptmpsm, tdb, ptmnames): continue protseq = tdb[p].seq protptms = [] - site_protlocs = [ptm['site'][1] + x for x in peplocs] + site_protlocs = [ptm['site_report'] + x for x in peplocs] protlocs = '/'.join([str(x) for x in site_protlocs]) - protptms.append(f'{ptm["site"][0]}{protlocs}') + protptms.append(f'{ptm["aa"]}{protlocs}') flankpos = [(max(x-7, 0) , min(x+8, len(protseq))) for x in site_protlocs] flankseqs.update([str(protseq[x[0]:x[1]]) for x in flankpos]) proteins_loc[p].append('{}:{}'.format(ptm['name'], ','.join(protptms))) diff --git a/bin/luciphor_prep.py b/bin/luciphor_prep.py index 3a4920e..d923813 100755 --- a/bin/luciphor_prep.py +++ b/bin/luciphor_prep.py @@ -183,6 +183,13 @@ def lucimass_mod_dict(self): class PSM: + '''A PSM class containing mods, scores, FLR, etc + Mods are defined as dicts, and apart from other info like aa, type, mass, etc, + they contain two keys, site_lucin, site_report which are zero resp. one-based + residue indices for luciphor input (zero based) and reporting to PSM tables + (one based) + ''' + def __init__(self): self.mods = [] self.top_flr = False @@ -192,6 +199,24 @@ def __init__(self): self.sequence = False self.seq_in_scorepep_fmt = False + + def get_modtype(self, mod, labileptmnames, stableptmnames): + if not mod['var']: + mtype = 'fixed' + elif mod['name_lower'] in labileptmnames: + mtype = 'labile' + elif mod['name_lower'] in stableptmnames: + mtype = 'stable' + else: + mtype = 'variable' + return mtype + + def get_mod_dict(self, residue, sitenum, modptm, labileptmnames, stableptmnames): + return {'aa': residue, 'site_lucin': sitenum, 'site_report': sitenum + 1, + 'type': self.get_modtype(modptm, labileptmnames, stableptmnames), + 'mass': modptm['mass'], 'name': modptm['name'], + 'name_lower': modptm['name_lower'], 'adjusted_mass': modptm['adjusted_mass']} + def parse_msgf_peptide(self, msgfseq, msgf_mods, labileptmnames, stableptmnames): self.mods = [] barepep = '' @@ -210,25 +235,11 @@ def parse_msgf_peptide(self, msgfseq, msgf_mods, labileptmnames, stableptmnames) start = x.end() for mass in re.findall('[\+\-][0-9.]+', x.group(2)): mod = msgf_mods[float(mass)][0] # only take first, contains enough info - self.mods.append({ - 'site': (residue, sitenum), 'type': self.get_modtype(mod, labileptmnames, stableptmnames), - 'mass': mod['mass'], 'name': mod['name'], 'name_lower': mod['name_lower'], - 'adjusted_mass': mod['adjusted_mass'] - }) + self.mods.append(self.get_mod_dict(residue, sitenum, mod, labileptmnames, + stableptmnames)) self.sequence = f'{barepep}{msgfseq[start:]}' - def get_modtype(self, mod, labileptmnames, stableptmnames): - if not mod['var']: - mtype = 'fixed' - elif mod['name_lower'] in labileptmnames: - mtype = 'labile' - elif mod['name_lower'] in stableptmnames: - mtype = 'stable' - else: - mtype = 'variable' - return mtype - - def parse_luciphor_peptide(self, luciline, ptms_map, labileptms, stabileptms): + def parse_luciphor_peptide(self, luciline, ptms_map, labileptmnames, stableptmnames): '''From a luciphor sequence, create a peptide with PTMs ptms_map = {f'{residue}int(79 + mass_S/T/Y)': {'name': Phospho, etc} ''' @@ -243,13 +254,11 @@ def parse_luciphor_peptide(self, luciline, ptms_map, labileptms, stabileptms): barepep += modpep[start:x.start()+1] start = x.end() ptm = ptms_map[f'{x.group(1)}{int(x.group(2))}'] - if ptm['name_lower'] in labileptms: + if ptm['name_lower'] in labileptmnames: sitenum = len(barepep) - 1 if len(barepep) else -100 residue = barepep[-1] if len(barepep) else '[' - self.mods.append({ - 'site': (residue, sitenum), 'type': self.get_modtype(ptm, labileptms, stabileptms), - 'mass': ptm['mass'], 'name': ptm['name'], 'name_lower': ptm['name_lower'], - }) + self.mods.append(self.get_mod_dict(residue, sitenum, ptm, labileptmnames, + stableptmnames)) self.sequence = f'{barepep}{modpep[start:]}' self.seq_in_scorepep_fmt = re.sub(r'([A-Z])\[[0-9]+\]', lambda x: x.group(1).lower(), modpep) @@ -273,7 +282,7 @@ def luciphor_input_sites(self): lucimods = [] for m in self.mods: if m['type'] != 'fixed': - lucimods.append((m['site'][1], str(m['mass'] + aa_weights_monoiso[m['site'][0]]))) + lucimods.append((m['site_lucin'], str(m['mass'] + aa_weights_monoiso[m['aa']]))) return ','.join([f'{x[0]}={x[1]}' for x in lucimods]) def add_ptms_from_psm(self, psmmods): @@ -288,7 +297,7 @@ def topptm_output(self): for ptm in self.mods: if ptm['type'] not in output_types: continue - site = f'{ptm["site"][0]}{ptm["site"][1] + 1}' + site = f'{ptm["aa"]}{ptm["site_report"]}' try: ptmsites[ptm['name']].append(site) except KeyError: diff --git a/environment.yml b/environment.yml index bfee166..2fc74f6 100644 --- a/environment.yml +++ b/environment.yml @@ -1,4 +1,4 @@ -name: ddamsproteomics-2.13 +name: ddamsproteomics-2.14 channels: - bioconda - conda-forge diff --git a/nextflow.config b/nextflow.config index ddc3fd9..833f8d4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -20,7 +20,7 @@ params { } // Container slug. Stable releases should specify release tag! -process.container = 'lehtiolab/ddamsproteomics:2.13' +process.container = 'lehtiolab/ddamsproteomics:2.14' //process.container = 'ddamsproteomics:dev' profiles { @@ -90,7 +90,7 @@ manifest { description = 'Quantitative DDA MS proteomics pipeline' mainScript = 'main.nf' nextflowVersion = '>=20.01.0' - version = '2.13' + version = '2.14' } // Function to ensure that resource requirements don't go beyond