Merge pull request #18 from lehtiolab/dev

Release 2.14
lehtiolab · Aug 22, 2023 · 1ad76f6 · 1ad76f6
2 parents ba84b7c + 09d8511
commit 1ad76f6
Show file tree

Hide file tree

Showing 7 changed files with 45 additions and 32 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,4 +1,8 @@
 # lehtiolab/ddamsproteomics: Changelog
+## Version 2.14 [2023-08-22]
+- Fixed off-by-one error in PTM site number reporting on proteins (#17)
+
+
 ## Version 2.13 [2023-07-14]
 - Fixed PTMs/TMT18 bug which crashed pipeline
 

diff --git a/Dockerfile b/Dockerfile
@@ -5,4 +5,4 @@ RUN apt update && apt install -y fontconfig && apt clean -y
 
 COPY environment.yml /
 RUN conda env create -f /environment.yml && conda clean -a
-ENV PATH /opt/conda/envs/ddamsproteomics-2.13/bin:$PATH
+ENV PATH /opt/conda/envs/ddamsproteomics-2.14/bin:$PATH
diff --git a/Singularity b/Singularity
@@ -3,10 +3,10 @@ Bootstrap:docker
 
 %labels
     DESCRIPTION Singularity image containing all requirements for the lehtiolab/ddamsproteomics pipeline
-    VERSION 2.13
+    VERSION 2.14
 
 %environment
-    PATH=/opt/conda/envs/ddamsproteomics-2.13/bin:$PATH
+    PATH=/opt/conda/envs/ddamsproteomics-2.14/bin:$PATH
     export PATH
 
 %files

diff --git a/bin/luciphor_parse.py b/bin/luciphor_parse.py
@@ -139,9 +139,9 @@ def annotate_protein_and_flanks(psm, ptmpsm, tdb, ptmnames):
                 continue
             protseq = tdb[p].seq
             protptms = []
-            site_protlocs = [ptm['site'][1] + x for x in peplocs]
+            site_protlocs = [ptm['site_report'] + x for x in peplocs]
             protlocs = '/'.join([str(x) for x in site_protlocs])
-            protptms.append(f'{ptm["site"][0]}{protlocs}')
+            protptms.append(f'{ptm["aa"]}{protlocs}')
             flankpos = [(max(x-7, 0) , min(x+8, len(protseq))) for x in site_protlocs]
             flankseqs.update([str(protseq[x[0]:x[1]]) for x in flankpos])
             proteins_loc[p].append('{}:{}'.format(ptm['name'], ','.join(protptms)))

diff --git a/bin/luciphor_prep.py b/bin/luciphor_prep.py
@@ -183,6 +183,13 @@ def lucimass_mod_dict(self):
 
 
 class PSM: 
+    '''A PSM class containing mods, scores, FLR, etc.
+    Mods are defined as dicts. Apart from other info like aa, type, mass, etc.,
+    they contain two keys, site_lucin and site_report, which hold the residue
+    index zero-based (site_lucin, used for luciphor input) and one-based
+    (site_report, used for reporting to PSM tables), respectively.
+    '''
+
     def __init__(self):
         self.mods = []
         self.top_flr = False
@@ -192,6 +199,24 @@ def __init__(self):
         self.sequence = False
         self.seq_in_scorepep_fmt = False
 
+
+    def get_modtype(self, mod, labileptmnames, stableptmnames):
+        if not mod['var']:
+            mtype = 'fixed'
+        elif mod['name_lower'] in labileptmnames:
+            mtype = 'labile'
+        elif mod['name_lower'] in stableptmnames:
+            mtype = 'stable'
+        else:
+            mtype = 'variable'
+        return mtype
+
+    def get_mod_dict(self, residue, sitenum, modptm, labileptmnames, stableptmnames):
+        return {'aa': residue, 'site_lucin': sitenum, 'site_report': sitenum + 1,
+                'type': self.get_modtype(modptm, labileptmnames, stableptmnames),
+                'mass': modptm['mass'], 'name': modptm['name'],
+                'name_lower': modptm['name_lower'], 'adjusted_mass': modptm['adjusted_mass']}
+
     def parse_msgf_peptide(self, msgfseq, msgf_mods, labileptmnames, stableptmnames):
         self.mods = []
         barepep = ''
@@ -210,25 +235,11 @@ def parse_msgf_peptide(self, msgfseq, msgf_mods, labileptmnames, stableptmnames)
             start = x.end()
             for mass in re.findall('[\+\-][0-9.]+', x.group(2)):
                 mod = msgf_mods[float(mass)][0] # only take first, contains enough info
-                self.mods.append({
-                    'site': (residue, sitenum), 'type': self.get_modtype(mod, labileptmnames, stableptmnames),
-                    'mass': mod['mass'], 'name': mod['name'], 'name_lower': mod['name_lower'],
-                    'adjusted_mass': mod['adjusted_mass']
-                    })
+                self.mods.append(self.get_mod_dict(residue, sitenum, mod, labileptmnames,
+                    stableptmnames))
         self.sequence = f'{barepep}{msgfseq[start:]}'
 
-    def get_modtype(self, mod, labileptmnames, stableptmnames):
-        if not mod['var']:
-            mtype = 'fixed'
-        elif mod['name_lower'] in labileptmnames:
-            mtype = 'labile'
-        elif mod['name_lower'] in stableptmnames:
-            mtype = 'stable'
-        else:
-            mtype = 'variable'
-        return mtype
-
-    def parse_luciphor_peptide(self, luciline, ptms_map, labileptms, stabileptms):
+    def parse_luciphor_peptide(self, luciline, ptms_map, labileptmnames, stableptmnames):
         '''From a luciphor sequence, create a peptide with PTMs
         ptms_map = {f'{residue}int(79 + mass_S/T/Y)': {'name': Phospho, etc}
         '''
@@ -243,13 +254,11 @@ def parse_luciphor_peptide(self, luciline, ptms_map, labileptms, stabileptms):
                 barepep += modpep[start:x.start()+1]
             start = x.end()
             ptm = ptms_map[f'{x.group(1)}{int(x.group(2))}']
-            if ptm['name_lower'] in labileptms:
+            if ptm['name_lower'] in labileptmnames:
                 sitenum = len(barepep) - 1 if len(barepep) else -100
                 residue = barepep[-1] if len(barepep) else '['
-                self.mods.append({
-                    'site': (residue, sitenum), 'type': self.get_modtype(ptm, labileptms, stabileptms),
-                    'mass': ptm['mass'], 'name': ptm['name'], 'name_lower': ptm['name_lower'],
-                    })
+                self.mods.append(self.get_mod_dict(residue, sitenum, ptm, labileptmnames,
+                    stableptmnames))
         self.sequence = f'{barepep}{modpep[start:]}'
         self.seq_in_scorepep_fmt = re.sub(r'([A-Z])\[[0-9]+\]', lambda x: x.group(1).lower(), modpep)
 
@@ -273,7 +282,7 @@ def luciphor_input_sites(self):
         lucimods = []
         for m in self.mods:
             if m['type'] != 'fixed':
-                lucimods.append((m['site'][1], str(m['mass'] + aa_weights_monoiso[m['site'][0]])))
+                lucimods.append((m['site_lucin'], str(m['mass'] + aa_weights_monoiso[m['aa']])))
         return ','.join([f'{x[0]}={x[1]}' for x in lucimods])
 
     def add_ptms_from_psm(self, psmmods):
@@ -288,7 +297,7 @@ def topptm_output(self):
         for ptm in self.mods:
             if ptm['type'] not in output_types:
                 continue
-            site = f'{ptm["site"][0]}{ptm["site"][1] + 1}'
+            site = f'{ptm["aa"]}{ptm["site_report"]}'
             try:
                 ptmsites[ptm['name']].append(site)
             except KeyError:

diff --git a/environment.yml b/environment.yml
@@ -1,4 +1,4 @@
-name: ddamsproteomics-2.13
+name: ddamsproteomics-2.14
 channels:
   - bioconda
   - conda-forge

diff --git a/nextflow.config b/nextflow.config
@@ -20,7 +20,7 @@ params {
 }
 
 // Container slug. Stable releases should specify release tag!
-process.container = 'lehtiolab/ddamsproteomics:2.13'
+process.container = 'lehtiolab/ddamsproteomics:2.14'
 //process.container = 'ddamsproteomics:dev'
 
 profiles {
@@ -90,7 +90,7 @@ manifest {
   description = 'Quantitative DDA MS proteomics pipeline'
   mainScript = 'main.nf'
   nextflowVersion = '>=20.01.0'
-  version = '2.13'
+  version = '2.14'
 }
 
 // Function to ensure that resource requirements don't go beyond