Skip to content

Commit

Permalink
Fix off-by-one error in PTM protein site nr reporting #17
Browse files Browse the repository at this point in the history
  • Loading branch information
glormph committed Aug 22, 2023
1 parent ba84b7c commit b3e8c54
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 26 deletions.
4 changes: 2 additions & 2 deletions bin/luciphor_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,9 @@ def annotate_protein_and_flanks(psm, ptmpsm, tdb, ptmnames):
continue
protseq = tdb[p].seq
protptms = []
site_protlocs = [ptm['site'][1] + x for x in peplocs]
site_protlocs = [ptm['site_report'] + x for x in peplocs]
protlocs = '/'.join([str(x) for x in site_protlocs])
protptms.append(f'{ptm["site"][0]}{protlocs}')
protptms.append(f'{ptm["aa"]}{protlocs}')
flankpos = [(max(x-7, 0) , min(x+8, len(protseq))) for x in site_protlocs]
flankseqs.update([str(protseq[x[0]:x[1]]) for x in flankpos])
proteins_loc[p].append('{}:{}'.format(ptm['name'], ','.join(protptms)))
Expand Down
57 changes: 33 additions & 24 deletions bin/luciphor_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,13 @@ def lucimass_mod_dict(self):


class PSM:
'''A PSM class containing mods, scores, FLR, etc.
Mods are defined as dicts. Apart from other info such as aa, type, mass, etc.,
each dict contains two keys, site_lucin and site_report, which both hold the
residue index of the mod: site_lucin is zero-based (used for luciphor input)
and site_report is one-based (used for reporting to PSM tables).
'''

def __init__(self):
self.mods = []
self.top_flr = False
Expand All @@ -192,6 +199,24 @@ def __init__(self):
self.sequence = False
self.seq_in_scorepep_fmt = False


def get_modtype(self, mod, labileptmnames, stableptmnames):
if not mod['var']:
mtype = 'fixed'
elif mod['name_lower'] in labileptmnames:
mtype = 'labile'
elif mod['name_lower'] in stableptmnames:
mtype = 'stable'
else:
mtype = 'variable'
return mtype

def get_mod_dict(self, residue, sitenum, modptm, labileptmnames, stableptmnames):
return {'aa': residue, 'site_lucin': sitenum, 'site_report': sitenum + 1,
'type': self.get_modtype(modptm, labileptmnames, stableptmnames),
'mass': modptm['mass'], 'name': modptm['name'],
'name_lower': modptm['name_lower'], 'adjusted_mass': modptm['adjusted_mass']}

def parse_msgf_peptide(self, msgfseq, msgf_mods, labileptmnames, stableptmnames):
self.mods = []
barepep = ''
Expand All @@ -210,25 +235,11 @@ def parse_msgf_peptide(self, msgfseq, msgf_mods, labileptmnames, stableptmnames)
start = x.end()
for mass in re.findall('[\+\-][0-9.]+', x.group(2)):
mod = msgf_mods[float(mass)][0] # only take first, contains enough info
self.mods.append({
'site': (residue, sitenum), 'type': self.get_modtype(mod, labileptmnames, stableptmnames),
'mass': mod['mass'], 'name': mod['name'], 'name_lower': mod['name_lower'],
'adjusted_mass': mod['adjusted_mass']
})
self.mods.append(self.get_mod_dict(residue, sitenum, mod, labileptmnames,
stableptmnames))
self.sequence = f'{barepep}{msgfseq[start:]}'

def get_modtype(self, mod, labileptmnames, stableptmnames):
if not mod['var']:
mtype = 'fixed'
elif mod['name_lower'] in labileptmnames:
mtype = 'labile'
elif mod['name_lower'] in stableptmnames:
mtype = 'stable'
else:
mtype = 'variable'
return mtype

def parse_luciphor_peptide(self, luciline, ptms_map, labileptms, stabileptms):
def parse_luciphor_peptide(self, luciline, ptms_map, labileptmnames, stableptmnames):
'''From a luciphor sequence, create a peptide with PTMs
ptms_map = {f'{residue}{int(79 + mass_S/T/Y)}': {'name': 'Phospho', etc}}
'''
Expand All @@ -243,13 +254,11 @@ def parse_luciphor_peptide(self, luciline, ptms_map, labileptms, stabileptms):
barepep += modpep[start:x.start()+1]
start = x.end()
ptm = ptms_map[f'{x.group(1)}{int(x.group(2))}']
if ptm['name_lower'] in labileptms:
if ptm['name_lower'] in labileptmnames:
sitenum = len(barepep) - 1 if len(barepep) else -100
residue = barepep[-1] if len(barepep) else '['
self.mods.append({
'site': (residue, sitenum), 'type': self.get_modtype(ptm, labileptms, stabileptms),
'mass': ptm['mass'], 'name': ptm['name'], 'name_lower': ptm['name_lower'],
})
self.mods.append(self.get_mod_dict(residue, sitenum, ptm, labileptmnames,
stableptmnames))
self.sequence = f'{barepep}{modpep[start:]}'
self.seq_in_scorepep_fmt = re.sub(r'([A-Z])\[[0-9]+\]', lambda x: x.group(1).lower(), modpep)

Expand All @@ -273,7 +282,7 @@ def luciphor_input_sites(self):
lucimods = []
for m in self.mods:
if m['type'] != 'fixed':
lucimods.append((m['site'][1], str(m['mass'] + aa_weights_monoiso[m['site'][0]])))
lucimods.append((m['site_lucin'], str(m['mass'] + aa_weights_monoiso[m['aa']])))
return ','.join([f'{x[0]}={x[1]}' for x in lucimods])

def add_ptms_from_psm(self, psmmods):
Expand All @@ -288,7 +297,7 @@ def topptm_output(self):
for ptm in self.mods:
if ptm['type'] not in output_types:
continue
site = f'{ptm["site"][0]}{ptm["site"][1] + 1}'
site = f'{ptm["aa"]}{ptm["site_report"]}'
try:
ptmsites[ptm['name']].append(site)
except KeyError:
Expand Down

0 comments on commit b3e8c54

Please sign in to comment.