Skip to content

Commit

Permalink
SBOL3 to SBOL2 conversion of BBa_J23101 simple Component, Sequence, a…
Browse files Browse the repository at this point in the history
…nd Activity
  • Loading branch information
jakebeal committed Oct 7, 2023
1 parent cf4cd95 commit e0d7d56
Show file tree
Hide file tree
Showing 4 changed files with 231 additions and 48 deletions.
6 changes: 3 additions & 3 deletions sbol_utilities/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from sbol_utilities.helper_functions import strip_sbol2_version, GENETIC_DESIGN_FILE_TYPES, \
find_top_level
from sbol_utilities.sbol3_genbank_conversion import GenBankSBOL3Converter
from sbol_utilities.sbol3_sbol2_conversion import SBOL2SBOL3Converter
import sbol_utilities.sbol3_sbol2_conversion
from sbol_utilities.workarounds import id_sort

# sbol javascript executable based on https://github.com/sboltools/sbolgraph
Expand Down Expand Up @@ -80,7 +80,7 @@ def convert2to3(sbol2_doc: Union[str, sbol2.Document], namespaces=None, use_nati
:return: equivalent SBOL3 document
"""
if use_native_converter:
return SBOL2SBOL3Converter.convert2to3(sbol2_doc)
return sbol_utilities.sbol3_sbol2_conversion.convert2to3(sbol2_doc)

# if we've started with a Document in memory, write it to a temp file
if namespaces is None:
Expand Down Expand Up @@ -181,7 +181,7 @@ def convert3to2(doc3: sbol3.Document, use_native_converter: bool = False) -> sbo
:return: equivalent SBOL2 document
"""
if use_native_converter:
return SBOL2SBOL3Converter.convert3to2(doc3)
return sbol_utilities.sbol3_sbol2_conversion.convert3to2(doc3)

# TODO: remove workarounds after conversion errors fixed in https://github.com/sboltools/sbolgraph/issues/16
# remap sequence encodings:
Expand Down
181 changes: 136 additions & 45 deletions sbol_utilities/sbol3_sbol2_conversion.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,100 @@
import sbol3
import sbol2

SBOL2_VERSION_PREDICATE = 'http://sboltools.org/backport#sbol2version'
# Namespaces
from rdflib import URIRef

BACKPORT_NAMESPACE = 'http://sboltools.org/backport#'
BACKPORT2_VERSION = f'{BACKPORT_NAMESPACE}sbol2version'
BACKPORT3_NAMESPACE = f'{BACKPORT_NAMESPACE}sbol3namespace'

class SBOL3ConverterVisitor:
NON_EXTENSION_PROPERTY_PREFIXES = {sbol3.SBOL3_NS, sbol3.SBOL2_NS, # SBOL 2 & 3 namespaces
sbol3.RDF_NS, sbol3.PROV_NS, sbol3.OM_NS, # Standard ontologies
BACKPORT_NAMESPACE} # Information added by this converter

doc2: sbol2.Document

def __init__(self, doc2: sbol2.Document):
self.doc2 = doc2
class SBOL3To2ConversionVisitor:
"""This class is used to map every object in an SBOL3 document into an empty SBOL2 document"""

def _convert_extension_properties(self, obj: sbol2.Identified):
"""Map over the other properties of an extension materials"""
pass
doc2: sbol2.Document

def _convert_identified(self, obj: sbol2.Identified):
def __init__(self, doc3: sbol3.Document):
    """Create an empty SBOL2 target document and convert ``doc3`` into it

    The converted result is available afterwards as ``self.doc2``.

    :param doc3: SBOL3 document to convert
    """
    # Create the target document
    self.doc2 = sbol2.Document()
    # Immediately run the conversion
    self._convert(doc3)

def _convert(self, doc3: sbol3.Document):
    """Run the visitor over ``doc3``, filling in ``self.doc2``

    pySBOL2's URI-compliance option and homespace are overridden for the duration of the visit
    and restored afterwards, whether or not the visit raises.

    :param doc3: SBOL3 document to convert
    """
    # Standard namespaces that pySBOL2 does not bind by default, as (URI, prefix) pairs
    extra_namespaces = (
        (BACKPORT_NAMESPACE, 'backport'),
        (sbol3.PROV_NS, 'prov'),
        (sbol3.OM_NS, 'om'),
        ('http://purl.org/dc/terms/', 'dcterms'),
    )
    for namespace_uri, prefix in extra_namespaces:
        self.doc2.addNamespace(namespace_uri, prefix)

    # Remember the current settings, then switch off the ones that interfere with conversion
    compliance_option = sbol2.ConfigOptions.SBOL_COMPLIANT_URIS.value
    previous_compliance = sbol2.Config.getOption(compliance_option)
    sbol2.Config.setOption(compliance_option, False)
    previous_homespace = sbol2.getHomespace()
    sbol2.setHomespace('')

    # Convert, always putting the remembered settings back afterwards
    try:
        doc3.accept(self)
    finally:
        sbol2.Config.setOption(compliance_option, previous_compliance)
        sbol2.setHomespace(previous_homespace)

@staticmethod
def _convert_extension_properties(obj3: sbol3.Identified, obj2: sbol2.Identified):
    """Copy extension properties from an SBOL3 object onto its SBOL2 counterpart

    A property counts as an extension when its name starts with none of the prefixes in
    NON_EXTENSION_PROPERTY_PREFIXES.

    :param obj3: SBOL3 object to copy from
    :param obj2: SBOL2 object to copy to
    """
    for predicate in obj3.properties:
        # Skip anything in a namespace that is not an extension
        if any(predicate.startswith(prefix) for prefix in NON_EXTENSION_PROPERTY_PREFIXES):
            continue
        # Can't use setPropertyValue because the value may not be a string
        obj2.properties[predicate] = obj3._properties[predicate]

def _convert_identified(self, obj3: sbol3.Identified, obj2: sbol2.Identified):
    """Map over the other properties of an identified object

    Extension properties are copied first, followed by the display ID, name, description,
    and provenance links.

    :param obj3: SBOL3 object to copy properties from
    :param obj2: SBOL2 object to copy properties to
    :raises NotImplementedError: if obj3 has any measures
    """
    self._convert_extension_properties(obj3, obj2)
    # Map over equivalent properties
    obj2.displayId = obj3.display_id
    obj2.name = obj3.name
    obj2.description = obj3.description
    obj2.wasDerivedFrom = obj3.derived_from
    obj2.wasGeneratedBy = obj3.generated_by
    # Turn measures into extension properties (not implemented yet, so any measure is rejected)
    if obj3.measures:
        raise NotImplementedError('Conversion of measures from SBOL3 to SBOL2 not yet implemented')
    pass

def _convert_toplevel(self, obj: sbol2.TopLevel):
"""Map over the other properties of a toplevel object"""
self._convert_identified(obj)
pass
def _convert_toplevel(self, obj3: sbol3.TopLevel, obj2: sbol2.TopLevel):
    """Map over the other properties of a TopLevel object

    :param obj3: SBOL3 TopLevel object to copy properties from
    :param obj2: SBOL2 TopLevel object to copy properties to
    """
    self._convert_identified(obj3, obj2)
    # SBOL2 attachments are set from the identities of the SBOL3 attachments
    obj2.attachments = [a.identity for a in obj3.attachments]
    # Store the SBOL3 namespace as a URI under the backport sbol3namespace predicate
    obj2.properties[BACKPORT3_NAMESPACE] = [URIRef(obj3.namespace)]

@staticmethod
def _sbol2_version(obj: sbol3.Identified):
obj.sbol2_version = sbol3.TextProperty(obj, SBOL2_VERSION_PREDICATE, 0, 1)
obj.sbol2_version = sbol3.TextProperty(obj, BACKPORT2_VERSION, 0, 1)
return obj.sbol2_version or '1'

def visit_activity(self, a: sbol3.Activity):
raise NotImplementedError('Conversion of Activity from SBOL3 to SBOL2 not yet implemented')
def visit_activity(self, act3: sbol3.Activity):
    """Convert an SBOL3 Activity into an SBOL2 Activity and add it to the target document

    :param act3: SBOL3 Activity to convert
    :raises NotImplementedError: if the activity has more than one type
    """
    # Make the Activity object and add it to the document
    act2 = sbol2.Activity(act3.identity, version=self._sbol2_version(act3))
    if act3.types:
        if len(act3.types) > 1:
            # Adjacent literals are concatenated verbatim, so each piece carries its own separator
            raise NotImplementedError('Conversion of multi-type activities to SBOL2 not yet implemented: '
                                      'pySBOL2 currently supports a maximum of one type per activity. '
                                      'Bug: https://github.com/SynBioDex/pySBOL2/issues/428')
        act2.types = act3.types[0]  # Take first type from list of length 1
    act2.startedAtTime = act3.start_time
    act2.endedAtTime = act3.end_time
    act2.usages = act3.usage
    act2.associations = act3.association
    # TODO: pySBOL3 is currently missing wasInformedBy (https://github.com/SynBioDex/pySBOL3/issues/436)
    # act2.wasInformedBy = act3.informed_by
    self.doc2.activities.add(act2)
    # Map over all other TopLevel properties and extensions not covered by the constructor
    self._convert_toplevel(act3, act2)

def visit_agent(self, a: sbol3.Agent):
raise NotImplementedError('Conversion of Agent from SBOL3 to SBOL2 not yet implemented')
Expand All @@ -50,8 +114,32 @@ def visit_collection(self, a: sbol3.Collection):
def visit_combinatorial_derivation(self, a: sbol3.CombinatorialDerivation):
raise NotImplementedError('Conversion of CombinatorialDerivation from SBOL3 to SBOL2 not yet implemented')

def visit_component(self, a: sbol3.Component):
raise NotImplementedError('Conversion of Component from SBOL3 to SBOL2 not yet implemented')
def visit_component(self, cp3: sbol3.Component):
    """Convert an SBOL3 Component into an SBOL2 ComponentDefinition in the target document

    Only types, roles, and sequences are converted, along with the generic TopLevel properties.

    :param cp3: SBOL3 Component to convert
    :raises NotImplementedError: if the component has features, interactions, constraints,
        an interface, or models
    """
    # SBOL3 type -> SBOL2 type; any type not listed here passes through unchanged
    sbol3_to_sbol2_type = {
        sbol3.SBO_DNA: sbol2.BIOPAX_DNA,  # TODO: distinguish biopax Dna from DnaRegion
        sbol3.SBO_RNA: sbol2.BIOPAX_RNA,  # TODO: distinguish biopax Rna from RnaRegion
        sbol3.SBO_PROTEIN: sbol2.BIOPAX_PROTEIN,
        sbol3.SBO_SIMPLE_CHEMICAL: sbol2.BIOPAX_SMALL_MOLECULE,
        sbol3.SBO_NON_COVALENT_COMPLEX: sbol2.BIOPAX_COMPLEX,
    }
    converted_types = [sbol3_to_sbol2_type.get(type_uri, type_uri) for type_uri in cp3.types]
    # Build the ComponentDefinition and register it with the target document
    cp2 = sbol2.ComponentDefinition(cp3.identity, converted_types, version=self._sbol2_version(cp3))
    self.doc2.addComponentDefinition(cp2)
    # Properties the constructor does not cover
    cp2.roles = cp3.roles
    cp2.sequences = cp3.sequences
    # Child-object properties that cannot be converted yet, checked in this order
    unsupported = (
        ('features', 'Conversion of Component features from SBOL3 to SBOL2 not yet implemented'),
        ('interactions', 'Conversion of Component interactions from SBOL3 to SBOL2 not yet implemented'),
        ('constraints', 'Conversion of Component constraints from SBOL3 to SBOL2 not yet implemented'),
        ('interface', 'Conversion of Component interface from SBOL3 to SBOL2 not yet implemented'),
        ('models', 'Conversion of Component models from SBOL3 to SBOL2 not yet implemented'),
    )
    for attribute, message in unsupported:
        if getattr(cp3, attribute):
            raise NotImplementedError(message)
    # Finish with the generic TopLevel properties and extensions
    self._convert_toplevel(cp3, cp2)

def visit_component_reference(self, a: sbol3.ComponentReference):
raise NotImplementedError('Conversion of ComponentReference from SBOL3 to SBOL2 not yet implemented')
Expand All @@ -62,8 +150,9 @@ def visit_constraint(self, a: sbol3.Constraint):
def visit_cut(self, a: sbol3.Cut):
raise NotImplementedError('Conversion of Cut from SBOL3 to SBOL2 not yet implemented')

def visit_document(self, a: sbol3.Document):
raise NotImplementedError('Conversion of Document from SBOL3 to SBOL2 not yet implemented')
def visit_document(self, doc3: sbol3.Document):
    """Visit every object in ``doc3.objects`` with this converter

    :param doc3: SBOL3 document being converted
    """
    for obj in doc3.objects:
        obj.accept(self)

def visit_entire_sequence(self, a: sbol3.EntireSequence):
raise NotImplementedError('Conversion of EntireSequence from SBOL3 to SBOL2 not yet implemented')
Expand Down Expand Up @@ -110,12 +199,17 @@ def visit_range(self, a: sbol3.Range):
def visit_si_prefix(self, a: sbol3.SIPrefix):
raise NotImplementedError('Conversion of SIPrefix from SBOL3 to SBOL2 not yet implemented')

def visit_sequence(self, seq: sbol3.Sequence):
def visit_sequence(self, seq3: sbol3.Sequence):
# Remap encoding if it's one of the ones that needs remapping; otherwise pass through unchanged
encoding_map = {sbol3.IUPAC_DNA_ENCODING: sbol2.SBOL_ENCODING_IUPAC,
sbol3.IUPAC_PROTEIN_ENCODING: sbol2.SBOL_ENCODING_IUPAC_PROTEIN,
sbol3.SMILES_ENCODING: sbol2.SBOL_ENCODING_SMILES}
encoding2 = encoding_map.get(seq3.encoding, seq3.encoding)
# Make the Sequence object and add it to the document
seq = sbol2.Sequence(seq.identity, seq.elements, encoding=seq.encoding, version=self._sbol2_version(seq))
self.doc2.addSequence(seq)
# Add all of the other TopLevel properties and extensions not already covered
self._convert_toplevel(seq)
seq2 = sbol2.Sequence(seq3.identity, seq3.elements, encoding=encoding2, version=self._sbol2_version(seq3))
self.doc2.addSequence(seq2)
# Map over all other TopLevel properties and extensions not covered by the constructor
self._convert_toplevel(seq3, seq2)

def visit_sequence_feature(self, a: sbol3.SequenceFeature):
raise NotImplementedError('Conversion of SequenceFeature from SBOL3 to SBOL2 not yet implemented')
Expand All @@ -142,25 +236,22 @@ def visit_variable_feature(self, a: sbol3.VariableFeature):
raise NotImplementedError('Conversion of VariableFeature from SBOL3 to SBOL2 not yet implemented')


class SBOL2SBOL3Converter:
@staticmethod
def convert3to2(doc3: sbol3.Document) -> sbol2.Document:
"""Convert an SBOL3 document to an SBOL2 document
def convert3to2(doc3: sbol3.Document) -> sbol2.Document:
"""Convert an SBOL3 document to an SBOL2 document
:param doc3: SBOL3 document to convert
:returns: SBOL2 document
"""
doc2 = sbol2.Document()
doc3.accept(SBOL3ConverterVisitor(doc2))
return doc2
:param doc3: SBOL3 document to convert
:returns: SBOL2 document
"""
converter = SBOL3To2ConversionVisitor(doc3)
return converter.doc2

@staticmethod
def convert2to3(doc2: sbol2.Document) -> sbol3.Document:
"""Convert an SBOL2 document to an SBOL3 document
:param doc2: SBOL2 document to convert
:returns: SBOL3 document
"""
doc3 = sbol3.Document()
# TODO: build converter here
return doc3

def convert2to3(doc2: sbol2.Document) -> sbol3.Document:
    """Convert an SBOL2 document to an SBOL3 document

    NOTE: the converter is not built yet. ``doc2`` is currently ignored and a newly created
    SBOL3 document is returned without anything added to it.

    :param doc2: SBOL2 document to convert
    :returns: SBOL3 document
    """
    doc3 = sbol3.Document()
    # TODO: build converter here
    return doc3
62 changes: 62 additions & 0 deletions test/test_files/BBa_J23101.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
<rdf:RDF xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:xsd="http://www.w3.org/2001/XMLSchema#" xmlns:backport="http://sboltools.org/backport#" xmlns:prov="http://www.w3.org/ns/prov#" xmlns:om="http://www.ontology-of-units-of-measure.org/resource/om-2/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:sbol="http://sbols.org/v2#" xmlns:ns0="http://wiki.synbiohub.org/wiki/Terms/synbiohub#" xmlns:ns1="http://purl.org/dc/elements/1.1/" xmlns:ns2="http://wiki.synbiohub.org/wiki/Terms/igem#">
<sbol:ComponentDefinition rdf:about="https://synbiohub.org/public/igem/BBa_J23101">
<ns0:mutableDescription>later</ns0:mutableDescription>
<ns0:ownedBy rdf:resource="https://synbiohub.org/user/myers"/>
<sbol:type rdf:resource="http://www.biopax.org/release/biopax-level3.owl#DnaRegion"/>
<sbol:version>1</sbol:version>
<ns0:mutableNotes>N/A</ns0:mutableNotes>
<ns2:sampleStatus>In stock</ns2:sampleStatus>
<dcterms:title>BBa_J23101</dcterms:title>
<ns2:partStatus>Released HQ 2013</ns2:partStatus>
<ns0:bookmark>true</ns0:bookmark>
<ns0:mutableProvenance>later</ns0:mutableProvenance>
<ns0:topLevel rdf:resource="https://synbiohub.org/public/igem/BBa_J23101"/>
<ns2:dominant>true</ns2:dominant>
<sbol:role rdf:resource="http://identifiers.org/so/SO:0000167"/>
<ns0:star>true</ns0:star>
<ns0:ownedBy rdf:resource="https://synbiohub.org/user/james"/>
<backport:sbol3namespace rdf:resource="https://synbiohub.org"/>
<prov:wasDerivedFrom rdf:resource="http://parts.igem.org/Part:BBa_J23101"/>
<ns2:discontinued>false</ns2:discontinued>
<dcterms:modified>2015-08-31T04:08:40Z</dcterms:modified>
<sbol:persistentIdentity rdf:resource="https://synbiohub.org/public/igem/BBa_J23101"/>
<dcterms:created>2006-08-03T11:00:00Z</dcterms:created>
<sbol:sequence rdf:resource="https://synbiohub.org/public/igem/BBa_J23101_sequence"/>
<sbol:displayId>BBa_J23101</sbol:displayId>
<dcterms:description>constitutive promoter family member</dcterms:description>
<ns2:status rdf:resource="http://wiki.synbiohub.org/wiki/Terms/igem#status/Available"/>
<sbol:role rdf:resource="http://wiki.synbiohub.org/wiki/Terms/igem#partType/Regulatory"/>
<ns2:owner_id>483</ns2:owner_id>
<ns2:owning_group_id>95</ns2:owning_group_id>
<ns2:group_u_list>_52_</ns2:group_u_list>
<prov:wasGeneratedBy rdf:resource="https://synbiohub.org/public/igem/igem2sbol"/>
<ns1:creator>John Anderson</ns1:creator>
<ns2:m_user_id>0</ns2:m_user_id>
<ns2:experience rdf:resource="http://wiki.synbiohub.org/wiki/Terms/igem#experience/Works"/>
</sbol:ComponentDefinition>
<sbol:Sequence rdf:about="https://synbiohub.org/public/igem/BBa_J23101_sequence">
<sbol:displayId>BBa_J23101_sequence</sbol:displayId>
<ns0:ownedBy rdf:resource="https://synbiohub.org/user/james"/>
<backport:sbol3namespace rdf:resource="https://synbiohub.org"/>
<sbol:persistentIdentity rdf:resource="https://synbiohub.org/public/igem/BBa_J23101_sequence"/>
<prov:wasDerivedFrom rdf:resource="http://parts.igem.org/Part:BBa_J23101"/>
<prov:wasGeneratedBy rdf:resource="https://synbiohub.org/public/igem/igem2sbol"/>
<ns0:topLevel rdf:resource="https://synbiohub.org/public/igem/BBa_J23101_sequence"/>
<sbol:encoding rdf:resource="http://www.chem.qmul.ac.uk/iubmb/misc/naseq.html"/>
<sbol:elements>tttacagctagctcagtcctaggtattatgctagc</sbol:elements>
<ns0:ownedBy rdf:resource="https://synbiohub.org/user/myers"/>
<sbol:version>1</sbol:version>
</sbol:Sequence>
<prov:Activity rdf:about="https://synbiohub.org/public/igem/igem2sbol">
<ns1:creator>Chris J. Myers</ns1:creator>
<ns0:ownedBy rdf:resource="https://synbiohub.org/user/myers"/>
<prov:endedAtTime>2017-03-06T15:00:00+00:00</prov:endedAtTime>
<sbol:version>1</sbol:version>
<ns0:topLevel rdf:resource="https://synbiohub.org/public/igem/igem2sbol"/>
<backport:sbol3namespace rdf:resource="https://synbiohub.org"/>
<ns1:creator>James Alastair McLaughlin</ns1:creator>
<ns0:ownedBy rdf:resource="https://synbiohub.org/user/james"/>
<sbol:persistentIdentity rdf:resource="https://synbiohub.org/public/igem/igem2sbol"/>
<sbol:displayId>igem2sbol</sbol:displayId>
</prov:Activity>
</rdf:RDF>
30 changes: 30 additions & 0 deletions test/test_sbol2_sbol3_direct.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import tempfile
from pathlib import Path

import unittest

import sbol2
import sbol3

from sbol_utilities.conversion import convert2to3, convert3to2
from sbol_utilities.sbol_diff import file_diff

TEST_FILES = Path(__file__).parent / 'test_files'


class TestDirectSBOL2SBOL3Conversion(unittest.TestCase):
    """Tests of SBOL2/SBOL3 conversion using the native converter"""

    def test_3to2_conversion(self):
        """Test ability to convert a simple part from SBOL3 to SBOL2"""
        # Load an SBOL3 document
        doc3 = sbol3.Document()
        doc3.read(TEST_FILES / 'BBa_J23101.nt')
        # Convert to SBOL2 with the native converter and compare against the expected file
        doc2 = convert3to2(doc3, use_native_converter=True)
        # Write into a temporary directory instead of a still-open NamedTemporaryFile:
        # reopening such a file by name is platform-dependent and fails on Windows
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_path = str(Path(tmp_dir) / 'BBa_J23101.xml')
            doc2.write(tmp_path)
            self.assertFalse(file_diff(tmp_path, str(TEST_FILES / 'BBa_J23101.xml')))


if __name__ == '__main__':
unittest.main()

0 comments on commit e0d7d56

Please sign in to comment.