Skip to content

Commit

Permalink
Change AppDomain tool to output sdc instead of inside-app-domain
Browse files Browse the repository at this point in the history
  • Loading branch information
ben-clancy committed Sep 17, 2024
1 parent 225cf29 commit 8fc1b20
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 23 deletions.
13 changes: 6 additions & 7 deletions applicabilityDomain/applicabilityDomainScript.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
from chembl_structure_pipeline import standardizer
from rdkit.Chem import rdFingerprintGenerator

APPLICABILITY_DOMAIN_THRESHOLD = 0.2

def argParse():
parser = argparse.ArgumentParser()
parser.add_argument("-i", "--inputFile", nargs=1, help="Location of the csv containing the SMILES you wish to test")
Expand All @@ -32,6 +30,7 @@ def __init__(self, name, original):
self.smiles = None
self.fp = None


def standardize(compounds):
for compound in compounds:
try:
Expand Down Expand Up @@ -74,21 +73,21 @@ def getCompounds(path):
def applicabilityDomain(testCompounds, proteinCompounds):
# NOTE(review): this span appears to be a diff rendering -- lines removed and lines added
# by the commit are shown together without +/- markers, and indentation has been stripped.
# Purpose: for every test compound that has a fingerprint, find its highest Tanimoto
# similarity to any compound in proteinCompounds and store the result on the compound.
for compound in testCompounds:
# Compounds whose fingerprint is None are skipped; nothing is assigned to them here.
if compound.fp != None:
# Presumably the pre-commit placeholder; the `sdc` line after it looks like its replacement.
outsideApplicabilityDomain = {}
# Placeholder value; `sdc` is reassigned further down before being stored.
sdc = {}
# Running maximum, starting from 0.
maxTanimotoSimilarity = 0
for proteinCompound in proteinCompounds:
# Keep the larger of the current maximum and this pair's Tanimoto similarity.
maxTanimotoSimilarity = max(maxTanimotoSimilarity, DataStructs.FingerprintSimilarity(compound.fp, proteinCompound.fp, metric=DataStructs.TanimotoSimilarity))
# Presumably pre-commit lines: a boolean flag, true when the maximum similarity exceeds
# APPLICABILITY_DOMAIN_THRESHOLD, stored as compound.outsideApplicabilityDomain.
outsideApplicabilityDomain = maxTanimotoSimilarity > APPLICABILITY_DOMAIN_THRESHOLD
compound.outsideApplicabilityDomain = outsideApplicabilityDomain
# Presumably the post-commit lines: the raw maximum similarity is stored as compound.sdc.
sdc = maxTanimotoSimilarity
compound.sdc = sdc

def write_csv(compounds, outputChannel):
# NOTE(review): this span appears to be a diff rendering -- the two `header` lines and the
# two `row` lines look like before/after pairs from the commit, shown without +/- markers.
# Purpose: write one header row and then one row per compound to outputChannel as CSV.
# Rows are terminated with '\n' instead of the csv module's default terminator.
writer = csv.writer(outputChannel, lineterminator='\n')

# Presumably the pre-commit header (last column: "Within Applicability Domain").
header = ["Name", "SMILES (original)", "SMILES (standardized)", "Within Applicability Domain"]
# Presumably the post-commit header (last column: "SDC").
header = ["Name", "SMILES (original)", "SMILES (standardized)", "SDC"]
# The return value of writerow is discarded by assigning it to `_`.
_ = writer.writerow(header)

for compound in compounds:
# Presumably the pre-commit row: the last column is True when outsideApplicabilityDomain is False.
row = [compound.name, compound.original, compound.smiles, compound.outsideApplicabilityDomain == False]
# Presumably the post-commit row: the last column is the value stored in compound.sdc.
row = [compound.name, compound.original, compound.smiles, compound.sdc]
_ = writer.writerow(row)

if __name__ == "__main__":
Expand Down
32 changes: 16 additions & 16 deletions applicabilityDomain/example_output.csv
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
Name,SMILES (original),SMILES (standardized),Within Applicability Domain
Verapamil,COC1=C(OC)C=C(CCN(C)CCCC(C#N)(C(C)C)C2=CC(OC)=C(OC)C=C2)C=C1,COc1ccc(CCN(C)CCCC(C#N)(c2ccc(OC)c(OC)c2)C(C)C)cc1OC,False
Lopinavir,CC1=C(C(=CC=C1)C)OCC(=O)NC(CC2=CC=CC=C2)C(CC(CC3=CC=CC=C3)NC(=O)C(C(C)C)N4CCCNC4=O)O,Cc1cccc(C)c1OCC(=O)NC(Cc1ccccc1)C(O)CC(Cc1ccccc1)NC(=O)C(C(C)C)N1CCCNC1=O,False
Probenecid,CCCN(CCC)S(=O)(=O)C1=CC=C(C=C1)C(=O)O,CCCN(CCC)S(=O)(=O)c1ccc(C(=O)O)cc1,True
Saquinavir,CC(C)(C)NC(=O)C1CC2CCCCC2CN1CC(C(CC3=CC=CC=C3)NC(=O)C(CC(=O)N)NC(=O)C4=NC5=CC=CC=C5C=C4)O,CC(C)(C)NC(=O)C1CC2CCCCC2CN1CC(O)C(Cc1ccccc1)NC(=O)C(CC(N)=O)NC(=O)c1ccc2ccccc2n1,False
Bromosulphophthalein,[O-]S(=O)(=O)c1c(O)ccc(c1)C3(OC(=O)c2c(Br)c(Br)c(Br)c(Br)c23)c4ccc(O)c(c4)S([O-])(=O)=O,O=C1OC(c2ccc(O)c(S(=O)(=O)O)c2)(c2ccc(O)c(S(=O)(=O)O)c2)c2c(Br)c(Br)c(Br)c(Br)c21,True
Methotrexate,CN(CC1=CN=C2C(=N1)C(=NC(=N2)N)N)C3=CC=C(C=C3)C(=O)NC(CCC(=O)O)C(=O)O,CN(Cc1cnc2nc(N)nc(N)c2n1)c1ccc(C(=O)NC(CCC(=O)O)C(=O)O)cc1,True
Furosemide,C1=COC(=C1)CNC2=CC(=C(C=C2C(=O)O)S(=O)(=O)N)Cl,NS(=O)(=O)c1cc(C(=O)O)c(NCc2ccco2)cc1Cl,False
Metformin,CN(C)C(=N)N=C(N)N,CN(C)C(=N)N=C(N)N,False
Cimetidine,CC1=C(N=CN1)CSCCNC(=NC)NC#N,CN=C(NC#N)NCCSCc1nc[nH]c1C,False
Procainamide,CCN(CC)CCNC(=O)C1=CC=C(C=C1)N,CCN(CC)CCNC(=O)c1ccc(N)cc1,True
Oestrone,CC12CCC3C(C1CCC2=O)CCC4=C3C=CC(=C4)O,CC12CCC3c4ccc(O)cc4CCC3C1CCC2=O,True
Pravastatin,CCC(C)C(=O)OC1CC(C=C2C1C(C(C=C2)C)CCC(CC(CC(=O)O)O)O)O,CCC(C)C(=O)OC1CC(O)C=C2C=CC(C)C(CCC(O)CC(O)CC(=O)O)C21,True
Delaviridine,CC(C)NC1=C(N=CC=C1)N1CCN(CC1)C(=O)C1=CC2=C(N1)C=CC(NS(C)(=O)=O)=C2,CC(C)Nc1cccnc1N1CCN(C(=O)c2cc3cc(NS(C)(=O)=O)ccc3[nH]2)CC1,False
Loperamide,CN(C)C(=O)C(CCN1CCC(O)(CC1)C1=CC=C(Cl)C=C1)(C1=CC=CC=C1)C1=CC=CC=C1,CN(C)C(=O)C(CCN1CCC(O)(c2ccc(Cl)cc2)CC1)(c1ccccc1)c1ccccc1,False
Rifampicin,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,COC1C=COC2(C)Oc3c(C)c(O)c4c(O)c(c(C=NN5CCN(C)CC5)c(O)c4c3C2=O)NC(=O)C(C)=CC=CC(C)C(O)C(C)C(O)C(C)C(OC(C)=O)C1C,True
Name,SMILES (original),SMILES (standardized),SDC
Verapamil,COC1=C(OC)C=C(CCN(C)CCCC(C#N)(C(C)C)C2=CC(OC)=C(OC)C=C2)C=C1,COc1ccc(CCN(C)CCCC(C#N)(c2ccc(OC)c(OC)c2)C(C)C)cc1OC,0.23529411764705882
Lopinavir,CC1=C(C(=CC=C1)C)OCC(=O)NC(CC2=CC=CC=C2)C(CC(CC3=CC=CC=C3)NC(=O)C(C(C)C)N4CCCNC4=O)O,Cc1cccc(C)c1OCC(=O)NC(Cc1ccccc1)C(O)CC(Cc1ccccc1)NC(=O)C(C(C)C)N1CCCNC1=O,0.21794871794871795
Probenecid,CCCN(CCC)S(=O)(=O)C1=CC=C(C=C1)C(=O)O,CCCN(CCC)S(=O)(=O)c1ccc(C(=O)O)cc1,0.19696969696969696
Saquinavir,CC(C)(C)NC(=O)C1CC2CCCCC2CN1CC(C(CC3=CC=CC=C3)NC(=O)C(CC(=O)N)NC(=O)C4=NC5=CC=CC=C5C=C4)O,CC(C)(C)NC(=O)C1CC2CCCCC2CN1CC(O)C(Cc1ccccc1)NC(=O)C(CC(N)=O)NC(=O)c1ccc2ccccc2n1,0.20454545454545456
Bromosulphophthalein,[O-]S(=O)(=O)c1c(O)ccc(c1)C3(OC(=O)c2c(Br)c(Br)c(Br)c(Br)c23)c4ccc(O)c(c4)S([O-])(=O)=O,O=C1OC(c2ccc(O)c(S(=O)(=O)O)c2)(c2ccc(O)c(S(=O)(=O)O)c2)c2c(Br)c(Br)c(Br)c(Br)c21,0.1509433962264151
Methotrexate,CN(CC1=CN=C2C(=N1)C(=NC(=N2)N)N)C3=CC=C(C=C3)C(=O)NC(CCC(=O)O)C(=O)O,CN(Cc1cnc2nc(N)nc(N)c2n1)c1ccc(C(=O)NC(CCC(=O)O)C(=O)O)cc1,0.15789473684210525
Furosemide,C1=COC(=C1)CNC2=CC(=C(C=C2C(=O)O)S(=O)(=O)N)Cl,NS(=O)(=O)c1cc(C(=O)O)c(NCc2ccco2)cc1Cl,0.21666666666666667
Metformin,CN(C)C(=N)N=C(N)N,CN(C)C(=N)N=C(N)N,0.36
Cimetidine,CC1=C(N=CN1)CSCCNC(=NC)NC#N,CN=C(NC#N)NCCSCc1nc[nH]c1C,0.6862745098039216
Procainamide,CCN(CC)CCNC(=O)C1=CC=C(C=C1)N,CCN(CC)CCNC(=O)c1ccc(N)cc1,0.19117647058823528
Oestrone,CC12CCC3C(C1CCC2=O)CCC4=C3C=CC(=C4)O,CC12CCC3c4ccc(O)cc4CCC3C1CCC2=O,0.13043478260869565
Pravastatin,CCC(C)C(=O)OC1CC(C=C2C1C(C(C=C2)C)CCC(CC(CC(=O)O)O)O)O,CCC(C)C(=O)OC1CC(O)C=C2C=CC(C)C(CCC(O)CC(O)CC(=O)O)C21,0.13953488372093023
Delaviridine,CC(C)NC1=C(N=CC=C1)N1CCN(CC1)C(=O)C1=CC2=C(N1)C=CC(NS(C)(=O)=O)=C2,CC(C)Nc1cccnc1N1CCN(C(=O)c2cc3cc(NS(C)(=O)=O)ccc3[nH]2)CC1,0.21978021978021978
Loperamide,CN(C)C(=O)C(CCN1CCC(O)(CC1)C1=CC=C(Cl)C=C1)(C1=CC=CC=C1)C1=CC=CC=C1,CN(C)C(=O)C(CCN1CCC(O)(c2ccc(Cl)cc2)CC1)(c1ccccc1)c1ccccc1,0.20833333333333334
Rifampicin,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,COC1C=COC2(C)Oc3c(C)c(O)c4c(O)c(c(C=NN5CCN(C)CC5)c(O)c4c3C2=O)NC(=O)C(C)=CC=CC(C)C(O)C(C)C(O)C(C)C(OC(C)=O)C1C,0.11643835616438356

0 comments on commit 8fc1b20

Please sign in to comment.