From 8fc1b20bdf3296e6b7a37f26a4763dee0271e30d Mon Sep 17 00:00:00 2001 From: Benjamin Clancy Date: Tue, 17 Sep 2024 15:32:04 -0400 Subject: [PATCH] Change AppDomain tool to output sdc instead of inside-app-domain --- .../applicabilityDomainScript.py | 13 ++++---- applicabilityDomain/example_output.csv | 32 +++++++++---------- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/applicabilityDomain/applicabilityDomainScript.py b/applicabilityDomain/applicabilityDomainScript.py index 284a818..9edfdd8 100644 --- a/applicabilityDomain/applicabilityDomainScript.py +++ b/applicabilityDomain/applicabilityDomainScript.py @@ -7,8 +7,6 @@ from chembl_structure_pipeline import standardizer from rdkit.Chem import rdFingerprintGenerator -APPLICABILITY_DOMAIN_THRESHOLD = 0.2 - def argParse(): parser = argparse.ArgumentParser() parser.add_argument("-i", "--inputFile", nargs=1, help="Location of the csv containing the SMILES you wish to test") @@ -32,6 +30,7 @@ def __init__(self, name, original): self.smiles = None self.fp = None + def standardize(compounds): for compound in compounds: try: @@ -74,21 +73,21 @@ def getCompounds(path): def applicabilityDomain(testCompounds, proteinCompounds): for compound in testCompounds: if compound.fp != None: - outsideApplicabilityDomain = {} + sdc = {} maxTanimotoSimilarity = 0 for proteinCompound in proteinCompounds: maxTanimotoSimilarity = max(maxTanimotoSimilarity, DataStructs.FingerprintSimilarity(compound.fp, proteinCompound.fp, metric=DataStructs.TanimotoSimilarity)) - outsideApplicabilityDomain = maxTanimotoSimilarity > APPLICABILITY_DOMAIN_THRESHOLD - compound.outsideApplicabilityDomain = outsideApplicabilityDomain + sdc = maxTanimotoSimilarity + compound.sdc = sdc def write_csv(compounds, outputChannel): writer = csv.writer(outputChannel, lineterminator='\n') - header = ["Name", "SMILES (original)", "SMILES (standardized)", "Within Applicability Domain"] + header = ["Name", "SMILES (original)", "SMILES (standardized)", "SDC"] _ = writer.writerow(header) for compound in compounds: - row = [compound.name, compound.original, compound.smiles, compound.outsideApplicabilityDomain == False] + row = [compound.name, compound.original, compound.smiles, compound.sdc] _ = writer.writerow(row) if __name__ == "__main__": diff --git a/applicabilityDomain/example_output.csv b/applicabilityDomain/example_output.csv index fff9077..16aefe1 100644 --- a/applicabilityDomain/example_output.csv +++ b/applicabilityDomain/example_output.csv @@ -1,16 +1,16 @@ -Name,SMILES (original),SMILES (standardized),Within Applicability Domain -Verapamil,COC1=C(OC)C=C(CCN(C)CCCC(C#N)(C(C)C)C2=CC(OC)=C(OC)C=C2)C=C1,COc1ccc(CCN(C)CCCC(C#N)(c2ccc(OC)c(OC)c2)C(C)C)cc1OC,False -Lopinavir,CC1=C(C(=CC=C1)C)OCC(=O)NC(CC2=CC=CC=C2)C(CC(CC3=CC=CC=C3)NC(=O)C(C(C)C)N4CCCNC4=O)O,Cc1cccc(C)c1OCC(=O)NC(Cc1ccccc1)C(O)CC(Cc1ccccc1)NC(=O)C(C(C)C)N1CCCNC1=O,False -Probenecid,CCCN(CCC)S(=O)(=O)C1=CC=C(C=C1)C(=O)O,CCCN(CCC)S(=O)(=O)c1ccc(C(=O)O)cc1,True -Saquinavir,CC(C)(C)NC(=O)C1CC2CCCCC2CN1CC(C(CC3=CC=CC=C3)NC(=O)C(CC(=O)N)NC(=O)C4=NC5=CC=CC=C5C=C4)O,CC(C)(C)NC(=O)C1CC2CCCCC2CN1CC(O)C(Cc1ccccc1)NC(=O)C(CC(N)=O)NC(=O)c1ccc2ccccc2n1,False -Bromosulphophthalein,[O-]S(=O)(=O)c1c(O)ccc(c1)C3(OC(=O)c2c(Br)c(Br)c(Br)c(Br)c23)c4ccc(O)c(c4)S([O-])(=O)=O,O=C1OC(c2ccc(O)c(S(=O)(=O)O)c2)(c2ccc(O)c(S(=O)(=O)O)c2)c2c(Br)c(Br)c(Br)c(Br)c21,True -Methotrexate,CN(CC1=CN=C2C(=N1)C(=NC(=N2)N)N)C3=CC=C(C=C3)C(=O)NC(CCC(=O)O)C(=O)O,CN(Cc1cnc2nc(N)nc(N)c2n1)c1ccc(C(=O)NC(CCC(=O)O)C(=O)O)cc1,True -Furosemide,C1=COC(=C1)CNC2=CC(=C(C=C2C(=O)O)S(=O)(=O)N)Cl,NS(=O)(=O)c1cc(C(=O)O)c(NCc2ccco2)cc1Cl,False -Metformin,CN(C)C(=N)N=C(N)N,CN(C)C(=N)N=C(N)N,False -Cimetidine,CC1=C(N=CN1)CSCCNC(=NC)NC#N,CN=C(NC#N)NCCSCc1nc[nH]c1C,False -Procainamide,CCN(CC)CCNC(=O)C1=CC=C(C=C1)N,CCN(CC)CCNC(=O)c1ccc(N)cc1,True -Oestrone,CC12CCC3C(C1CCC2=O)CCC4=C3C=CC(=C4)O,CC12CCC3c4ccc(O)cc4CCC3C1CCC2=O,True -Pravastatin,CCC(C)C(=O)OC1CC(C=C2C1C(C(C=C2)C)CCC(CC(CC(=O)O)O)O)O,CCC(C)C(=O)OC1CC(O)C=C2C=CC(C)C(CCC(O)CC(O)CC(=O)O)C21,True -Delaviridine,CC(C)NC1=C(N=CC=C1)N1CCN(CC1)C(=O)C1=CC2=C(N1)C=CC(NS(C)(=O)=O)=C2,CC(C)Nc1cccnc1N1CCN(C(=O)c2cc3cc(NS(C)(=O)=O)ccc3[nH]2)CC1,False -Loperamide,CN(C)C(=O)C(CCN1CCC(O)(CC1)C1=CC=C(Cl)C=C1)(C1=CC=CC=C1)C1=CC=CC=C1,CN(C)C(=O)C(CCN1CCC(O)(c2ccc(Cl)cc2)CC1)(c1ccccc1)c1ccccc1,False -Rifampicin,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,COC1C=COC2(C)Oc3c(C)c(O)c4c(O)c(c(C=NN5CCN(C)CC5)c(O)c4c3C2=O)NC(=O)C(C)=CC=CC(C)C(O)C(C)C(O)C(C)C(OC(C)=O)C1C,True +Name,SMILES (original),SMILES (standardized),SDC +Verapamil,COC1=C(OC)C=C(CCN(C)CCCC(C#N)(C(C)C)C2=CC(OC)=C(OC)C=C2)C=C1,COc1ccc(CCN(C)CCCC(C#N)(c2ccc(OC)c(OC)c2)C(C)C)cc1OC,0.23529411764705882 +Lopinavir,CC1=C(C(=CC=C1)C)OCC(=O)NC(CC2=CC=CC=C2)C(CC(CC3=CC=CC=C3)NC(=O)C(C(C)C)N4CCCNC4=O)O,Cc1cccc(C)c1OCC(=O)NC(Cc1ccccc1)C(O)CC(Cc1ccccc1)NC(=O)C(C(C)C)N1CCCNC1=O,0.21794871794871795 +Probenecid,CCCN(CCC)S(=O)(=O)C1=CC=C(C=C1)C(=O)O,CCCN(CCC)S(=O)(=O)c1ccc(C(=O)O)cc1,0.19696969696969696 +Saquinavir,CC(C)(C)NC(=O)C1CC2CCCCC2CN1CC(C(CC3=CC=CC=C3)NC(=O)C(CC(=O)N)NC(=O)C4=NC5=CC=CC=C5C=C4)O,CC(C)(C)NC(=O)C1CC2CCCCC2CN1CC(O)C(Cc1ccccc1)NC(=O)C(CC(N)=O)NC(=O)c1ccc2ccccc2n1,0.20454545454545456 +Bromosulphophthalein,[O-]S(=O)(=O)c1c(O)ccc(c1)C3(OC(=O)c2c(Br)c(Br)c(Br)c(Br)c23)c4ccc(O)c(c4)S([O-])(=O)=O,O=C1OC(c2ccc(O)c(S(=O)(=O)O)c2)(c2ccc(O)c(S(=O)(=O)O)c2)c2c(Br)c(Br)c(Br)c(Br)c21,0.1509433962264151 +Methotrexate,CN(CC1=CN=C2C(=N1)C(=NC(=N2)N)N)C3=CC=C(C=C3)C(=O)NC(CCC(=O)O)C(=O)O,CN(Cc1cnc2nc(N)nc(N)c2n1)c1ccc(C(=O)NC(CCC(=O)O)C(=O)O)cc1,0.15789473684210525 +Furosemide,C1=COC(=C1)CNC2=CC(=C(C=C2C(=O)O)S(=O)(=O)N)Cl,NS(=O)(=O)c1cc(C(=O)O)c(NCc2ccco2)cc1Cl,0.21666666666666667 +Metformin,CN(C)C(=N)N=C(N)N,CN(C)C(=N)N=C(N)N,0.36 +Cimetidine,CC1=C(N=CN1)CSCCNC(=NC)NC#N,CN=C(NC#N)NCCSCc1nc[nH]c1C,0.6862745098039216 +Procainamide,CCN(CC)CCNC(=O)C1=CC=C(C=C1)N,CCN(CC)CCNC(=O)c1ccc(N)cc1,0.19117647058823528 +Oestrone,CC12CCC3C(C1CCC2=O)CCC4=C3C=CC(=C4)O,CC12CCC3c4ccc(O)cc4CCC3C1CCC2=O,0.13043478260869565 +Pravastatin,CCC(C)C(=O)OC1CC(C=C2C1C(C(C=C2)C)CCC(CC(CC(=O)O)O)O)O,CCC(C)C(=O)OC1CC(O)C=C2C=CC(C)C(CCC(O)CC(O)CC(=O)O)C21,0.13953488372093023 +Delaviridine,CC(C)NC1=C(N=CC=C1)N1CCN(CC1)C(=O)C1=CC2=C(N1)C=CC(NS(C)(=O)=O)=C2,CC(C)Nc1cccnc1N1CCN(C(=O)c2cc3cc(NS(C)(=O)=O)ccc3[nH]2)CC1,0.21978021978021978 +Loperamide,CN(C)C(=O)C(CCN1CCC(O)(CC1)C1=CC=C(Cl)C=C1)(C1=CC=CC=C1)C1=CC=CC=C1,CN(C)C(=O)C(CCN1CCC(O)(c2ccc(Cl)cc2)CC1)(c1ccccc1)c1ccccc1,0.20833333333333334 +Rifampicin,CC1C=CC=C(C(=O)NC2=C(C(=C3C(=C2O)C(=C(C4=C3C(=O)C(O4)(OC=CC(C(C(C(C(C(C1O)C)O)C)OC(=O)C)C)OC)C)C)O)O)C=NN5CCN(CC5)C)C,COC1C=COC2(C)Oc3c(C)c(O)c4c(O)c(c(C=NN5CCN(C)CC5)c(O)c4c3C2=O)NC(=O)C(C)=CC=CC(C)C(O)C(C)C(O)C(C)C(OC(C)=O)C1C,0.11643835616438356