From 28fc8c204dea89fde49270c05a08dbdb677f7d07 Mon Sep 17 00:00:00 2001
From: The Open Journals editorial robot
 <89919391+editorialbot@users.noreply.github.com>
Date: Thu, 9 Nov 2023 13:52:19 +0000
Subject: [PATCH] Creating 10.21105.joss.05763.jats

---
 joss.05763/10.21105.joss.05763.jats | 708 ++++++++++++++++++++++++++++
 1 file changed, 708 insertions(+)
 create mode 100644 joss.05763/10.21105.joss.05763.jats
diff --git a/joss.05763/10.21105.joss.05763.jats b/joss.05763/10.21105.joss.05763.jats
new file mode 100644
index 0000000000..faf311ccf7
--- /dev/null
+++ b/joss.05763/10.21105.joss.05763.jats
@@ -0,0 +1,708 @@
+<?xml version="1.0" encoding="utf-8" ?>
+<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.2 20190208//EN"
+                  "JATS-publishing1.dtd">
+<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="1.2" article-type="other">
+<front>
+<journal-meta>
+<journal-id></journal-id>
+<journal-title-group>
+<journal-title>Journal of Open Source Software</journal-title>
+<abbrev-journal-title>JOSS</abbrev-journal-title>
+</journal-title-group>
+<issn publication-format="electronic">2475-9066</issn>
+<publisher>
+<publisher-name>Open Journals</publisher-name>
+</publisher>
+</journal-meta>
+<article-meta>
+<article-id pub-id-type="publisher-id">5763</article-id>
+<article-id pub-id-type="doi">10.21105/joss.05763</article-id>
+<title-group>
+<article-title>OpenFEPOPS: A Python implementation of the FEPOPS
+molecular similarity technique</article-title>
+</title-group>
+<contrib-group>
+<contrib contrib-type="author">
+<contrib-id contrib-id-type="orcid">https://orcid.org/0000-0001-7161-9503</contrib-id>
+<name>
+<surname>Chen</surname>
+<given-names>Yan-Kai</given-names>
+</name>
+<xref ref-type="aff" rid="aff-1"/>
+</contrib>
+<contrib contrib-type="author">
+<contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-3469-1546</contrib-id>
+<name>
+<surname>Houston</surname>
+<given-names>Douglas R.</given-names>
+</name>
+<xref ref-type="aff" rid="aff-1"/>
+</contrib>
+<contrib contrib-type="author">
+<contrib-id contrib-id-type="orcid">https://orcid.org/0000-0001-8920-3522</contrib-id>
+<name>
+<surname>Auer</surname>
+<given-names>Manfred</given-names>
+</name>
+<xref ref-type="aff" rid="aff-1"/>
+<xref ref-type="aff" rid="aff-2"/>
+</contrib>
+<contrib contrib-type="author" corresp="yes">
+<contrib-id contrib-id-type="orcid">https://orcid.org/0000-0001-6996-3663</contrib-id>
+<name>
+<surname>Shave</surname>
+<given-names>Steven</given-names>
+</name>
+<xref ref-type="aff" rid="aff-1"/>
+<xref ref-type="corresp" rid="cor-1"><sup>*</sup></xref>
+</contrib>
+<aff id="aff-1">
+<institution-wrap>
+<institution>School of Biological Sciences, University of Edinburgh, The
+King’s Buildings, Max Born Crescent, CH Waddington Building, Edinburgh,
+EH9 3BF, United Kingdom.</institution>
+</institution-wrap>
+</aff>
+<aff id="aff-2">
+<institution-wrap>
+<institution>Xenobe Research Institute, P. O. Box 3052, San Diego,
+California, 92163, United States.</institution>
+</institution-wrap>
+</aff>
+</contrib-group>
+<author-notes>
+<corresp id="cor-1">* E-mail: <email></email></corresp>
+</author-notes>
+<pub-date date-type="pub" publication-format="electronic" iso-8601-date="2023-08-06">
+<day>6</day>
+<month>8</month>
+<year>2023</year>
+</pub-date>
+<volume>8</volume>
+<issue>91</issue>
+<fpage>5763</fpage>
+<permissions>
+<copyright-statement>Authors of papers retain copyright and release the
+work under a Creative Commons Attribution 4.0 International License (CC
+BY 4.0)</copyright-statement>
+<copyright-year>2022</copyright-year>
+<copyright-holder>The article authors</copyright-holder>
+<license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
+<license-p>Authors of papers retain copyright and release the work under
+a Creative Commons Attribution 4.0 International License (CC BY
+4.0)</license-p>
+</license>
+</permissions>
+<kwd-group kwd-group-type="author">
+<kwd>Python</kwd>
+<kwd>molecular similarity</kwd>
+<kwd>virtual screening</kwd>
+<kwd>pharmacophores</kwd>
+<kwd>feature points</kwd>
+</kwd-group>
+</article-meta>
+</front>
+<body>
+<sec id="summary">
+  <title>Summary</title>
+  <p>OpenFEPOPS is an open-source Python implementation of the FEature
+  POint PharmacophoreS (FEPOPS) molecular similarity technique
+  (<xref alt="Jenkins et al., 2004" rid="ref-jenkins20043d" ref-type="bibr">Jenkins
+  et al., 2004</xref>;
+  <xref alt="Jenkins, 2013" rid="ref-jenkins2013feature" ref-type="bibr">Jenkins,
+  2013</xref>;
+  <xref alt="Nettles et al., 2007" rid="ref-nettles2007flexible" ref-type="bibr">Nettles
+  et al., 2007</xref>) enabling descriptor generation, comparison, and
+  ranking of molecules in virtual screening campaigns. Ligand based
+  virtual screening
+  (<xref alt="Ripphausen et al., 2011" rid="ref-ripphausen2011state" ref-type="bibr">Ripphausen
+  et al., 2011</xref>) is a fundamental approach undertaken to expand
+  hit series or perform scaffold hopping whereby new chemistries and
+  synthetic routes are made available in efforts to remove undesirable
+  molecular properties and discover better starting points in the early
+  stages of drug discovery
+  (<xref alt="Hughes et al., 2011" rid="ref-hughes2011principles" ref-type="bibr">Hughes
+  et al., 2011</xref>). Typically, these techniques query hit molecules
+  against proprietary, in-house, or publicly available repositories of
+  small molecules in the hope of finding close matches which will
+  display similar activities to the query based on the molecular
+  similarity principle which states that similar molecules should have
+  similar properties and make similar interactions
+  (<xref alt="Cortés-Ciriano et al., 2020" rid="ref-cortes2020qsar" ref-type="bibr">Cortés-Ciriano
+  et al., 2020</xref>). Often batteries of these similarity measures are
+  used in parallel, helping to score molecules from many different
+  subjective viewpoints and measures of similarity
+  (<xref alt="Baber et al., 2006" rid="ref-baber2006use" ref-type="bibr">Baber
+  et al., 2006</xref>). The central idea behind FEPOPS is reducing the
+  complexity of molecules by merging of local atomic environments and
+  atom properties into ‘feature points’. This compressed feature point
+  representation has been used to great effect as noted in literature,
+  helping researchers identify active and potentially therapeutically
+  valuable small molecules. By default, OpenFEPOPS uses literature
+  reported parameters which show good performance in retrieval of active
+  lead- and drug-like small molecules within virtual screening
+  campaigns, with feature points capturing charge, lipophilicity, and
+  hydrogen bond acceptor and donor status. When run with default
+  parameters, OpenFepops compactly represents molecules using seven sets
+  of four feature points, with each feature point encoded into 22
+  numeric values, resulting in a compact representation of 616 bytes per
+  molecule. By extension, this allows the indexing of a compound archive
+  containing 1 million small molecules using 587.5 MB of data. Whilst
+  more compact representations are readily available, the FEPOPS
+  technique strives to capture tautomer and conformer information, first
+  through enumeration and then through diversity driven selection of
+  representative FEPOPS descriptors to capture the diverse states that a
+  molecule may adopt.</p>
+</sec>
+<sec id="statement-of-need">
+  <title>Statement of need</title>
+  <p>At the time of writing, <monospace>OpenFEPOPS</monospace> is the
+  only publicly available implementation of the FEPOPS molecular
+  similarity technique. Whilst used within industry and referenced
+  extensively in literature, it has been unavailable to researchers as
+  an open-source tool. We welcome contributions and collaborative
+  efforts to enhance and expand OpenFEPOPS using the associated GitHub
+  repository. It is therefore hoped that this will allow the technique
+  to be used not only for traditional small molecule molecular
+  similarity, but also in new emerging fields such as protein design and
+  featurisation of small- and macro-molecules for both predictive and
+  generative tasks.</p>
+</sec>
+<sec id="brief-software-description">
+  <title>Brief software description</title>
+  <p>Whilst OpenFEPOPS has included functionality for descriptor caching
+  and profiling of libraries, the core functionality of the package is
+  descriptor generation and scoring.</p>
+  <sec id="descriptor-generation">
+    <title><italic>Descriptor generation:</italic></title>
+    <p>The OpenFEPOPS descriptor generation process as outlined in
+    <xref alt="[fig:descriptor_generation]" rid="figU003Adescriptor_generation">[fig:descriptor_generation]</xref>
+    follows;</p>
+    <list list-type="order">
+      <list-item>
+        <p>Tautomer enumeration</p>
+        <list list-type="bullet">
+          <list-item>
+            <p>For a given small molecule, OpenFEPOPS uses RDKit
+            (<xref alt="Landrum, 2013" rid="ref-landrum2013rdkit" ref-type="bibr">Landrum,
+            2013</xref>) to iterate over molecular tautomers. By
+            default, there is no limit to the number of recoverable
+            tautomers but a limit may be imposed which may be necessary
+            if adapting the OpenFEPOPS code to large macromolecules and
+            not just small molecules.</p>
+          </list-item>
+        </list>
+      </list-item>
+      <list-item>
+        <p>Conformer enumeration</p>
+        <list list-type="bullet">
+          <list-item>
+            <p>For each tautomer, up to 1024 conformers are sampled by
+            either complete enumeration of rotatable bond states (at the
+            literature reported optimum increment of 90 degrees) if
+            there are five or less rotatable bonds, or through random
+            sampling of 1024 possible states if there are more than 5
+            rotatable bonds.</p>
+          </list-item>
+        </list>
+      </list-item>
+      <list-item>
+        <p>Defining feature points</p>
+        <list list-type="bullet">
+          <list-item>
+            <p>The KMeans algorithm
+            (<xref alt="Arthur &amp; Vassilvitskii, 2007" rid="ref-arthur2007k" ref-type="bibr">Arthur
+            &amp; Vassilvitskii, 2007</xref>) is applied to each
+            conformer of each tautomer to identify four (by default)
+            representative or central points, into which the atomic
+            information of neighbouring atoms is collapsed. As standard,
+            the atomic properties of charge, logP, hydrogen bond donor,
+            and hydrogen bond acceptor status are collapsed into four
+            feature points per unique tautomer conformation. The RDKit
+            package is used to calculate these properties with the
+            iterative Gasteiger charges algorithm
+            (<xref alt="Gasteiger &amp; Marsili, 1980" rid="ref-gasteiger1980iterative" ref-type="bibr">Gasteiger
+            &amp; Marsili, 1980</xref>) applied to assign atomic
+            charges, the Crippen method
+            (<xref alt="Wildman &amp; Crippen, 1999" rid="ref-wildman1999prediction" ref-type="bibr">Wildman
+            &amp; Crippen, 1999</xref>) used to assign atomic logP
+            contributions, and hydrogen bond acceptors and donors
+            identified with appropriate
+            (<xref alt="Gillet et al., 1998" rid="ref-gillet1998identification" ref-type="bibr">Gillet
+            et al., 1998</xref>) SMARTS substructure queries. These
+            feature points are encoded to 22 numeric values (a FEPOP)
+            comprising four points, each with four properties, and six
+            pairwise distances between these points. With many FEPOPS
+            descriptors collected from a single molecule through
+            tautomer and conformer enumeration, this set of
+            representative FEPOPS should capture every possible state of
+            the original molecule.</p>
+          </list-item>
+        </list>
+      </list-item>
+      <list-item>
+        <p>Selection of diverse FEPOPS</p>
+        <list list-type="bullet">
+          <list-item>
+            <p>From the collection of FEPOPS derived from every tautomer
+            conformation of a molecule, the K-Medoid algorithm
+            (<xref alt="Park &amp; Jun, 2009" rid="ref-park2009simple" ref-type="bibr">Park
+            &amp; Jun, 2009</xref>) is applied to identify seven (by
+            default) diverse FEPOPS which are thought to best capture a
+            fuzzy representation of the molecule. These seven FEPOPS
+            each comprise 22 descriptors each, totaling 154 32-bit
+            floating point numbers or 616 bytes.</p>
+          </list-item>
+        </list>
+      </list-item>
+    </list>
+    <fig>
+      <caption><p>OpenFEPOPS descriptor generation showing the capture
+      of tautomer and conformer information from a single input
+      molecule.<styled-content id="figU003Adescriptor_generation"></styled-content></p></caption>
+      <graphic mimetype="image" mime-subtype="png" xlink:href="media/Figure1.png" />
+    </fig>
+    <p>Descriptor generation with OpenFEPOPS is a compute intensive task
+    and as noted in literature, designed to be run in situations where
+    large compound archives have had their descriptors pre-generated and
+    are queried against relatively small numbers of new molecules for
+    which descriptors are not known and are ad-hoc generated. To enable
+    use in this manner, OpenFEPOPS provides functionality to cache
+    descriptors through specification of database files, either in the
+    SQLite or JSON formats.</p>
+  </sec>
+  <sec id="scoring-and-comparison-of-molecules-based-on-their-molecular-descriptors">
+    <title>Scoring and comparison of molecules based on their molecular
+    descriptors</title>
+    <list list-type="order">
+      <list-item>
+        <p>Sorting</p>
+        <list list-type="bullet">
+          <list-item>
+            <p>With seven (by default) diverse FEPOPS representing a
+            small molecule, the FEPOPS are sorted by ascending
+            charge.</p>
+          </list-item>
+        </list>
+      </list-item>
+      <list-item>
+        <p>Scaling</p>
+        <list list-type="bullet">
+          <list-item>
+            <p>Due to the different scales and distributions of features
+            comprising FEPOPS descriptors, each FEPOP is centered and
+            scaled according to observed mean and standard deviations of
+            the same features within a larger pool of molecules. By
+            default, these means and standard deviations have been
+            derived from the DUDE
+            (<xref alt="Mysinger et al., 2012" rid="ref-mysinger2012directory" ref-type="bibr">Mysinger
+            et al., 2012</xref>) diversity set which captures known
+            actives and decoys for a diverse set of therapeutic targets
+            (See the Jupyter notebook ‘Explore_DUDE_diversity_set.ipynb’
+            in the source repository for further methods).</p>
+          </list-item>
+        </list>
+      </list-item>
+      <list-item>
+        <p>Scoring</p>
+        <list list-type="bullet">
+          <list-item>
+            <p>The Pearson correlation coefficient is calculated for the
+            scaled descriptors of the first molecule to the scaled
+            descriptors of the second.</p>
+          </list-item>
+        </list>
+      </list-item>
+    </list>
+    <p>Literature highlights that the choice of the Pearson correlation
+    coefficient leads to high background scores as it is highly unlikely
+    to see little correlation between any molecule due to fundamental
+    limitations of chemistry and geometry. Therefore, unrelated
+    molecules are likely to have FEPOPS similarity scores higher than
+    those encountered with more traditional techniques such as bitstring
+    fingerprints and Tanimoto or Dice similarity measures.</p>
+    <p>The predictive performance of OpenFEPOPS was evaluated using the
+    DUDE
+    (<xref alt="Mysinger et al., 2012" rid="ref-mysinger2012directory" ref-type="bibr">Mysinger
+    et al., 2012</xref>) diversity set. This dataset comprises eight
+    protein targets accompanied by decoy ligands and known active
+    ligands. For each target, actives were used as queries to retrieve
+    all other actives. Retrieval rankings were assessed using the AUROC
+    (Area Under Receiver Operating Characteristic) metric
+    (<xref alt="Fawcett, 2006" rid="ref-fawcett2006introduction" ref-type="bibr">Fawcett,
+    2006</xref>) and scores for each active averaged within targets to
+    assign a final average AUROC score for each target. Table 1 shows
+    the average AUROC scores for DUDE diversity set targets along with
+    scores obtained using the popular Morgan 2, MACCS, and RDKit
+    fingerprints as implemented in RDKit and scored using the Tanimoto
+    distance metric. See the Jupyter notebook
+    ‘Explore_DUDE_diversity_set.ipynb’ in the source repository for
+    further methods and data availability using the FigShare service.
+    All evaluated similarity techniques perform comparably with average
+    AUROC scores of 0.723, 0.692, 0.687, and 0.701 for Morgan 2, MACCS,
+    RDKit and OpenFEPOPS respectively. OpenFEPOPS achieves comparable
+    performance to other metrics using 3D representations of molecules
+    across a range of tautomer states which is in stark contrast to the
+    approaches taken by the other connectivity and fingerprint-based
+    methods. Diversity in similarity techniques allows potentially
+    interesting actives undiscoverable with one technique to be flagged
+    and ranked highly by another, offering new routes to novelty, new
+    chemistries, and efficacious leads from early-stage drug discovery
+    efforts.</p>
+    <table-wrap>
+      <table>
+        <thead>
+          <tr>
+            <th align="right">Target</th>
+            <th align="right">Morgan 2</th>
+            <th align="right">MACCS</th>
+            <th align="right">RDKit</th>
+            <th align="right">OpenFEPOPS</th>
+          </tr>
+        </thead>
+        <tbody>
+          <tr>
+            <td align="right">akt1</td>
+            <td align="right">0.836</td>
+            <td align="right">0.741</td>
+            <td align="right">0.833</td>
+            <td align="right">0.831</td>
+          </tr>
+          <tr>
+            <td align="right">ampc</td>
+            <td align="right">0.784</td>
+            <td align="right">0.673</td>
+            <td align="right">0.660</td>
+            <td align="right">0.639</td>
+          </tr>
+          <tr>
+            <td align="right">cp3a4</td>
+            <td align="right">0.603</td>
+            <td align="right">0.582</td>
+            <td align="right">0.613</td>
+            <td align="right">0.647</td>
+          </tr>
+          <tr>
+            <td align="right">cxcr4</td>
+            <td align="right">0.697</td>
+            <td align="right">0.854</td>
+            <td align="right">0.592</td>
+            <td align="right">0.899</td>
+          </tr>
+          <tr>
+            <td align="right">gcr</td>
+            <td align="right">0.670</td>
+            <td align="right">0.666</td>
+            <td align="right">0.708</td>
+            <td align="right">0.616</td>
+          </tr>
+          <tr>
+            <td align="right">hivpr</td>
+            <td align="right">0.780</td>
+            <td align="right">0.681</td>
+            <td align="right">0.759</td>
+            <td align="right">0.678</td>
+          </tr>
+          <tr>
+            <td align="right">hivrt</td>
+            <td align="right">0.651</td>
+            <td align="right">0.670</td>
+            <td align="right">0.660</td>
+            <td align="right">0.582</td>
+          </tr>
+          <tr>
+            <td align="right">kif11</td>
+            <td align="right">0.763</td>
+            <td align="right">0.668</td>
+            <td align="right">0.672</td>
+            <td align="right">0.713</td>
+          </tr>
+        </tbody>
+      </table>
+    </table-wrap>
+    <p><bold>Table 1:</bold> Averaged AUROC scores by target and
+    molecular similarity technique for the DUDE diversity set. Across
+    all datasets, 19 small molecules out of 112,796 were excluded from
+    analysis mainly due to issues in parsing to valid structures using
+    RDKit.</p>
+  </sec>
+  <sec id="availability-usage-and-documentation">
+    <title>Availability, usage and documentation</title>
+    <p>OpenFEPOPS has been uploaded to the Python Packaging Index under
+    the name ‘fepops’ and as such is installable using the pip package
+    manager and the command <monospace>pip install fepops</monospace>.
+    With the package installed, entrypoints are used to expose commonly
+    used OpenFEPOPS tasks such as descriptor generation and calculation
+    on molecular similarity, enabling simple command line access without
+    the need to explicitly invoke a Python interpreter. Whilst
+    OpenFEPOPS may be used solely via the command line interface, a
+    robust API is available and may be used within other programs or
+    integrated into existing pipelines to enable more complex workflows.
+    Extensive API documentation is available at
+    https://justinykc.github.io/FEPOPS, along with a concise user-guide
+    at https://justinykc.github.io/FEPOPS/readme.html</p>
+  </sec>
+</sec>
+</body>
+<back>
+<ref-list>
+  <ref id="ref-nettles2007flexible">
+    <element-citation publication-type="article-journal">
+      <person-group person-group-type="author">
+        <name><surname>Nettles</surname><given-names>James H</given-names></name>
+        <name><surname>Jenkins</surname><given-names>Jeremy L</given-names></name>
+        <name><surname>Williams</surname><given-names>Chris</given-names></name>
+        <name><surname>Clark</surname><given-names>Alex M</given-names></name>
+        <name><surname>Bender</surname><given-names>Andreas</given-names></name>
+        <name><surname>Deng</surname><given-names>Zhan</given-names></name>
+        <name><surname>Davies</surname><given-names>John W</given-names></name>
+        <name><surname>Glick</surname><given-names>Meir</given-names></name>
+      </person-group>
+      <article-title>Flexible 3D pharmacophores as descriptors of dynamic biological space</article-title>
+      <source>Journal of Molecular Graphics and Modelling</source>
+      <publisher-name>Elsevier</publisher-name>
+      <year iso-8601-date="2007">2007</year>
+      <volume>26</volume>
+      <issue>3</issue>
+      <pub-id pub-id-type="doi">10.1016/j.jmgm.2007.02.005</pub-id>
+      <fpage>622</fpage>
+      <lpage>633</lpage>
+    </element-citation>
+  </ref>
+  <ref id="ref-jenkins20043d">
+    <element-citation publication-type="article-journal">
+      <person-group person-group-type="author">
+        <name><surname>Jenkins</surname><given-names>Jeremy L</given-names></name>
+        <name><surname>Glick</surname><given-names>Meir</given-names></name>
+        <name><surname>Davies</surname><given-names>John W</given-names></name>
+      </person-group>
+      <article-title>A 3D similarity method for scaffold hopping from known drugs or natural ligands to new chemotypes</article-title>
+      <source>Journal of medicinal chemistry</source>
+      <publisher-name>ACS Publications</publisher-name>
+      <year iso-8601-date="2004">2004</year>
+      <volume>47</volume>
+      <issue>25</issue>
+      <pub-id pub-id-type="doi">10.1021/jm049654z</pub-id>
+      <fpage>6144</fpage>
+      <lpage>6159</lpage>
+    </element-citation>
+  </ref>
+  <ref id="ref-jenkins2013feature">
+    <element-citation publication-type="article-journal">
+      <person-group person-group-type="author">
+        <name><surname>Jenkins</surname><given-names>Jeremy L</given-names></name>
+      </person-group>
+      <article-title>Feature point pharmacophores (FEPOPS)</article-title>
+      <source>Scaffold hopping in medicinal chemistry</source>
+      <publisher-name>Wiley Online Library</publisher-name>
+      <year iso-8601-date="2013">2013</year>
+      <pub-id pub-id-type="doi">10.1002/9783527665143.ch10</pub-id>
+      <fpage>155</fpage>
+      <lpage>174</lpage>
+    </element-citation>
+  </ref>
+  <ref id="ref-ripphausen2011state">
+    <element-citation publication-type="article-journal">
+      <person-group person-group-type="author">
+        <name><surname>Ripphausen</surname><given-names>Peter</given-names></name>
+        <name><surname>Nisius</surname><given-names>Britta</given-names></name>
+        <name><surname>Bajorath</surname><given-names>Jürgen</given-names></name>
+      </person-group>
+      <article-title>State-of-the-art in ligand-based virtual screening</article-title>
+      <source>Drug discovery today</source>
+      <publisher-name>Elsevier</publisher-name>
+      <year iso-8601-date="2011">2011</year>
+      <volume>16</volume>
+      <issue>9-10</issue>
+      <pub-id pub-id-type="doi">10.1016/j.drudis.2011.02.011</pub-id>
+      <fpage>372</fpage>
+      <lpage>376</lpage>
+    </element-citation>
+  </ref>
+  <ref id="ref-hughes2011principles">
+    <element-citation publication-type="article-journal">
+      <person-group person-group-type="author">
+        <name><surname>Hughes</surname><given-names>James P</given-names></name>
+        <name><surname>Rees</surname><given-names>Stephen</given-names></name>
+        <name><surname>Kalindjian</surname><given-names>S Barrett</given-names></name>
+        <name><surname>Philpott</surname><given-names>Karen L</given-names></name>
+      </person-group>
+      <article-title>Principles of early drug discovery</article-title>
+      <source>British journal of pharmacology</source>
+      <publisher-name>Wiley Online Library</publisher-name>
+      <year iso-8601-date="2011">2011</year>
+      <volume>162</volume>
+      <issue>6</issue>
+      <pub-id pub-id-type="doi">10.1111/j.1476-5381.2010.01127.x</pub-id>
+      <fpage>1239</fpage>
+      <lpage>1249</lpage>
+    </element-citation>
+  </ref>
+  <ref id="ref-cortes2020qsar">
+    <element-citation publication-type="article-journal">
+      <person-group person-group-type="author">
+        <name><surname>Cortés-Ciriano</surname><given-names>Isidro</given-names></name>
+        <name><surname>Škuta</surname><given-names>Ctibor</given-names></name>
+        <name><surname>Bender</surname><given-names>Andreas</given-names></name>
+        <name><surname>Svozil</surname><given-names>Daniel</given-names></name>
+      </person-group>
+      <article-title>QSAR-derived affinity fingerprints (part 2): Modeling performance for potency prediction</article-title>
+      <source>Journal of Cheminformatics</source>
+      <publisher-name>Springer</publisher-name>
+      <year iso-8601-date="2020">2020</year>
+      <volume>12</volume>
+      <issue>1</issue>
+      <pub-id pub-id-type="doi">10.1186/s13321-020-00444-5</pub-id>
+      <fpage>41</fpage>
+      <lpage></lpage>
+    </element-citation>
+  </ref>
+  <ref id="ref-baber2006use">
+    <element-citation publication-type="article-journal">
+      <person-group person-group-type="author">
+        <name><surname>Baber</surname><given-names>J Christian</given-names></name>
+        <name><surname>Shirley</surname><given-names>William A</given-names></name>
+        <name><surname>Gao</surname><given-names>Yinghong</given-names></name>
+        <name><surname>Feher</surname><given-names>Miklos</given-names></name>
+      </person-group>
+      <article-title>The use of consensus scoring in ligand-based virtual screening</article-title>
+      <source>Journal of chemical information and modeling</source>
+      <publisher-name>ACS Publications</publisher-name>
+      <year iso-8601-date="2006">2006</year>
+      <volume>46</volume>
+      <issue>1</issue>
+      <pub-id pub-id-type="doi">10.1021/ci050296y</pub-id>
+      <fpage>277</fpage>
+      <lpage>288</lpage>
+    </element-citation>
+  </ref>
+  <ref id="ref-mysinger2012directory">
+    <element-citation publication-type="article-journal">
+      <person-group person-group-type="author">
+        <name><surname>Mysinger</surname><given-names>Michael M</given-names></name>
+        <name><surname>Carchia</surname><given-names>Michael</given-names></name>
+        <name><surname>Irwin</surname><given-names>John J</given-names></name>
+        <name><surname>Shoichet</surname><given-names>Brian K</given-names></name>
+      </person-group>
+      <article-title>Directory of useful decoys, enhanced (DUD-e): Better ligands and decoys for better benchmarking</article-title>
+      <source>Journal of medicinal chemistry</source>
+      <publisher-name>ACS Publications</publisher-name>
+      <year iso-8601-date="2012">2012</year>
+      <volume>55</volume>
+      <issue>14</issue>
+      <pub-id pub-id-type="doi">10.1021/jm300687e</pub-id>
+      <fpage>6582</fpage>
+      <lpage>6594</lpage>
+    </element-citation>
+  </ref>
+  <ref id="ref-landrum2013rdkit">
+    <element-citation publication-type="article-journal">
+      <person-group person-group-type="author">
+        <name><surname>Landrum</surname><given-names>Greg</given-names></name>
+      </person-group>
+      <article-title>RDKit: Open-source cheminformatics</article-title>
+      <year iso-8601-date="2013">2013</year>
+    </element-citation>
+  </ref>
+  <ref id="ref-arthur2007k">
+    <element-citation publication-type="paper-conference">
+      <person-group person-group-type="author">
+        <name><surname>Arthur</surname><given-names>David</given-names></name>
+        <name><surname>Vassilvitskii</surname><given-names>Sergei</given-names></name>
+      </person-group>
+      <article-title>K-means++ the advantages of careful seeding</article-title>
+      <source>Proceedings of the eighteenth annual ACM-SIAM symposium on discrete algorithms</source>
+      <year iso-8601-date="2007">2007</year>
+      <uri>https://dl.acm.org/doi/abs/10.5555/1283383.1283494</uri>
+      <fpage>1027</fpage>
+      <lpage>1035</lpage>
+    </element-citation>
+  </ref>
+  <ref id="ref-park2009simple">
+    <element-citation publication-type="article-journal">
+      <person-group person-group-type="author">
+        <name><surname>Park</surname><given-names>Hae-Sang</given-names></name>
+        <name><surname>Jun</surname><given-names>Chi-Hyuck</given-names></name>
+      </person-group>
+      <article-title>A simple and fast algorithm for k-medoids clustering</article-title>
+      <source>Expert systems with applications</source>
+      <publisher-name>Elsevier</publisher-name>
+      <year iso-8601-date="2009">2009</year>
+      <volume>36</volume>
+      <issue>2</issue>
+      <pub-id pub-id-type="doi">10.1016/j.eswa.2008.01.039</pub-id>
+      <fpage>3336</fpage>
+      <lpage>3341</lpage>
+    </element-citation>
+  </ref>
+  <ref id="ref-gillet1998identification">
+    <element-citation publication-type="article-journal">
+      <person-group person-group-type="author">
+        <name><surname>Gillet</surname><given-names>Valerie J</given-names></name>
+        <name><surname>Willett</surname><given-names>Peter</given-names></name>
+        <name><surname>Bradshaw</surname><given-names>John</given-names></name>
+      </person-group>
+      <article-title>Identification of biological activity profiles using substructural analysis and genetic algorithms</article-title>
+      <source>Journal of chemical information and computer sciences</source>
+      <publisher-name>ACS Publications</publisher-name>
+      <year iso-8601-date="1998">1998</year>
+      <volume>38</volume>
+      <issue>2</issue>
+      <pub-id pub-id-type="doi">10.1021/ci970431+</pub-id>
+      <fpage>165</fpage>
+      <lpage>179</lpage>
+    </element-citation>
+  </ref>
+  <ref id="ref-wildman1999prediction">
+    <element-citation publication-type="article-journal">
+      <person-group person-group-type="author">
+        <name><surname>Wildman</surname><given-names>Scott A</given-names></name>
+        <name><surname>Crippen</surname><given-names>Gordon M</given-names></name>
+      </person-group>
+      <article-title>Prediction of physicochemical parameters by atomic contributions</article-title>
+      <source>Journal of chemical information and computer sciences</source>
+      <publisher-name>ACS Publications</publisher-name>
+      <year iso-8601-date="1999">1999</year>
+      <volume>39</volume>
+      <issue>5</issue>
+      <pub-id pub-id-type="doi">10.1021/ci990307l</pub-id>
+      <fpage>868</fpage>
+      <lpage>873</lpage>
+    </element-citation>
+  </ref>
+  <ref id="ref-gasteiger1980iterative">
+    <element-citation publication-type="article-journal">
+      <person-group person-group-type="author">
+        <name><surname>Gasteiger</surname><given-names>Johann</given-names></name>
+        <name><surname>Marsili</surname><given-names>Mario</given-names></name>
+      </person-group>
+      <article-title>Iterative partial equalization of orbital electronegativity—a rapid access to atomic charges</article-title>
+      <source>Tetrahedron</source>
+      <publisher-name>Elsevier</publisher-name>
+      <year iso-8601-date="1980">1980</year>
+      <volume>36</volume>
+      <issue>22</issue>
+      <pub-id pub-id-type="doi">10.1016/0040-4020(80)80168-2</pub-id>
+      <fpage>3219</fpage>
+      <lpage>3228</lpage>
+    </element-citation>
+  </ref>
+  <ref id="ref-fawcett2006introduction">
+    <element-citation publication-type="article-journal">
+      <person-group person-group-type="author">
+        <name><surname>Fawcett</surname><given-names>Tom</given-names></name>
+      </person-group>
+      <article-title>An introduction to ROC analysis</article-title>
+      <source>Pattern recognition letters</source>
+      <publisher-name>Elsevier</publisher-name>
+      <year iso-8601-date="2006">2006</year>
+      <volume>27</volume>
+      <issue>8</issue>
+      <pub-id pub-id-type="doi">10.1016/j.patrec.2005.10.010</pub-id>
+      <fpage>861</fpage>
+      <lpage>874</lpage>
+    </element-citation>
+  </ref>
+</ref-list>
+</back>
+</article>

Target	Morgan 2	MACCS	RDKit	OpenFEPOPS
akt1	0.836	0.741	0.833	0.831
ampc	0.784	0.673	0.660	0.639
cp3a4	0.603	0.582	0.613	0.647
cxcr4	0.697	0.854	0.592	0.899
gcr	0.670	0.666	0.708	0.616
hivpr	0.780	0.681	0.759	0.678
hivrt	0.651	0.670	0.660	0.582
kif11	0.763	0.668	0.672	0.713