From 728c7482655f48c8e28d79635a0f6bf1d96844ea Mon Sep 17 00:00:00 2001
From: The Open Journals editorial robot
 <89919391+editorialbot@users.noreply.github.com>
Date: Mon, 18 Mar 2024 12:28:00 +0000
Subject: [PATCH] Creating 10.21105.joss.06310.jats

---
 joss.06310/10.21105.joss.06310.jats | 748 ++++++++++++++++++++++++++++
 1 file changed, 748 insertions(+)
 create mode 100644 joss.06310/10.21105.joss.06310.jats
diff --git a/joss.06310/10.21105.joss.06310.jats b/joss.06310/10.21105.joss.06310.jats
new file mode 100644
index 0000000000..ce837ab966
--- /dev/null
+++ b/joss.06310/10.21105.joss.06310.jats
@@ -0,0 +1,748 @@
+<?xml version="1.0" encoding="utf-8" ?>
+<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.2 20190208//EN"
+                  "JATS-publishing1.dtd">
+<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="1.2" article-type="other">
+<front>
+<journal-meta>
+<journal-id></journal-id>
+<journal-title-group>
+<journal-title>Journal of Open Source Software</journal-title>
+<abbrev-journal-title>JOSS</abbrev-journal-title>
+</journal-title-group>
+<issn publication-format="electronic">2475-9066</issn>
+<publisher>
+<publisher-name>Open Journals</publisher-name>
+</publisher>
+</journal-meta>
+<article-meta>
+<article-id pub-id-type="publisher-id">6310</article-id>
+<article-id pub-id-type="doi">10.21105/joss.06310</article-id>
+<title-group>
+<article-title>Imbalance: A comprehensive multi-interface Julia toolbox
+to address class imbalance</article-title>
+</title-group>
+<contrib-group>
+<contrib contrib-type="author" equal-contrib="yes">
+<contrib-id contrib-id-type="orcid">https://orcid.org/0009-0009-1198-7166</contrib-id>
+<name>
+<surname>Wisam</surname>
+<given-names>Essam</given-names>
+</name>
+<xref ref-type="aff" rid="aff-1"/>
+</contrib>
+<contrib contrib-type="author" equal-contrib="yes">
+<contrib-id contrib-id-type="orcid">https://orcid.org/0000-0001-6689-886X</contrib-id>
+<name>
+<surname>Blaom</surname>
+<given-names>Anthony</given-names>
+</name>
+<xref ref-type="aff" rid="aff-2"/>
+</contrib>
+<aff id="aff-1">
+<institution-wrap>
+<institution>Cairo University, Egypt</institution>
+</institution-wrap>
+</aff>
+<aff id="aff-2">
+<institution-wrap>
+<institution>University of Auckland, New Zealand</institution>
+</institution-wrap>
+</aff>
+</contrib-group>
+<pub-date date-type="pub" publication-format="electronic" iso-8601-date="2023-10-17">
+<day>17</day>
+<month>10</month>
+<year>2023</year>
+</pub-date>
+<volume>9</volume>
+<issue>95</issue>
+<fpage>6310</fpage>
+<permissions>
+<copyright-statement>Authors of papers retain copyright and release the
+work under a Creative Commons Attribution 4.0 International License (CC
+BY 4.0)</copyright-statement>
+<copyright-year>2022</copyright-year>
+<copyright-holder>The article authors</copyright-holder>
+<license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
+<license-p>Authors of papers retain copyright and release the work under
+a Creative Commons Attribution 4.0 International License (CC BY
+4.0)</license-p>
+</license>
+</permissions>
+<kwd-group kwd-group-type="author">
+<kwd>machine learning</kwd>
+<kwd>classification</kwd>
+<kwd>class imbalance</kwd>
+<kwd>resampling</kwd>
+<kwd>oversampling</kwd>
+<kwd>undersampling</kwd>
+<kwd>julia</kwd>
+</kwd-group>
+</article-meta>
+</front>
+<body>
+<sec id="summary">
+  <title>Summary</title>
+  <p>Given a set of observations that each belong to a certain class,
+  supervised classification aims to learn a classification model that
+  can predict the class of a new, unlabeled observation
+  (<xref alt="Cunningham et al., 2008" rid="ref-CunninghamU003A2008" ref-type="bibr">Cunningham
+  et al., 2008</xref>). This modeling process finds extensive
+  application in real-life scenarios, including but not limited to
+  medical diagnostics, recommendation systems, credit scoring, and
+  sentiment analysis.</p>
+  <p>In various real-world scenarios where supervised classification is
+  employed, such as those pertaining to the detection of particular
+  conditions like fraud, faults, pollution, or rare diseases, a severe
+  discrepancy between the number of observations in each class can
+  occur. This is known as class imbalance. This poses a problem if
+  assumptions inherent in the classification model imply hindered
+  performance when the model is trained on imbalanced data as is
+  commonly the case
+  (<xref alt="Ali et al., 2015" rid="ref-AliU003A2015" ref-type="bibr">Ali
+  et al., 2015</xref>). Two prevalent strategies for mitigating class
+  imbalance, when it poses a problem to the classification model,
+  involve either increasing the representation of less frequently
+  occurring classes through oversampling or reducing instances of more
+  frequently occurring classes through undersampling. It may be also
+  possible to achieve even greater performance by combining both
+  approaches in a sequential pipeline
+  (<xref alt="Zeng et al., 2016" rid="ref-ZengU003A2016" ref-type="bibr">Zeng
+  et al., 2016</xref>) or by undersampling the data multiple times and
+  training the classification model on each resampled dataset to form an
+  ensemble model that aggregates results from different model instances
+  (<xref alt="Liu et al., 2009" rid="ref-LiuU003A2009" ref-type="bibr">Liu
+  et al., 2009</xref>). Contrary to undersampling, oversampling, or
+  their combination, the ensemble approach possesses the ability to
+  address class imbalance while making use of the entire dataset and
+  without generating synthetic data.</p>
+</sec>
+<sec id="statement-of-need">
+  <title>Statement of Need</title>
+  <p>A substantial body of literature in the field of machine learning
+  and statistics is devoted to addressing the class imbalance issue.
+  This predicament has often been aptly labeled the “curse of class
+  imbalance,” as noted in
+  (<xref alt="Picek et al., 2018" rid="ref-PicekU003A2018" ref-type="bibr">Picek
+  et al., 2018</xref>) and
+  (<xref alt="Kubát &amp; Matwin, 1997" rid="ref-KubtU003A1997" ref-type="bibr">Kubát
+  &amp; Matwin, 1997</xref>) which follows from the pervasive nature of
+  the issue across diverse real-world applications and its pronounced
+  severity; a classifier may incur an extraordinarily large performance
+  penalty in response to training on imbalanced data.</p>
+  <p>The literature encompasses a myriad of oversampling and
+  undersampling techniques to approach the class imbalance issue. These
+  include SMOTE
+  (<xref alt="Chawla et al., 2002" rid="ref-ChawlaU003A2002" ref-type="bibr">Chawla
+  et al., 2002</xref>) which operates by generating synthetic examples
+  along the lines joining existing ones, SMOTE-N and SMOTE-NC
+  (<xref alt="Chawla et al., 2002" rid="ref-ChawlaU003A2002" ref-type="bibr">Chawla
+  et al., 2002</xref>) which are variants of SMOTE that can handle
+  categorical data. The sheer number of SMOTE variants makes them a body
+  of literature on their own. Notably, the most widely cited variant of
+  SMOTE is BorderlineSMOTE
+  (<xref alt="Han et al., 2005" rid="ref-HanU003A2005" ref-type="bibr">Han
+  et al., 2005</xref>). Other well-established oversampling techniques
+  include RWO
+  (<xref alt="Zhang &amp; Li, 2014" rid="ref-ZhangU003A2014" ref-type="bibr">Zhang
+  &amp; Li, 2014</xref>) and ROSE
+  (<xref alt="Menardi &amp; Torelli, 2012" rid="ref-MenardiU003A2012" ref-type="bibr">Menardi
+  &amp; Torelli, 2012</xref>) which operate by estimating probability
+  densities and sampling from them to generate synthetic points. On the
+  other hand, the literature also encompasses many undersampling
+  techniques. Cluster undersampling
+  (<xref alt="Lin et al., 2016" rid="ref-LinU003A2016" ref-type="bibr">Lin
+  et al., 2016</xref>) and condensed nearest neighbors
+  (<xref alt="Hart, 1968" rid="ref-HartU003A1968" ref-type="bibr">Hart,
+  1968</xref>) are two prominent examples that attempt to reduce the
+  number of points while preserving the structure or classification
+  boundary of the data. Furthermore, methods that combine oversampling
+  and undersampling such as SMOTETomek
+  (<xref alt="Zeng et al., 2016" rid="ref-ZengU003A2016" ref-type="bibr">Zeng
+  et al., 2016</xref>) are also present. The motivation behind these
+  methods is that when undersampling is not random, it can filter out
+  noisy or irrelevant oversampled data. Lastly, resampling with ensemble
+  learning has also been presented in the literature with EasyEnsemble
+  being the most well-known approach of that type
+  (<xref alt="Liu et al., 2009" rid="ref-LiuU003A2009" ref-type="bibr">Liu
+  et al., 2009</xref>).</p>
+  <p>The existence of a toolbox with techniques that harness this wealth
+  of research is imperative to the development of novel approaches to
+  the class imbalance problem and for machine learning research broadly.
+  Aside from addressing class imbalance in a general machine learning
+  research setting, such a toolbox can help in class imbalance research
+  settings by making it possible to juxtapose different methods, compose
+  them together, or form variants of them without having to reimplement
+  them from scratch. In prevalent programming languages, such as Python,
+  a variety of such toolboxes already exist, such as imbalanced-learn
+  (<xref alt="Lemaître et al., 2016" rid="ref-LematreU003A2016" ref-type="bibr">Lemaître
+  et al., 2016</xref>) and SMOTE-variants
+  (<xref alt="Kovács, 2019" rid="ref-KovácsU003A2019" ref-type="bibr">Kovács,
+  2019</xref>). Meanwhile, Julia
+  (<xref alt="Bezanson et al., 2017" rid="ref-julia" ref-type="bibr">Bezanson
+  et al., 2017</xref>), a well-known programming language with over 40M
+  downloads
+  (<xref alt="Tuychiev, 2023" rid="ref-DataCampU003A2023" ref-type="bibr">Tuychiev,
+  2023</xref>), has been lacking a similar toolbox to address the class
+  imbalance issue in general multi-class and heterogeneous data
+  settings. This has served as the primary motivation for the creation
+  of the <monospace>Imbalance.jl</monospace> toolbox, which we introduce
+  in the subsequent section.</p>
+</sec>
+<sec id="imbalance.jl">
+  <title>Imbalance.jl</title>
+  <p>In this work, we present, <monospace>Imbalance.jl</monospace>, a
+  software toolbox implemented in the Julia programming language that
+  offers over 10 well-established techniques that help address the class
+  imbalance issue. Additionally, we present a companion package,
+  <monospace>MLJBalancing.jl</monospace>, which: (i) facilitates the
+  inclusion of resampling methods in pipelines with classification
+  models via the <monospace>BalancedModel</monospace> construct; and
+  (ii) implements a general version of the EasyEnsemble algorithm
+  presented in
+  (<xref alt="Liu et al., 2009" rid="ref-LiuU003A2009" ref-type="bibr">Liu
+  et al., 2009</xref>).</p>
+  <p>The toolbox offers a pure functional interface for each method
+  implemented. For example, <monospace>SMOTE</monospace> can be used in
+  the following fashion:</p>
+  <code language="julia">Xover, yover = smote(X, y)</code>
+  <p>Here <monospace>Xover, yover</monospace> are
+  <monospace>X, y</monospace> after oversampling.</p>
+  <p>A <monospace>ratios</monospace> hyperparameter or similar is always
+  present to control the degree of oversampling or undersampling to be
+  done for each class. All hyperparameters for a resampling method have
+  default values that can be overridden.</p>
+  <p>The set of resampling techniques implemented in either
+  <monospace>Imbalance.jl</monospace> or
+  <monospace>MLJBalancing.jl</monospace> are shown in the table below.
+  Note that although no combination resampling techniques are explicitly
+  presented, they are easy to form using the
+  <monospace>BalancedModel</monospace> wrapper found in
+  <monospace>MLJBalancing.jl</monospace> which can wrap an arbitrary
+  number of resamplers in sequence.</p>
+  <table-wrap>
+    <caption>
+      <p>Resampling techniques implemented in
+      <monospace>Imbalance.jl</monospace> and
+      <monospace>MLJBalancing.jl</monospace>. </p>
+    </caption>
+    <table>
+      <colgroup>
+        <col width="38%" />
+        <col width="20%" />
+        <col width="42%" />
+      </colgroup>
+      <thead>
+        <tr>
+          <th>Technique</th>
+          <th>Type</th>
+          <th>Supported Data Types</th>
+        </tr>
+      </thead>
+      <tbody>
+        <tr>
+          <td>BalancedBaggingClassifier</td>
+          <td>Ensemble</td>
+          <td>Continuous and/or nominal</td>
+        </tr>
+        <tr>
+          <td>Borderline SMOTE1</td>
+          <td>Oversampling</td>
+          <td>Continuous</td>
+        </tr>
+        <tr>
+          <td>Cluster Undersampler</td>
+          <td>Undersampling</td>
+          <td>Continuous</td>
+        </tr>
+        <tr>
+          <td>Edited Nearest Neighbors Undersampler</td>
+          <td>Undersampling</td>
+          <td>Continuous</td>
+        </tr>
+        <tr>
+          <td>Random Oversampler</td>
+          <td>Oversampling</td>
+          <td>Continuous and/or nominal</td>
+        </tr>
+        <tr>
+          <td>Random Undersampler</td>
+          <td>Undersampling</td>
+          <td>Continuous and/or nominal</td>
+        </tr>
+        <tr>
+          <td>Random Walk Oversampler</td>
+          <td>Oversampling</td>
+          <td>Continuous and/or nominal</td>
+        </tr>
+        <tr>
+          <td>ROSE</td>
+          <td>Oversampling</td>
+          <td>Continuous</td>
+        </tr>
+        <tr>
+          <td>SMOTE</td>
+          <td>Oversampling</td>
+          <td>Continuous</td>
+        </tr>
+        <tr>
+          <td>SMOTE-N</td>
+          <td>Oversampling</td>
+          <td>Nominal</td>
+        </tr>
+        <tr>
+          <td>SMOTE-NC</td>
+          <td>Oversampling</td>
+          <td>Continuous and nominal</td>
+        </tr>
+        <tr>
+          <td>Tomek Links Undersampler</td>
+          <td>Undersampling</td>
+          <td>Continuous</td>
+        </tr>
+      </tbody>
+    </table>
+  </table-wrap>
+  <sec id="imbalance.jl-design-principles">
+    <title>Imbalance.jl Design Principles</title>
+    <p>The toolbox implementation follows a specific set of design
+    principles in terms of the implemented techniques, interface
+    support, developer experience and testing, and user experience.</p>
+    <sec id="implemented-techniques">
+      <title>Implemented Techniques</title>
+      <list list-type="bullet">
+        <list-item>
+          <p>Should support all four major types of resampling
+          approaches (oversampling, undersampling, combination,
+          ensemble)</p>
+        </list-item>
+        <list-item>
+          <p>Should be generally compatible with multi-class
+          settings</p>
+        </list-item>
+        <list-item>
+          <p>Should offer solutions to heterogeneous data settings
+          (continuous and nominal data)</p>
+        </list-item>
+        <list-item>
+          <p>When possible, preference should be given to techniques
+          that are more common in the literature or industry</p>
+        </list-item>
+      </list>
+      <p>Methods implemented in the <monospace>Imbalance.jl</monospace>
+      toolbox indeed meet all aforementioned design principles for the
+      implemented techniques. The one-vs-rest scheme as proposed in
+      (<xref alt="Fernández et al., 2013" rid="ref-FernándezU003A2013" ref-type="bibr">Fernández
+      et al., 2013</xref>) was used to generalize binary technique to
+      multi-class when needed.</p>
+    </sec>
+    <sec id="interface-support">
+      <title>Interface Support</title>
+      <list list-type="bullet">
+        <list-item>
+          <p>Should support both matrix and table type inputs</p>
+        </list-item>
+        <list-item>
+          <p>Target variable may or may not be given as a separate
+          column</p>
+        </list-item>
+        <list-item>
+          <p>Should expose a pure functional implementation, but also
+          support popular Julia machine learning interfaces</p>
+        </list-item>
+        <list-item>
+          <p>Should be possible to wrap an arbitrary number of resampler
+          models with a classification model to behave as a unified
+          model</p>
+        </list-item>
+      </list>
+      <p>Methods implemented in the <monospace>Imbalance.jl</monospace>
+      toolbox meet all the interface design principles above. It
+      particularly implements the <monospace>MLJ</monospace>
+      (<xref alt="Blaom et al., 2020" rid="ref-Blaom2020MLJAJ" ref-type="bibr">Blaom
+      et al., 2020</xref>) and <monospace>TableTransforms</monospace>
+      interface for each method. <monospace>BalancedModel</monospace>
+      from <monospace>MLJBalancing.jl</monospace> also allows fusing an
+      arbitrary number of resampling models and a classifier together to
+      behave as one unified model.</p>
+    </sec>
+    <sec id="developer-experience-and-testing">
+      <title>Developer Experience and Testing</title>
+      <list list-type="bullet">
+        <list-item>
+          <p>There should exist a developer guide to encourage and guide
+          contribution</p>
+        </list-item>
+        <list-item>
+          <p>Functions should be implemented in smaller units to aid in
+          testing</p>
+        </list-item>
+        <list-item>
+          <p>Testing coverage should be maximized; even the most basic
+          functions should be tested</p>
+        </list-item>
+        <list-item>
+          <p>Features commonly used by multiple resampling techniques
+          should be implemented in a single function and reused</p>
+        </list-item>
+        <list-item>
+          <p>Should document all functions, including internal ones</p>
+        </list-item>
+        <list-item>
+          <p>Comments should be included to justify or simplify written
+          implementations when needed</p>
+        </list-item>
+      </list>
+      <p>This set of design principles is also satisfied by
+      <monospace>Imbalance.jl</monospace>. Implemented techniques are
+      tested by testing smaller units that form them. Aside from that,
+      end-to-end tests are performed for each technique by testing
+      properties and characteristics of the technique or by using the
+      <monospace>imbalanced-learn</monospace> toolbox
+      (<xref alt="Lemaître et al., 2016" rid="ref-LematreU003A2016" ref-type="bibr">Lemaître
+      et al., 2016</xref>) from Python and comparing outputs.</p>
+    </sec>
+    <sec id="user-experience">
+      <title>User Experience</title>
+      <list list-type="bullet">
+        <list-item>
+          <p>Functional documentation should be comprehensive and
+          clear</p>
+        </list-item>
+        <list-item>
+          <p>Examples (with shown output) that work after copy-pasting
+          should accompany each method</p>
+        </list-item>
+        <list-item>
+          <p>An illustrative visual example that presents a plot or
+          animation should preferably accompany each method</p>
+        </list-item>
+        <list-item>
+          <p>A practical example that uses the method with real data
+          should preferably accompany each method</p>
+        </list-item>
+        <list-item>
+          <p>If an implemented method lacks an online explanation, an
+          article that explains the method after it is implemented
+          should be preferably written</p>
+        </list-item>
+      </list>
+      <p>The <monospace>Imbalance.jl</monospace> documentation indeed
+      satisfies this set of design principles. Methods are each
+      associated with an example that can be copy-pasted, a visual
+      example that demonstrates the operation of the technique, and
+      possibly, an example that utilizes it with a real-world dataset to
+      improve the performance of a classification model.</p>
+    </sec>
+  </sec>
+  <sec id="author-contributions">
+    <title>Author Contributions</title>
+    <p>Design: E. Wisam, A. Blaom. Implementation, tests and
+    documentation: E. Wisam. Code and documentation review: A. Blaom.
+    The authors would like to acknowledge the financial support provided
+    by the Google Summer of Code program, which made this project
+    possible.</p>
+  </sec>
+</sec>
+</body>
+<back>
+<ref-list>
+  <ref id="ref-julia">
+    <element-citation publication-type="article-journal">
+      <person-group person-group-type="author">
+        <name><surname>Bezanson</surname><given-names>Jeff</given-names></name>
+        <name><surname>Edelman</surname><given-names>Alan</given-names></name>
+        <name><surname>Karpinski</surname><given-names>Stefan</given-names></name>
+        <name><surname>Shah</surname><given-names>Viral B.</given-names></name>
+      </person-group>
+      <article-title>Julia: A fresh approach to numerical computing</article-title>
+      <source>SIAM Review</source>
+      <publisher-name>Society for Industrial &amp; Applied Mathematics (SIAM)</publisher-name>
+      <year iso-8601-date="2017-01">2017</year><month>01</month>
+      <volume>59</volume>
+      <issue>1</issue>
+      <uri>https://doi.org/10.1137%2F141000671</uri>
+      <pub-id pub-id-type="doi">10.1137/141000671</pub-id>
+      <fpage>65</fpage>
+      <lpage>98</lpage>
+    </element-citation>
+  </ref>
+  <ref id="ref-CunninghamU003A2008">
+    <element-citation publication-type="chapter">
+      <person-group person-group-type="author">
+        <name><surname>Cunningham</surname><given-names>Pádraig</given-names></name>
+        <name><surname>Cord</surname><given-names>Matthieu</given-names></name>
+        <name><surname>Delany</surname><given-names>Sarah Jane</given-names></name>
+      </person-group>
+      <article-title>Supervised learning</article-title>
+      <source>Machine learning techniques for multimedia: Case studies on organization and retrieval</source>
+      <person-group person-group-type="editor">
+        <name><surname>Cord</surname><given-names>Matthieu</given-names></name>
+        <name><surname>Cunningham</surname><given-names>Pádraig</given-names></name>
+      </person-group>
+      <publisher-name>Springer Berlin Heidelberg</publisher-name>
+      <publisher-loc>Berlin, Heidelberg</publisher-loc>
+      <year iso-8601-date="2008">2008</year>
+      <isbn>978-3-540-75171-7</isbn>
+      <uri>https://doi.org/10.1007/978-3-540-75171-7_2</uri>
+      <pub-id pub-id-type="doi">10.1007/978-3-540-75171-7_2</pub-id>
+      <fpage>21</fpage>
+      <lpage>49</lpage>
+    </element-citation>
+  </ref>
+  <ref id="ref-AliU003A2015">
+    <element-citation publication-type="paper-conference">
+      <person-group person-group-type="author">
+        <name><surname>Ali</surname><given-names>Aida</given-names></name>
+        <name><surname>Shamsuddin</surname><given-names>Siti Mariyam Hj.</given-names></name>
+        <name><surname>Ralescu</surname><given-names>Anca L.</given-names></name>
+      </person-group>
+      <article-title>Classification with class imbalance problem: A review</article-title>
+      <source>Soft computing models in industrial and environmental applications</source>
+      <year iso-8601-date="2015">2015</year>
+      <uri>https://api.semanticscholar.org/CorpusID:26644563</uri>
+    </element-citation>
+  </ref>
+  <ref id="ref-ZengU003A2016">
+    <element-citation publication-type="article-journal">
+      <person-group person-group-type="author">
+        <name><surname>Zeng</surname><given-names>Min</given-names></name>
+        <name><surname>Zou</surname><given-names>Beiji</given-names></name>
+        <name><surname>Wei</surname><given-names>Faran</given-names></name>
+        <name><surname>Liu</surname><given-names>Xiyao</given-names></name>
+        <name><surname>Wang</surname><given-names>Lei</given-names></name>
+      </person-group>
+      <article-title>Effective prediction of three common diseases by combining SMOTE with tomek links technique for imbalanced medical data</article-title>
+      <source>2016 IEEE International Conference of Online Analysis and Computing Science (ICOACS)</source>
+      <year iso-8601-date="2016">2016</year>
+      <uri>https://api.semanticscholar.org/CorpusID:25184489</uri>
+      <pub-id pub-id-type="doi">10.1109/ICOACS.2016.7563084</pub-id>
+      <fpage>225</fpage>
+      <lpage>228</lpage>
+    </element-citation>
+  </ref>
+  <ref id="ref-LiuU003A2009">
+    <element-citation publication-type="article-journal">
+      <person-group person-group-type="author">
+        <name><surname>Liu</surname><given-names>Xu-Ying</given-names></name>
+        <name><surname>Wu</surname><given-names>Jianxin</given-names></name>
+        <name><surname>Zhou</surname><given-names>Zhi-Hua</given-names></name>
+      </person-group>
+      <article-title>Exploratory undersampling for class-imbalance learning</article-title>
+      <source>IEEE Transactions on Systems, Man, and Cybernetics, Part B (Cybernetics)</source>
+      <year iso-8601-date="2009">2009</year>
+      <volume>39</volume>
+      <uri>https://api.semanticscholar.org/CorpusID:62808464</uri>
+      <pub-id pub-id-type="doi">10.1109/TSMCB.2008.2007853</pub-id>
+      <fpage>539</fpage>
+      <lpage>550</lpage>
+    </element-citation>
+  </ref>
+  <ref id="ref-PicekU003A2018">
+    <element-citation publication-type="article-journal">
+      <person-group person-group-type="author">
+        <name><surname>Picek</surname><given-names>Stjepan</given-names></name>
+        <name><surname>Heuser</surname><given-names>Annelie</given-names></name>
+        <name><surname>Jović</surname><given-names>Alan</given-names></name>
+        <name><surname>Bhasin</surname><given-names>Shivam</given-names></name>
+        <name><surname>Regazzoni</surname><given-names>Francesco</given-names></name>
+      </person-group>
+      <article-title>The curse of class imbalance and conflicting metrics with machine learning for side-channel evaluations</article-title>
+      <source>IACR Trans. Cryptogr. Hardw. Embed. Syst.</source>
+      <year iso-8601-date="2018">2018</year>
+      <volume>2019</volume>
+      <uri>https://api.semanticscholar.org/CorpusID:44136202</uri>
+      <pub-id pub-id-type="doi">10.13154/tches.v2019.i1.209-237</pub-id>
+      <fpage>209</fpage>
+      <lpage>237</lpage>
+    </element-citation>
+  </ref>
+  <ref id="ref-KubtU003A1997">
+    <element-citation publication-type="paper-conference">
+      <person-group person-group-type="author">
+        <name><surname>Kubát</surname><given-names>Miroslav</given-names></name>
+        <name><surname>Matwin</surname><given-names>Stan</given-names></name>
+      </person-group>
+      <article-title>Addressing the curse of imbalanced training sets: One-sided selection</article-title>
+      <source>International conference on machine learning</source>
+      <year iso-8601-date="1997">1997</year>
+      <uri>https://api.semanticscholar.org/CorpusID:18370956</uri>
+    </element-citation>
+  </ref>
+  <ref id="ref-ChawlaU003A2002">
+    <element-citation publication-type="article-journal">
+      <person-group person-group-type="author">
+        <name><surname>Chawla</surname><given-names>N.</given-names></name>
+        <name><surname>Bowyer</surname><given-names>K.</given-names></name>
+        <name><surname>Hall</surname><given-names>Lawrence O.</given-names></name>
+        <name><surname>Kegelmeyer</surname><given-names>W. Philip</given-names></name>
+      </person-group>
+      <article-title>SMOTE: Synthetic minority over-sampling technique</article-title>
+      <source>ArXiv</source>
+      <year iso-8601-date="2002">2002</year>
+      <volume>abs/1106.1813</volume>
+      <uri>https://api.semanticscholar.org/CorpusID:1554582</uri>
+      <pub-id pub-id-type="doi">10.1613/jair.953</pub-id>
+    </element-citation>
+  </ref>
+  <ref id="ref-HanU003A2005">
+    <element-citation publication-type="paper-conference">
+      <person-group person-group-type="author">
+        <name><surname>Han</surname><given-names>Hui</given-names></name>
+        <name><surname>Wang</surname><given-names>Wenyuan</given-names></name>
+        <name><surname>Mao</surname><given-names>Binghuan</given-names></name>
+      </person-group>
+      <article-title>Borderline-SMOTE: A new over-sampling method in imbalanced data sets learning</article-title>
+      <source>International conference on intelligent computing</source>
+      <year iso-8601-date="2005">2005</year>
+      <uri>https://api.semanticscholar.org/CorpusID:12126950</uri>
+      <pub-id pub-id-type="doi">10.1007/11538059_91</pub-id>
+    </element-citation>
+  </ref>
+  <ref id="ref-ZhangU003A2014">
+    <element-citation publication-type="article-journal">
+      <person-group person-group-type="author">
+        <name><surname>Zhang</surname><given-names>Huaxiang</given-names></name>
+        <name><surname>Li</surname><given-names>Mingfang</given-names></name>
+      </person-group>
+      <article-title>RWO-sampling: A random walk over-sampling approach to imbalanced data classification</article-title>
+      <source>Inf. Fusion</source>
+      <year iso-8601-date="2014">2014</year>
+      <volume>20</volume>
+      <uri>https://api.semanticscholar.org/CorpusID:205432428</uri>
+      <pub-id pub-id-type="doi">10.1016/j.inffus.2013.12.003</pub-id>
+      <fpage>99</fpage>
+      <lpage>116</lpage>
+    </element-citation>
+  </ref>
+  <ref id="ref-MenardiU003A2012">
+    <element-citation publication-type="article-journal">
+      <person-group person-group-type="author">
+        <name><surname>Menardi</surname><given-names>Giovanna</given-names></name>
+        <name><surname>Torelli</surname><given-names>Nicola</given-names></name>
+      </person-group>
+      <article-title>Training and assessing classification rules with imbalanced data</article-title>
+      <source>Data Mining and Knowledge Discovery</source>
+      <year iso-8601-date="2012">2012</year>
+      <volume>28</volume>
+      <uri>https://api.semanticscholar.org/CorpusID:18164904</uri>
+      <pub-id pub-id-type="doi">10.1007/s10618-012-0295-5</pub-id>
+      <fpage>92</fpage>
+      <lpage>122</lpage>
+    </element-citation>
+  </ref>
+  <ref id="ref-LinU003A2016">
+    <element-citation publication-type="article-journal">
+      <person-group person-group-type="author">
+        <name><surname>Lin</surname><given-names>Wei-Chao</given-names></name>
+        <name><surname>Tsai</surname><given-names>Chih-Fong</given-names></name>
+        <name><surname>Hu</surname><given-names>Ya-Han</given-names></name>
+        <name><surname>Jhang</surname><given-names>Jing-Shang</given-names></name>
+      </person-group>
+      <article-title>Clustering-based undersampling in class-imbalanced data</article-title>
+      <source>Inf. Sci.</source>
+      <year iso-8601-date="2016">2016</year>
+      <volume>409</volume>
+      <uri>https://api.semanticscholar.org/CorpusID:424467</uri>
+      <pub-id pub-id-type="doi">10.1016/j.ins.2017.05.008</pub-id>
+      <fpage>17</fpage>
+      <lpage>26</lpage>
+    </element-citation>
+  </ref>
+  <ref id="ref-HartU003A1968">
+    <element-citation publication-type="article-journal">
+      <person-group person-group-type="author">
+        <name><surname>Hart</surname><given-names>Peter E.</given-names></name>
+      </person-group>
+      <article-title>The condensed nearest neighbor rule (corresp.)</article-title>
+      <source>IEEE Trans. Inf. Theory</source>
+      <year iso-8601-date="1968">1968</year>
+      <volume>14</volume>
+      <uri>https://api.semanticscholar.org/CorpusID:206729609</uri>
+      <pub-id pub-id-type="doi">10.1109/TIT.1968.1054155</pub-id>
+      <fpage>515</fpage>
+      <lpage>516</lpage>
+    </element-citation>
+  </ref>
+  <ref id="ref-LematreU003A2016">
+    <element-citation publication-type="article-journal">
+      <person-group person-group-type="author">
+        <name><surname>Lemaître</surname><given-names>Guillaume</given-names></name>
+        <name><surname>Nogueira</surname><given-names>Fernando</given-names></name>
+        <name><surname>Aridas</surname><given-names>Christos K.</given-names></name>
+      </person-group>
+      <article-title>Imbalanced-learn: A Python toolbox to tackle the curse of imbalanced datasets in machine learning</article-title>
+      <source>ArXiv</source>
+      <year iso-8601-date="2016">2016</year>
+      <volume>abs/1609.06570</volume>
+      <uri>https://api.semanticscholar.org/CorpusID:1426815</uri>
+    </element-citation>
+  </ref>
+  <ref id="ref-KovácsU003A2019">
+    <element-citation publication-type="article-journal">
+      <person-group person-group-type="author">
+        <name><surname>Kovács</surname><given-names>György</given-names></name>
+      </person-group>
+      <article-title>Smote-variants: A Python implementation of 85 minority oversampling techniques</article-title>
+      <source>Neurocomputing</source>
+      <year iso-8601-date="2019">2019</year>
+      <volume>366</volume>
+      <issn>0925-2312</issn>
+      <uri>https://www.sciencedirect.com/science/article/pii/S0925231219311622</uri>
+      <pub-id pub-id-type="doi">10.1016/j.neucom.2019.06.100</pub-id>
+      <fpage>352</fpage>
+      <lpage>354</lpage>
+    </element-citation>
+  </ref>
+  <ref id="ref-DataCampU003A2023">
+    <element-citation publication-type="webpage">
+      <person-group person-group-type="author">
+        <name><surname>Tuychiev</surname><given-names>Bekhruz</given-names></name>
+      </person-group>
+      <article-title>The rise of Julia</article-title>
+      <year iso-8601-date="2023">2023</year>
+      <uri>https://www.datacamp.com/blog/the-rise-of-julia-is-it-worth-learning-in-2022</uri>
+    </element-citation>
+  </ref>
+  <ref id="ref-FernándezU003A2013">
+    <element-citation publication-type="article-journal">
+      <person-group person-group-type="author">
+        <name><surname>Fernández</surname><given-names>Alberto</given-names></name>
+        <name><surname>López</surname><given-names>Victoria</given-names></name>
+        <name><surname>Galar</surname><given-names>Mikel</given-names></name>
+        <name><surname>Jesús</surname><given-names>María José del</given-names></name>
+        <name><surname>Herrera</surname><given-names>Francisco</given-names></name>
+      </person-group>
+      <article-title>Analysing the classification of imbalanced data-sets with multiple classes: Binarization techniques and ad-hoc approaches</article-title>
+      <source>Knowl. Based Syst.</source>
+      <year iso-8601-date="2013">2013</year>
+      <volume>42</volume>
+      <uri>https://api.semanticscholar.org/CorpusID:131286</uri>
+      <pub-id pub-id-type="doi">10.1016/J.KNOSYS.2013.01.018</pub-id>
+      <fpage>97</fpage>
+      <lpage>110</lpage>
+    </element-citation>
+  </ref>
+  <ref id="ref-Blaom2020MLJAJ">
+    <element-citation publication-type="article-journal">
+      <person-group person-group-type="author">
+        <name><surname>Blaom</surname><given-names>Anthony D.</given-names></name>
+        <name><surname>Király</surname><given-names>Franz J.</given-names></name>
+        <name><surname>Lienart</surname><given-names>Thibaut</given-names></name>
+        <name><surname>Simillides</surname><given-names>Yiannis</given-names></name>
+        <name><surname>Arenas</surname><given-names>Diego</given-names></name>
+        <name><surname>Vollmer</surname><given-names>Sebastian J.</given-names></name>
+      </person-group>
+      <article-title>MLJ: A julia package for composable machine learning</article-title>
+      <source>J. Open Source Softw.</source>
+      <year iso-8601-date="2020">2020</year>
+      <volume>5</volume>
+      <uri>https://api.semanticscholar.org/CorpusID:220768685</uri>
+      <pub-id pub-id-type="doi">10.21105/joss.02704</pub-id>
+      <fpage>2704</fpage>
+      <lpage></lpage>
+    </element-citation>
+  </ref>
+</ref-list>
+</back>
+</article>

Technique	Type	Supported Data Types
BalancedBaggingClassifier	Ensemble	Continuous and/or nominal
Borderline SMOTE1	Oversampling	Continuous
Cluster Undersampler	Undersampling	Continuous
Edited Nearest Neighbors Undersampler	Undersampling	Continuous
Random Oversampler	Oversampling	Continuous and/or nominal
Random Undersampler	Undersampling	Continuous and/or nominal
Random Walk Oversampler	Oversampling	Continuous and/or nominal
ROSE	Oversampling	Continuous
SMOTE	Oversampling	Continuous
SMOTE-N	Oversampling	Nominal
SMOTE-NC	Oversampling	Continuous and nominal
Tomek Links Undersampler	Undersampling	Continuous