
Commit

update
sametcopur committed Oct 26, 2024
1 parent df6fafb commit 34b01cc
Showing 5 changed files with 78 additions and 70 deletions.
13 changes: 7 additions & 6 deletions docs/requirements.txt
@@ -1,6 +1,7 @@
-numpy>=1.25.2
-pandas>=2.0.3
-scipy>=1.11.4
-myst-parser>=2.0.0
-sphinx_rtd_theme>=2.0.0
-sphinx_autodoc_typehints
+numpy>=1.26.4
+pandas>=2.2.2
+matplotlib>=3.9.2
+seaborn>=0.13.2
+sphinx_rtd_theme
+sphinx_autodoc_typehints

6 changes: 4 additions & 2 deletions pyproject.toml
@@ -3,8 +3,10 @@ name = "treemind"
version = "0.0.1"

dependencies = [
"numpy>=1.25.2",
"pandas>=2.0.3",
"numpy>=1.26.4",
"pandas>=2.2.2",
"matplotlib>=3.9.2",
"seaborn>=0.13.2"
]
authors = [
{ name = "Ilker Birbil", email = '[email protected]' },
1 change: 0 additions & 1 deletion setup.py
@@ -8,7 +8,6 @@
extra_compile_args = [
"/O2",
"/fp:fast",
"/arch:AVX2",
"/GL",
"/Ot",
"/Ox",
93 changes: 49 additions & 44 deletions treemind/algorithm/explainer.pyi
@@ -5,9 +5,9 @@ from typing import Union, Tuple, List, Any

class Explainer:
"""
The Explainer class provides methods to analyze and interpret a trained model by examining
feature dependencies, split points, interaction effects, and predicted values. This class
enables detailed inspection of how individual features and their interactions impact model
predictions, allowing for a clearer understanding of the model's decision-making process.
"""

@@ -19,29 +19,33 @@ class Explainer:
----------
main_col : int
The column index of the main feature to analyze.
sub_col : int
The column index of the sub feature with which to analyze the dependency.
Returns
-------
pd.DataFrame
A DataFrame containing the following columns:
- `main_feature_lb`: Lower bound for the main feature interval (automatically named by the model).
- `main_feature_ub`: Upper bound for the main feature interval (automatically named by the model, inclusive).
- `sub_feature_lb`: Lower bound for the sub feature interval.
- `sub_feature_ub`: Upper bound for the sub feature interval, inclusive.
- `value`: A value indicating the interaction effect or dependency strength between the main and sub features within the specified interval combination.
Notes
-----
- The naming of the `main_feature_lb`, `main_feature_ub`, `sub_feature_lb`, and `sub_feature_ub` columns
is model-determined. If the column names are unspecified during training, they are auto-assigned based on indices.
- Each row in the output DataFrame represents a unique combination of intervals between the main and sub
features, showing the value associated with the interaction within these intervals.
"""
...
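As a usage aid, here is a minimal, hypothetical sketch of this dependency query. The method's own name and signature line are collapsed out of this hunk, so `analyze_interaction` is only a placeholder, and the import path, the no-argument constructor, and the LightGBM model are assumptions rather than anything confirmed by this diff.

    # Hypothetical sketch; `analyze_interaction` is a placeholder for the method
    # documented above, whose real name is collapsed out of this hunk.
    import numpy as np
    from lightgbm import LGBMRegressor  # assumed to be a supported model type
    from treemind import Explainer      # assumed import path

    rng = np.random.default_rng(0)
    X = rng.normal(size=(500, 5))
    y = X[:, 0] * X[:, 3] + rng.normal(scale=0.1, size=500)
    model = LGBMRegressor(n_estimators=50).fit(X, y)

    explainer = Explainer()  # assumed no-argument constructor
    explainer(model)         # attach the trained model (see __call__ below)

    # Interval-level dependency between feature 0 (main) and feature 3 (sub).
    dep = explainer.analyze_interaction(main_col=0, sub_col=3)
    print(dep.sort_values("value", ascending=False).head())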

def __call__(self, model: Any) -> None:
"""
Invokes the Explainer instance with a model to perform analysis.
@@ -50,13 +54,13 @@
----------
model : Any
A trained model instance.
Returns
-------
None
"""
...

def analyze_data(
self, x: ArrayLike, detailed: bool = False
) -> Union[Tuple[np.ndarray, List[np.ndarray], float], Tuple[np.ndarray, float]]:
@@ -66,50 +70,44 @@
Parameters
----------
x : ArrayLike
Input data for analysis. The data type of `x` should be compatible with the trained model,
which can accept any type that matches its input requirements. Note that `x` must be
two-dimensional; single-dimensional arrays are not accepted. If input is intended to
be row-based, it must have the appropriate shape.
detailed : bool, optional
If True, the function returns detailed split points for each feature. If False, only
basic output is returned. Default is False.
Returns
-------
Union[Tuple[np.ndarray, List[np.ndarray], float], Tuple[np.ndarray, float]]
The output depends on the `detailed` parameter:
- If `detailed` is False:
The function returns a tuple containing:
- `values` : np.ndarray
A single-dimensional array where each element represents the effect (positive or
negative) of each feature in `x`. Each index corresponds to a feature column in `x`.
- `raw_score` : float
The mean of the predictions obtained by inputting `x` into the model. This raw
score reflects the average output based on `x`.
- If `detailed` is True:
The function returns a tuple containing:
- `values` : np.ndarray
A two-dimensional array with shape (n_col, max_split_num_feature). Initially, all
values are set to 0. For each feature, the array contains values up to the number
of splits for that feature. For example, if a feature has 10 splits and the
maximum split count is 30, the first 10 elements will have values, while the rest
remain 0. To determine the number of splits for a feature, use `len(split_points[i])`.
- `split_points` : List[np.ndarray]
A list where each element is an array representing the split points for each feature.
Each array details the split points where the feature was divided. For example,
if a feature splits at 10 different points, the array for that feature contains
those 10 split values.
- `raw_score` : float
Similar to the non-detailed case, this represents the mean score of `x` when
evaluated by the model.
"""
...
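A short sketch of both return shapes may help; it reuses the `explainer` and `X` from the hypothetical setup sketched earlier, so those names are assumptions carried over from that example.

    # `explainer` and `X` come from the hypothetical setup sketched earlier.
    values, raw_score = explainer.analyze_data(X[:10], detailed=False)
    print(values.shape)  # one effect per feature column of X
    print(raw_score)     # mean model output over the 10 analyzed rows

    values, split_points, raw_score = explainer.analyze_data(X[:10], detailed=True)
    # Row i of `values` carries len(split_points[i]) meaningful entries; the rest stay 0.
    print(values[0, : len(split_points[0])])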

def analyze_feature(self, col: int) -> pd.DataFrame:
"""
Analyzes a specific feature by calculating the mean, min, and max values
Expand All @@ -136,7 +134,7 @@ class Explainer:
If no column names are specified during the training phase, they are automatically indexed by the model.
"""
...
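A one-line usage sketch, again reusing the `explainer` assumed above; the exact output columns are not visible in this truncated hunk.

    # `explainer` is the fitted Explainer from the earlier sketch.
    feat_df = explainer.analyze_feature(0)  # per-interval statistics for feature column 0
    print(feat_df.head())  # mean/min/max values; exact column names not shown in this hunk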

def count_node(self, interaction: bool = True) -> pd.DataFrame:
"""
Counts how often features (or pairs of features if interaction is True) appear in decision splits across the model's trees.
Expand All @@ -150,15 +148,22 @@ class Explainer:
Returns
-------
pd.DataFrame
The output depends on the `interaction` parameter:
- If `interaction` is True:
The function returns a DataFrame with the following columns:
- `column1_index` (int): Index of the first feature.
- `column2_index` (int): Index of the second feature.
- `count` (int): Number of times the feature pair appears together in splits.
- If `interaction` is False:
The function returns a DataFrame with the following columns:
- `column_index` (int): Index of the feature.
- `count` (int): Number of times the feature appears in splits.
"""
...
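The column names in the returned DataFrames make this easy to sketch; `explainer` is again the assumed instance from the earlier example.

    # `explainer` is the fitted Explainer from the earlier sketch.
    pair_counts = explainer.count_node(interaction=True)
    print(pair_counts.sort_values("count", ascending=False).head())  # most frequent feature pairs

    single_counts = explainer.count_node(interaction=False)
    print(single_counts.sort_values("count", ascending=False).head())  # most frequent single features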

35 changes: 18 additions & 17 deletions treemind/plot/plot_funcs.py
@@ -52,8 +52,7 @@ def bar_plot(
feature indices are labeled as "Column X" for each feature.
max_col : int or None, optional, default=20
The maximum number of features to display in the plot, chosen based on
their absolute contribution values. If `None`, all features will be shown.
title : str or None, optional
The title displayed at the top of the plot. If `None`, no title is shown.
title_fontsize : float, optional, default=12.0
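Only the keyword parameters above are visible in this hunk; the sketch below assumes `bar_plot` is imported from `treemind.plot` and takes the non-detailed outputs of `Explainer.analyze_data` as its leading arguments, which this diff does not confirm.

    # Hedged sketch: the positional arguments and the import path are assumptions.
    from treemind.plot import bar_plot  # assumed import path

    values, raw_score = explainer.analyze_data(X[:10], detailed=False)
    bar_plot(
        values,
        raw_score,
        columns=[f"f{i}" for i in range(X.shape[1])],
        max_col=10,
        title="Feature contributions (sketch)",
    )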
@@ -235,24 +234,26 @@ def range_plot(
The raw score associated with the values, displayed in the plot's upper right.
split_points : List[np.ndarray[float]]
A list of point intervals corresponding to the values in each row.
scale : float, optional, default 2.0
Scaling factor for figure size.
columns : list or ArrayLike, optional
A list of names for the features, used as labels on the y-axis. If `None`,
feature indices are labeled as "Column X" for each feature.
max_col : int or None, optional, default=20
The maximum number of features to display in the plot, chosen based on
their absolute contribution values. If `None`, all features will be shown.
title : str or None, optional
The title displayed at the top of the plot. If `None`, no title is shown.
label_fontsize : float, optional, default 9.0
Font size for the y-axis labels.
title_fontsize : float, optional, default 12.0
Font size for the plot title.
interval_fontsize : float, optional, default 4.5
Font size for interval labels displayed on each bar.
value_fontsize : float, optional, default 5.5
Font size for value labels displayed below each bar.
show_raw_score : bool, optional, default True
If True, displays the raw score in the plot.
Returns
-------
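The Returns section is truncated here, but the parameter list above is enough for a hedged sketch; the leading positional arguments (`values`, `raw_score`, `split_points` from `Explainer.analyze_data(..., detailed=True)`) and the import path are assumptions, not confirmed by this diff.

    # Hedged sketch: positional argument order and import path are assumptions.
    from treemind.plot import range_plot  # assumed import path

    values, split_points, raw_score = explainer.analyze_data(X[:10], detailed=True)
    range_plot(
        values,
        raw_score,
        split_points,
        scale=2.0,
        max_col=10,
        show_raw_score=True,
    )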
