Skip to content

Commit

Permalink
Remove the exact p-value compute
Browse files Browse the repository at this point in the history
  • Loading branch information
adriencrtrcap committed Sep 26, 2024
1 parent 662aa64 commit c44f7b4
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 40 deletions.
15 changes: 0 additions & 15 deletions examples/tutorials/plot_tuto_mcar.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,18 +394,3 @@
# As a result, by removing the missing patterns induced by variable 2, the p-value rises
# above the significance threshold set beforehand. Thus in this sense, the test detects that the
# main culprit of the MAR mechanism lies in the second variable.


# %%
# Calculation time -> TO BE DELETED
# | **n_rows** | **n_cols** | **Calculation_time** |
# |------------|------------|----------------------|
# | 200 | 2 | 2"12 |
# | 500 | 2 | 2"24 |
# | 500 | 4 | 2"18 |
# | 1000 | 4 | 2"48 |
# | 1000 | 6 | 2"42 |
# | 10000 | 6 | 20"54 |
# | 10000 | 10 | 14"48 |
# | 100000 | 10 | 4'51" |
# | 100000 | 15 | 3'06" |
26 changes: 1 addition & 25 deletions qolmat/analysis/holes_characterization.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,6 @@ class PKLMTest(McarTest):
Number of trees per projection.
compute_partial_p_values : bool
If true, compute the partial p-values.
exact_p_value : bool
If True, compute exact p-value.
encoder : OneHotEncoder or None, default=None
Encoder to convert non numeric pandas dataframe values to numeric
values.
Expand All @@ -206,7 +204,6 @@ def __init__(
nb_permutation: int = 30,
nb_trees_per_proj: int = 200,
compute_partial_p_values: bool = False,
exact_p_value: bool = False,
encoder: Union[None, OneHotEncoder] = None,
random_state: Union[None, int, np.random.RandomState] = None,
):
Expand All @@ -216,14 +213,8 @@ def __init__(
self.nb_permutation = nb_permutation
self.nb_trees_per_proj = nb_trees_per_proj
self.compute_partial_p_values = compute_partial_p_values
self.exact_p_value = exact_p_value
self.encoder = encoder

if self.exact_p_value:
self.process_permutation = self._parallel_process_permutation_exact
else:
self.process_permutation = self._parallel_process_permutation

def _encode_dataframe(self, df: pd.DataFrame) -> np.ndarray:
"""Encode the DataFrame.
Expand Down Expand Up @@ -593,21 +584,6 @@ def _parallel_process_permutation(
y = self._build_label(X, M_perm, features_idx, target_idx)
return self._U_hat(oob_probabilities, y)

def _parallel_process_permutation_exact(
self,
X: np.ndarray,
M_perm: np.ndarray,
features_idx: np.ndarray,
target_idx: int,
oob_probabilites_unused: np.ndarray,
) -> float:
X_features, _ = self._build_dataset(X, features_idx, target_idx)
y = self._build_label(X, M_perm, features_idx, target_idx)
# In this case, we fit the classifier for each permutation,
# which takes much longer.
oob_probabilities = self._get_oob_probabilities(X_features, y)
return self._U_hat(oob_probabilities, y)

def _parallel_process_projection(
self,
X: np.ndarray,
Expand All @@ -622,7 +598,7 @@ def _parallel_process_projection(
# We fit only one classifier to get oob probabilities and compute u_hat
# nb_permutations times.
result_u_permutations = Parallel(n_jobs=-1)(
delayed(self.process_permutation)(
delayed(self._parallel_process_permutation)(
X, M_perm, features_idx, target_idx, oob_probabilities
)
for M_perm in list_permutations
Expand Down

0 comments on commit c44f7b4

Please sign in to comment.