✅ Changes following the PR and adapt the root of the 'make coverage' …

…command
scikit-learn-contrib · Apr 30, 2024 · 4fa0378 · 4fa0378
1 parent 23aa9b4
commit 4fa0378
Show file tree

Hide file tree

Showing 3 changed files with 26 additions and 14 deletions.
diff --git a/Makefile b/Makefile
@@ -1,5 +1,5 @@
 coverage:
-	pytest --cov-branch --cov=qolmat --cov-report=xml
+	pytest --cov-branch --cov=qolmat --cov-report=xml tests
 
 doctest:
 	pytest --doctest-modules --pyargs qolmat

diff --git a/examples/tutorials/plot_tuto_mcar_test.py b/examples/tutorials/plot_tuto_mcar_test.py
@@ -3,9 +3,9 @@
 Tutorial for testing the MCAR case
 ============================================
 
-In this tutorial, we show how to use the mcar test classe and it methods
+In this tutorial, we show how to use the mcar test class and its methods.
 
-Keep in my mind that, at this moment, the mcar tests are only handle tabular data.
+Keep in mind that, at this moment, the mcar tests only handle tabular data.
 """
 # %%
 # First import some libraries
@@ -33,6 +33,13 @@
 # missing patterns and won't be efficient to detect the heterogeneity of covariance between missing
 # patterns.
 #
+# The null hypothesis, H0, is: "The data are MCAR". It is tested against
+# the alternative hypothesis, H1: "The data are not MCAR: the means of the observed variables
+# can vary across the patterns".
+#
+# We choose to use the classic threshold, equal to 5%. If the p-value of the test is below this
+# threshold, we reject the null hypothesis.
+#
 # This notebook shows how the Little's test performs and its limitations.
 
 np.random.seed(11)
@@ -43,7 +50,7 @@
 # Case 1 : Normal iid feature with MCAR holes
 # ===========================================
 
-matrix = np.random.multivariate_normal(mean=[0, 0], cov=[[1, 0], [0, 1]], size=100)
+matrix = np.random.multivariate_normal(mean=[0, 0], cov=[[1, 0], [0, 1]], size=200)
 matrix.ravel()[np.random.choice(matrix.size, size=20, replace=False)] = np.nan
 matrix_masked = matrix[np.argwhere(np.isnan(matrix))]
 df_1 = pd.DataFrame(matrix)
@@ -53,7 +60,7 @@
 
 plt.legend(
     (plt_1, plt_2),
-    ("observed_values", "masked_vlues"),
+    ("observed_values", "masked_values"),
     scatterpoints=1,
     loc="lower left",
     ncol=1,
@@ -78,9 +85,9 @@
 # ==========================================
 np.random.seed(11)
 
-matrix = np.random.multivariate_normal(mean=[0, 0], cov=[[1, 0], [0, 1]], size=100)
+matrix = np.random.multivariate_normal(mean=[0, 0], cov=[[1, 0], [0, 1]], size=200)
 threshold = random.uniform(0, 1)
-matrix[np.argwhere(matrix[:, 0] > 1.96), 1] = np.nan
+matrix[np.argwhere(matrix[:, 0] >= 1.96), 1] = np.nan
 matrix_masked = matrix[np.argwhere(np.isnan(matrix))]
 df_2 = pd.DataFrame(matrix)
 
@@ -118,8 +125,8 @@
 
 np.random.seed(11)
 
-matrix = np.random.multivariate_normal(mean=[0, 0], cov=[[1, 0], [0, 1]], size=100)
-matrix[np.argwhere(abs(matrix[:, 0]) >= 1.95), 1] = np.nan
+matrix = np.random.multivariate_normal(mean=[0, 0], cov=[[1, 0], [0, 1]], size=200)
+matrix[np.argwhere(abs(matrix[:, 0]) >= 1.96), 1] = np.nan
 matrix_masked = matrix[np.argwhere(np.isnan(matrix))]
 df_3 = pd.DataFrame(matrix)
 

diff --git a/tests/audit/test_holes_characterization.py b/tests/audit/test_holes_characterization.py
@@ -9,27 +9,32 @@
 @pytest.fixture
 def mcar_df() -> pd.DataFrame:
     rng = np.random.default_rng(42)
-    matrix = rng.multivariate_normal(mean=[0, 0], cov=[[1, 0], [0, 1]], size=100)
+    matrix = rng.multivariate_normal(mean=[0, 0], cov=[[1, 0], [0, 1]], size=200)
     matrix.ravel()[rng.choice(matrix.size, size=20, replace=False)] = np.nan
     return pd.DataFrame(data=matrix)
 
 
 @pytest.fixture
 def mar_hm_df() -> pd.DataFrame:
     rng = np.random.default_rng(42)
-    matrix = rng.multivariate_normal(mean=[0, 0], cov=[[1, 0], [0, 1]], size=100)
-    matrix[np.argwhere(matrix[:, 0] > 1.96), 1] = np.nan
+    matrix = rng.multivariate_normal(mean=[0, 0], cov=[[1, 0], [0, 1]], size=200)
+    matrix[np.argwhere(matrix[:, 0] >= 1.96), 1] = np.nan
     return pd.DataFrame(data=matrix)
 
 
 @pytest.fixture
 def mcar_hc_df() -> pd.DataFrame:
     rng = np.random.default_rng(42)
-    matrix = rng.multivariate_normal(mean=[0, 0], cov=[[1, 0], [0, 1]], size=100)
-    matrix[np.argwhere(abs(matrix[:, 0]) >= 1.95), 1] = np.nan
+    matrix = rng.multivariate_normal(mean=[0, 0], cov=[[1, 0], [0, 1]], size=200)
+    matrix[np.argwhere(abs(matrix[:, 0]) >= 1.96), 1] = np.nan
     return pd.DataFrame(data=matrix)
 
 
+def test_mcar__init__():
+    with pytest.raises(ValueError):
+        _ = MCARTest(method="hello")
+
+
 @pytest.mark.parametrize(
     "df_input, expected", [("mcar_df", True), ("mar_hm_df", False), ("mcar_hc_df", True)]
 )