From 7bbd379824435eb8229c0615e667a9d50885c29a Mon Sep 17 00:00:00 2001 From: tomMoral Date: Sun, 1 Nov 2020 17:35:49 +0100 Subject: [PATCH 01/14] FIX test to test X[i,j] is max --- test_numpy_questions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_numpy_questions.py b/test_numpy_questions.py index db1dff9f..85ea6a49 100644 --- a/test_numpy_questions.py +++ b/test_numpy_questions.py @@ -16,7 +16,7 @@ def test_max_index(): X = np.random.randn(100, 100) i, j = max_index(X) - assert np.all(X[i, j] <= X) + assert np.all(X[i, j] >= X) with pytest.raises(ValueError): max_index(None) From a241f2ea965e14ad4c787e948892f4ae291ef155 Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Sun, 1 Nov 2020 17:59:09 +0100 Subject: [PATCH 02/14] better test for wallis --- test_numpy_questions.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test_numpy_questions.py b/test_numpy_questions.py index 85ea6a49..8be9fabf 100644 --- a/test_numpy_questions.py +++ b/test_numpy_questions.py @@ -29,5 +29,11 @@ def test_max_index(): def test_wallis_product(): + pi_approx = wallis_product(0) + assert pi_approx == 2. 
+ + pi_approx = wallis_product(1) + assert pi_approx == 8 / 3 + pi_approx = wallis_product(100000) assert abs(pi_approx - m.pi) < 1e-4 From 33020f254fb34eae5f7a0158df516ab181fded3f Mon Sep 17 00:00:00 2001 From: tomMoral Date: Sun, 1 Nov 2020 18:03:51 +0100 Subject: [PATCH 03/14] TST relax pydocstyle tests --- numpy_questions.py | 1 + sklearn_questions.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/numpy_questions.py b/numpy_questions.py index 8fb399b6..52d4ec19 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -1,3 +1,4 @@ +# noqa: D100 import numpy as np diff --git a/sklearn_questions.py b/sklearn_questions.py index 3867d48d..6a83fcca 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -1,3 +1,4 @@ +# noqa: D100 import numpy as np from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.utils.validation import check_X_y, check_is_fitted @@ -7,7 +8,7 @@ class OneNearestNeighbor(BaseEstimator, ClassifierMixin): """Write docstring """ - def __init__(self): + def __init__(self): # noqa: D107 pass def fit(self, X, y): From c0520dbdf184ff9175e2b38b844b0069f2e0f25b Mon Sep 17 00:00:00 2001 From: Thomas Moreau Date: Tue, 10 Nov 2020 20:28:26 +0100 Subject: [PATCH 04/14] ENH include feedback from first datacamp (#110) * ENH include feedback from first datacamp - Pre-add check_classification_target in sklearn questions - Better describe the exercises in the module docstring - Improve the validation split. 
* FIX flake8 testing + comments Co-authored-by: Alexandre Gramfort * Update numpy_questions.py * Update numpy_questions.py * Update sklearn_questions.py * Fix pep8 Co-authored-by: Alexandre Gramfort --- .github/workflows/python-app.yml | 39 +++++++++++++--------- numpy_questions.py | 35 ++++++++++++++++---- sklearn_questions.py | 56 ++++++++++++++++++++++++++------ 3 files changed, 97 insertions(+), 33 deletions(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 3e8469c8..1424dbac 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -1,24 +1,19 @@ # This workflow will install Python dependencies, run tests and lint with a single version of Python # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: build +name: Assignment Validation on: push: - - pull_request: - - create: branches: - 'main' - tags: - - '**' -jobs: - build: + pull_request: +jobs: + test: + name: Test Code runs-on: ubuntu-latest - steps: - uses: actions/checkout@v2 - name: Set up Python 3.8 @@ -28,15 +23,27 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install flake8 pytest pydocstyle + pip install pytest if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Test with pytest + run: pytest -v + lint: + name: Check code style + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Install dependencies + run: | + pip install flake8 pydocstyle - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=80 --statistics - pydocstyle - - name: Test with pytest - run: | - pytest + flake8 . --count --max-complexity=10 --max-line-length=80 --statistics + - name: Check doc style with pydocstyle + run: pydocstyle diff --git a/numpy_questions.py b/numpy_questions.py index 52d4ec19..5df2c00a 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -1,4 +1,20 @@ -# noqa: D100 +"""Assignment - using numpy and making a PR. + +The goals of this assignment are: + * Use numpy in practice with two easy exercises. + * Use automated tools to validate the code (`pytest` and `flake8`) + * Submit a Pull-Request on github to practice `git`. + +The two functions below are skeleton functions. The docstrings explain what +are the inputs, the outputs and the expected error. Fill the function to +complete the assignment. The code should be able to pass the test that we +wrote. To run the tests, use `pytest test_numpy_questions.py` at the root of +the repo. It should say that 2 tests ran with success. + +We also ask to respect the pep8 convention: https://pep8.org. +This will be enforced with `flake8`. You can check that there are no flake8 +errors by calling `flake8` at the root of the repo. +""" import numpy as np @@ -12,11 +28,8 @@ def max_index(X): Returns ------- - i : int - The row index of the maximum. - - j : int - The column index of the maximum. + (i, j) : tuple(int) + The row and column index of the maximum. Raises ------ @@ -38,8 +51,16 @@ def wallis_product(n_terms): See: https://en.wikipedia.org/wiki/Wallis_product - XXX : write Parameters and Returns sections as above. + Parameters + ---------- + n_terms : int + Number of steps in the Wallis product. Note that `n_terms=0` will + consider the product to be `1`. + Returns + ------- + pi : float + The approximation of order `n_terms` of pi using the Wallis product. """ # XXX : The n_terms is an int that corresponds to the number of # terms in the product. 
For example 10000. diff --git a/sklearn_questions.py b/sklearn_questions.py index 6a83fcca..5df61492 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -1,36 +1,72 @@ -# noqa: D100 +"""Assignment - making a sklearn estimator. + +The goal of this assignment is to implement by yourself a scikit-learn +estimator for the OneNearestNeighbor and check that it is working properly. + +The nearest neighbor classifier predicts for a point X_i the target y_k of +the training sample X_k which is the closest to X_i. We measure proximity with +the Euclidean distance. The model will be evaluated with the accuracy (average +number of samples corectly classified). You need to implement the `fit`, +`predict` and `score` methods for this class. The code you write should pass +the test we implemented. You can run the tests by calling at the root of the +repo `pytest test_sklearn_questions.py`. + +We also ask to respect the pep8 convention: https://pep8.org. This will be +enforced with `flake8`. You can check that there is no flake8 errors by +calling `flake8` at the root of the repo. + +Finally, you need to write docstring similar to the one in `numpy_questions` +for the methods you code and for the class. The docstring will be checked using +`pydocstyle` that you can also call at the root of the repo. +""" import numpy as np -from sklearn.base import BaseEstimator, ClassifierMixin -from sklearn.utils.validation import check_X_y, check_is_fitted +from sklearn.base import BaseEstimator +from sklearn.base import ClassifierMixin +from sklearn.utils.validation import check_X_y from sklearn.utils.validation import check_array +from sklearn.utils.validation import check_is_fitted +from sklearn.utils.multiclass import check_classification_targets class OneNearestNeighbor(BaseEstimator, ClassifierMixin): - """Write docstring - """ + "OneNearestNeighbor classifier." def __init__(self): # noqa: D107 pass def fit(self, X, y): - """Write docstring + """Write docstring. 
+ + And describe parameters """ X, y = check_X_y(X, y) + y = check_classification_targets(y) self.classes_ = np.unique(y) + # XXX fix return self def predict(self, X): - """Write docstring + """Write docstring. + + And describe parameters """ check_is_fitted(self) X = check_array(X) - y_pred = np.full(shape=len(X), fill_value=self.classes_[0]) + y_pred = np.full( + shape=len(X), fill_value=self.classes_[0], + dtype=self.classes_.dtype + ) + # XXX fix return y_pred def score(self, X, y): - """Write docstring + """Write docstring. + + And describe parameters """ X, y = check_X_y(X, y) y_pred = self.predict(X) - return np.mean(y_pred == y) + + # XXX fix + return y_pred.sum() From 5e9a21dafe1a86a43843b341d9b2fc8510b98c2c Mon Sep 17 00:00:00 2001 From: mathurinm Date: Fri, 17 Dec 2021 14:30:04 +0100 Subject: [PATCH 05/14] fix typo readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c5e20191..87ef801d 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ ## How? - - For the repository by clicking on the `Fork` button on the upper right corner + - Fork the repository by clicking on the `Fork` button on the upper right corner - Clone the repository of your fork with: `git clone https://github.com/MYLOGIN/datacamp-assignment1` (replace MYLOGIN with your GitHub login) - Create a branch called `myassignment` using `git checkout -b myassignment` - Make the changes to complete the assignment. You have to modify the files that contain `questions` in their name. Do not modify the files that start with `test_`. 
From a5006bc12928cee1eeca52c016f80205ca55dd57 Mon Sep 17 00:00:00 2001 From: mathurinm Date: Fri, 17 Dec 2021 17:09:57 +0100 Subject: [PATCH 06/14] one typo, one nasty bug --- numpy_questions.py | 2 +- sklearn_questions.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/numpy_questions.py b/numpy_questions.py index 5df2c00a..07a10c12 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -34,7 +34,7 @@ def max_index(X): Raises ------ ValueError - If the input is not a numpy error or + If the input is not a numpy array or if the shape is not 2D. """ i = 0 diff --git a/sklearn_questions.py b/sklearn_questions.py index 5df61492..f75622e1 100644 --- a/sklearn_questions.py +++ b/sklearn_questions.py @@ -6,7 +6,7 @@ The nearest neighbor classifier predicts for a point X_i the target y_k of the training sample X_k which is the closest to X_i. We measure proximity with the Euclidean distance. The model will be evaluated with the accuracy (average -number of samples corectly classified). You need to implement the `fit`, +number of samples correctly classified). You need to implement the `fit`, `predict` and `score` methods for this class. The code you write should pass the test we implemented. You can run the tests by calling at the root of the repo `pytest test_sklearn_questions.py`. @@ -30,6 +30,7 @@ class OneNearestNeighbor(BaseEstimator, ClassifierMixin): "OneNearestNeighbor classifier." 
+ def __init__(self): # noqa: D107 pass @@ -39,7 +40,7 @@ def fit(self, X, y): And describe parameters """ X, y = check_X_y(X, y) - y = check_classification_targets(y) + check_classification_targets(y) self.classes_ = np.unique(y) # XXX fix From 001e3e408377070c253ea3bdb2984d02bcb10f19 Mon Sep 17 00:00:00 2001 From: tomMoral Date: Thu, 6 Jan 2022 13:34:22 +0100 Subject: [PATCH 07/14] MTN move sklearn assignment to another repo --- .github/workflows/python-app.yml | 1 + sklearn_questions.py | 73 -------------------------------- test_sklearn_questions.py | 31 -------------- 3 files changed, 1 insertion(+), 104 deletions(-) delete mode 100644 sklearn_questions.py delete mode 100644 test_sklearn_questions.py diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 1424dbac..520892df 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -47,3 +47,4 @@ jobs: flake8 . --count --max-complexity=10 --max-line-length=80 --statistics - name: Check doc style with pydocstyle run: pydocstyle + diff --git a/sklearn_questions.py b/sklearn_questions.py deleted file mode 100644 index f75622e1..00000000 --- a/sklearn_questions.py +++ /dev/null @@ -1,73 +0,0 @@ -"""Assignment - making a sklearn estimator. - -The goal of this assignment is to implement by yourself a scikit-learn -estimator for the OneNearestNeighbor and check that it is working properly. - -The nearest neighbor classifier predicts for a point X_i the target y_k of -the training sample X_k which is the closest to X_i. We measure proximity with -the Euclidean distance. The model will be evaluated with the accuracy (average -number of samples correctly classified). You need to implement the `fit`, -`predict` and `score` methods for this class. The code you write should pass -the test we implemented. You can run the tests by calling at the root of the -repo `pytest test_sklearn_questions.py`. - -We also ask to respect the pep8 convention: https://pep8.org. 
This will be -enforced with `flake8`. You can check that there is no flake8 errors by -calling `flake8` at the root of the repo. - -Finally, you need to write docstring similar to the one in `numpy_questions` -for the methods you code and for the class. The docstring will be checked using -`pydocstyle` that you can also call at the root of the repo. -""" -import numpy as np -from sklearn.base import BaseEstimator -from sklearn.base import ClassifierMixin -from sklearn.utils.validation import check_X_y -from sklearn.utils.validation import check_array -from sklearn.utils.validation import check_is_fitted -from sklearn.utils.multiclass import check_classification_targets - - -class OneNearestNeighbor(BaseEstimator, ClassifierMixin): - "OneNearestNeighbor classifier." - - def __init__(self): # noqa: D107 - pass - - def fit(self, X, y): - """Write docstring. - - And describe parameters - """ - X, y = check_X_y(X, y) - check_classification_targets(y) - self.classes_ = np.unique(y) - - # XXX fix - return self - - def predict(self, X): - """Write docstring. - - And describe parameters - """ - check_is_fitted(self) - X = check_array(X) - y_pred = np.full( - shape=len(X), fill_value=self.classes_[0], - dtype=self.classes_.dtype - ) - - # XXX fix - return y_pred - - def score(self, X, y): - """Write docstring. - - And describe parameters - """ - X, y = check_X_y(X, y) - y_pred = self.predict(X) - - # XXX fix - return y_pred.sum() diff --git a/test_sklearn_questions.py b/test_sklearn_questions.py deleted file mode 100644 index 9fab779c..00000000 --- a/test_sklearn_questions.py +++ /dev/null @@ -1,31 +0,0 @@ -# ################################################## -# YOU SHOULD NOT TOUCH THIS FILE ! 
-# ################################################## - -from sklearn.utils.estimator_checks import check_estimator -from sklearn.model_selection import train_test_split -from sklearn.datasets import make_classification -from sklearn.neighbors import KNeighborsClassifier - -from sklearn_questions import OneNearestNeighbor - -from numpy.testing import assert_array_equal - - -def test_one_nearest_neighbor_check_estimator(): - check_estimator(OneNearestNeighbor()) - - -def test_one_nearest_neighbor_match_sklearn(): - X, y = make_classification(n_samples=200, n_features=20, - random_state=42) - X_train, X_test, y_train, y_test = \ - train_test_split(X, y, random_state=42) - knn = KNeighborsClassifier(n_neighbors=1) - y_pred_sk = knn.fit(X_train, y_train).predict(X_test) - - onn = OneNearestNeighbor() - y_pred_me = onn.fit(X_train, y_train).predict(X_test) - assert_array_equal(y_pred_me, y_pred_sk) - - assert onn.score(X_test, y_test) == knn.score(X_test, y_test) From a7039ec075ceb33338f329ddb2dc68763ed68b1a Mon Sep 17 00:00:00 2001 From: tomMoral Date: Thu, 6 Jan 2022 13:49:42 +0100 Subject: [PATCH 08/14] ENH update README --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 87ef801d..57727aae 100644 --- a/README.md +++ b/README.md @@ -12,8 +12,10 @@ - Fork the repository by clicking on the `Fork` button on the upper right corner - Clone the repository of your fork with: `git clone https://github.com/MYLOGIN/datacamp-assignment1` (replace MYLOGIN with your GitHub login) - - Create a branch called `myassignment` using `git checkout -b myassignment` + - Create a branch called `my_user_name` using `git checkout -b my_user_name` - Make the changes to complete the assignment. You have to modify the files that contain `questions` in their name. Do not modify the files that start with `test_`. + - Check locally that your solution meet the test by running `pytest` from the root of the repo. 
You may need to install `pytest` using `pip` or `conda`. + - Check the code formating for your solution using `flake8`. You may need to install `flake8` using `pip` or `conda`. - Open the pull request on GitHub - Keep pushing to your branch until the continuous integration system is green. - When it is green notify the professors on Slack that your done. From bb731a745eb300bc84ade8616e86a31cb6a91ac0 Mon Sep 17 00:00:00 2001 From: tomMoral Date: Thu, 6 Jan 2022 13:52:27 +0100 Subject: [PATCH 09/14] ENH update README --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 57727aae..bedf361c 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,10 @@ - Make the changes to complete the assignment. You have to modify the files that contain `questions` in their name. Do not modify the files that start with `test_`. - Check locally that your solution meet the test by running `pytest` from the root of the repo. You may need to install `pytest` using `pip` or `conda`. - Check the code formating for your solution using `flake8`. You may need to install `flake8` using `pip` or `conda`. - - Open the pull request on GitHub + - Open the pull request on GitHub: + - Create a commit with `git add -u` and `git commit -m "UP my solution" + - Push your branch on your fork: `git push -u origin my_user_name` + - Go to your repo in your browser and click the `Open a PR` button. - Keep pushing to your branch until the continuous integration system is green. - When it is green notify the professors on Slack that your done. 
From c2b68a5d0d5fe48c0029cb7bd135a3edb6a378e4 Mon Sep 17 00:00:00 2001 From: tomMoral Date: Thu, 6 Jan 2022 13:53:08 +0100 Subject: [PATCH 10/14] FIX typo in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bedf361c..4abd393c 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ - Check locally that your solution meet the test by running `pytest` from the root of the repo. You may need to install `pytest` using `pip` or `conda`. - Check the code formating for your solution using `flake8`. You may need to install `flake8` using `pip` or `conda`. - Open the pull request on GitHub: - - Create a commit with `git add -u` and `git commit -m "UP my solution" + - Create a commit with `git add -u` and `git commit -m "UP my solution"` - Push your branch on your fork: `git push -u origin my_user_name` - Go to your repo in your browser and click the `Open a PR` button. - Keep pushing to your branch until the continuous integration system is green. From 1fe238b87815f5d9c13cde278d8fbfd72c881cf6 Mon Sep 17 00:00:00 2001 From: tomMoral Date: Thu, 6 Jan 2022 16:14:31 +0100 Subject: [PATCH 11/14] ENH update readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4abd393c..a17c85eb 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Assignment 1 for the DataCamp course X-DataScience Master +# Assignment 1 for the DataCamp course X-DataScience Master - numpy ## What we want you to learn by doing this assignment: From d31f7efab1414fc22b0ee1749c3511f2e0c9300a Mon Sep 17 00:00:00 2001 From: tomMoral Date: Thu, 6 Jan 2022 16:17:01 +0100 Subject: [PATCH 12/14] FIX repo to clone from --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a17c85eb..823291dc 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ ## How? 
- Fork the repository by clicking on the `Fork` button on the upper right corner - - Clone the repository of your fork with: `git clone https://github.com/MYLOGIN/datacamp-assignment1` (replace MYLOGIN with your GitHub login) + - Clone the repository of your fork with: `git clone https://github.com/MYLOGIN/datacamp-assignment-numpy` (replace MYLOGIN with your GitHub login) - Create a branch called `my_user_name` using `git checkout -b my_user_name` - Make the changes to complete the assignment. You have to modify the files that contain `questions` in their name. Do not modify the files that start with `test_`. - Check locally that your solution meet the test by running `pytest` from the root of the repo. You may need to install `pytest` using `pip` or `conda`. From b20c23112526c8a219000fbfc7eb8054ebf65f14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robin=20Labb=C3=A9?= Date: Mon, 10 Jan 2022 15:12:24 +0100 Subject: [PATCH 13/14] Tentative de commit sur la branche solution --- numpy_questions.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/numpy_questions.py b/numpy_questions.py index 07a10c12..55c00060 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -41,7 +41,11 @@ def max_index(X): j = 0 # TODO - + p,q = X.shape + for i1 in range(p): + for j1 in range(q): + if X[i1,j1]> X[i,j]: + i,j = i1,j1 return i, j From 4daec19dc31b56728a4f9dcffe239dafb3dda20f Mon Sep 17 00:00:00 2001 From: robincwlabbe Date: Mon, 10 Jan 2022 15:48:27 +0100 Subject: [PATCH 14/14] =?UTF-8?q?Test=20numpy=20r=C3=A9ussi=20par=20pytest?= =?UTF-8?q?=20et=20flake8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- numpy_questions.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/numpy_questions.py b/numpy_questions.py index 55c00060..e8174fc1 100644 --- a/numpy_questions.py +++ b/numpy_questions.py @@ -41,11 +41,13 @@ def max_index(X): j = 0 # TODO - p,q = X.shape - for i1 in 
range(p): - for j1 in range(q): - if X[i1,j1]> X[i,j]: - i,j = i1,j1 + if type(X) is not np.ndarray: + raise ValueError('X input is not a numpy array') + if X.ndim != 2: + raise ValueError('X input array is note a 2D-array') + argmax = np.argwhere(X == X.max()) + i, j = argmax[0][0], argmax[0][1] + return i, j @@ -68,4 +70,10 @@ def wallis_product(n_terms): """ # XXX : The n_terms is an int that corresponds to the number of # terms in the product. For example 10000. - return 0. + if type(n_terms) != int: + raise ValueError('The input is not an integer') + + I_n = 1 + for i in range(1, n_terms+1): + I_n *= (4*i**2)/(4*i**2-1) + return 2*I_n