chore: addressed linter errors and configured workspace

odu-emse · Jul 18, 2023 · bdf2ce1
1 parent ce7891e
commit bdf2ce1
Show file tree

Hide file tree

Showing 2 changed files with 13 additions and 8 deletions.
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -1,7 +1,13 @@
 {
   "[python]": {
-    "editor.defaultFormatter": "ms-python.black-formatter"
+    "editor.defaultFormatter": "ms-python.black-formatter",
   },
+  "python.linting.flake8Args": [
+  "--max-line-length=150"
+  ],
+  "flake8.interpreter": [
+    "venv/bin/python3"
+  ],
   "python.formatting.provider": "none",
   "cSpell.words": [
     "dataframe",

diff --git a/models/classification.py b/models/classification.py
@@ -351,7 +351,7 @@ def prepare(self, col: str) -> DataFrame:
 
             self.data = df
             self.N_CLUSTER = int(np.sqrt(len(df)))
-            if self.viz == True:
+            if self.viz:
                 self.generate_count_plot(data=df)
             self._save_data_frame(df, fileName="603_clean.csv")
 
@@ -386,7 +386,7 @@ def prepare(self, col: str) -> DataFrame:
             self.testData = dfTest
             self.N_CLUSTER = int(np.sqrt(len(dfTrain)))
             self.N_TEST_CLUSTER = int(np.sqrt(len(dfTest)))
-            if self.viz == True:
+            if self.viz:
                 self.generate_count_plot(data=dfTrain)
                 self.generate_count_plot(data=dfTest)
             self._save_data_frame(dfTrain, fileName="603_clean.csv")
@@ -468,7 +468,6 @@ def _data_encoder(self, df: DataFrame, col: str = "cluster") -> DataFrame:
         """
         Encode the data for the classification.
         """
-        from sklearn.preprocessing import LabelEncoder
 
         df[col] = self.encoder.fit_transform(df[col])
 
@@ -717,7 +716,6 @@ def _run_pca(
         Returns:
             None
         """
-        import numpy as np
         from sklearn.decomposition import PCA
 
         pca = PCA(n_components=2, random_state=42)
@@ -938,7 +936,8 @@ def generate_heat_map(
 
     def _print_sorted_similarities(self, sim_arr, threshold=0) -> DataFrame:
         """
-        Store the similarities between the documents in a data frame that is sorted by the similarity score in descending order. Removing the diagonal values.
+        Store the similarities between the documents in a data frame that is sorted by the similarity score in descending order.
+        The diagonal values are removed.
 
         Args:
             sim_arr (numpy.ndarray): The similarity array.
@@ -984,13 +983,13 @@ def run(self) -> None:
         self.read()
         self.prepare(col="features")
         self._create_model()
-        if self.viz == True:
+        if self.viz:
             self._run_word_cloud_per_cluster(df=self.data)
             if self.testPath is not None:
                 # TODO: Fix test data not having x and y columns
                 # self.generate_scatter_plot(data=self.testData)
                 pass
-        if self.verbose == True:
+        if self.verbose:
             self._log("Successfully ran the classification model")