Skip to content

Commit

Permalink
chore: addressed linter errors and configured workspace
Browse files: browse the repository at this point in the history
  • Loading branch information
chef-danny-d committed Jul 18, 2023
1 parent ce7891e commit bdf2ce1
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 8 deletions.
8 changes: 7 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
{
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter"
"editor.defaultFormatter": "ms-python.black-formatter",
},
"python.linting.flake8Args": [
"--max-line-length=150"
],
"flake8.interpreter": [
"venv/bin/python3"
],
"python.formatting.provider": "none",
"cSpell.words": [
"dataframe",
Expand Down
13 changes: 6 additions & 7 deletions models/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ def prepare(self, col: str) -> DataFrame:

self.data = df
self.N_CLUSTER = int(np.sqrt(len(df)))
if self.viz == True:
if self.viz:
self.generate_count_plot(data=df)
self._save_data_frame(df, fileName="603_clean.csv")

Expand Down Expand Up @@ -386,7 +386,7 @@ def prepare(self, col: str) -> DataFrame:
self.testData = dfTest
self.N_CLUSTER = int(np.sqrt(len(dfTrain)))
self.N_TEST_CLUSTER = int(np.sqrt(len(dfTest)))
if self.viz == True:
if self.viz:
self.generate_count_plot(data=dfTrain)
self.generate_count_plot(data=dfTest)
self._save_data_frame(dfTrain, fileName="603_clean.csv")
Expand Down Expand Up @@ -468,7 +468,6 @@ def _data_encoder(self, df: DataFrame, col: str = "cluster") -> DataFrame:
"""
Encode the data for the classification.
"""
from sklearn.preprocessing import LabelEncoder

df[col] = self.encoder.fit_transform(df[col])

Expand Down Expand Up @@ -717,7 +716,6 @@ def _run_pca(
Returns:
None
"""
import numpy as np
from sklearn.decomposition import PCA

pca = PCA(n_components=2, random_state=42)
Expand Down Expand Up @@ -938,7 +936,8 @@ def generate_heat_map(

def _print_sorted_similarities(self, sim_arr, threshold=0) -> DataFrame:
"""
Store the similarities between the documents in a data frame that is sorted by the similarity score in descending order. Removing the diagonal values.
Store the similarities between the documents in a data frame that is sorted by the similarity score in descending order.
Removing the diagonal values.
Args:
sim_arr (numpy.ndarray): The similarity array.
Expand Down Expand Up @@ -984,13 +983,13 @@ def run(self) -> None:
self.read()
self.prepare(col="features")
self._create_model()
if self.viz == True:
if self.viz:
self._run_word_cloud_per_cluster(df=self.data)
if self.testPath is not None:
# TODO: Fix test data not having x and y columns
# self.generate_scatter_plot(data=self.testData)
pass
if self.verbose == True:
if self.verbose:
self._log("Successfully ran the classification model")


Expand Down

0 comments on commit bdf2ce1

Please sign in to comment.