diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 44f080f9..29cd0de3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -47,12 +47,12 @@ repos: - id: black - id: black-jupyter - repo: https://github.com/pre-commit/mirrors-prettier - rev: v4.0.0-alpha.8 + rev: v3.1.0 hooks: - id: prettier exclude: (\.secret\.)|(pnpm-lock.yaml) - repo: https://github.com/pre-commit/mirrors-eslint - rev: v9.0.0-alpha.2 + rev: v8.56.0 hooks: - id: eslint files: src diff --git a/package.json b/package.json index 1aabca73..28c33636 100644 --- a/package.json +++ b/package.json @@ -36,6 +36,7 @@ "@tippyjs/react": "^4.2.6", "@types/dompurify": "^3.0.2", "@uiw/react-json-view": "2.0.0-alpha.7", + "bleu-score": "^1.0.4", "chroma-js": "^2.4.2", "comlink": "^4.4.1", "cropperjs": "^1.5.13", @@ -91,8 +92,7 @@ "unique-names-generator": "^4.7.1", "uuid": "^9.0.0", "wavesurfer.js": "^6.4.0", - "zustand": "^4.3.3", - "bleu-score": "^1.0.4" + "zustand": "^4.3.3" }, "devDependencies": { "@openapitools/openapi-generator-cli": "^2.7.0", @@ -132,7 +132,7 @@ "eslint-plugin-react-hooks": "^4.6.0", "jest": "^29.4.3", "node-fetch": "^2.6.9", - "prettier": "^2.7.1", + "prettier": "^3.2.5", "tailwindcss": "^3.3.1", "ts-jest": "^29.0.5", "twin.macro": "^3.3.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 9e539655..b816d0e4 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1,9 +1,5 @@ lockfileVersion: '6.0' -settings: - autoInstallPeers: true - excludeLinksFromLockfile: false - dependencies: '@dnd-kit/core': specifier: ^6.0.8 @@ -31,7 +27,7 @@ dependencies: version: 3.0.2 '@uiw/react-json-view': specifier: 2.0.0-alpha.7 - version: 2.0.0-alpha.7(@babel/runtime@7.23.2)(react-dom@18.2.0)(react@18.2.0) + version: 2.0.0-alpha.7(@babel/runtime@7.23.9)(react-dom@18.2.0)(react@18.2.0) bleu-score: specifier: ^1.0.4 version: 1.0.4 @@ -317,11 +313,11 @@ devDependencies: specifier: ^2.6.9 version: 2.6.9 prettier: - specifier: ^2.7.1 - version: 2.7.1 + specifier: ^3.2.5 + version: 3.2.5 tailwindcss: specifier: ^3.3.1 - version: 3.3.1(postcss@8.4.31) + version: 3.3.1(postcss@8.4.35) ts-jest: specifier: ^29.0.5 version: 29.0.5(@babel/core@7.22.9)(jest@29.4.3)(typescript@5.1.6) @@ -715,6 +711,14 @@ packages: engines: {node: '>=6.9.0'} dependencies: regenerator-runtime: 0.14.0 + dev: true + + /@babel/runtime@7.23.9: + resolution: {integrity: sha512-0CX6F+BI2s9dkUqr08KFrAIZgNFj75rdBU/DjCyYLIaV/quFjkk6T+EJ2LkZHyZTbEV4L5p97mNkUsHl2wLFAw==} + engines: {node: '>=6.9.0'} + dependencies: + regenerator-runtime: 0.14.1 + dev: false /@babel/template@7.22.15: resolution: {integrity: sha512-QPErUVm4uyJa60rkI73qneDacvdvzxshT3kksGqlGWYdOTIUOwJ7RDUL8sGqslY1uXWSL6xMFKEXDS3ox2uF0w==} @@ -1897,7 +1901,7 @@ packages: lodash.isplainobject: 4.0.6 lodash.merge: 4.6.2 postcss-selector-parser: 6.0.10 - tailwindcss: 3.3.1(postcss@8.4.31) + tailwindcss: 3.3.1(postcss@8.4.35) dev: false /@testing-library/dom@9.3.1: @@ -2510,14 +2514,14 @@ packages: eslint-visitor-keys: 3.4.2 dev: true - /@uiw/react-json-view@2.0.0-alpha.7(@babel/runtime@7.23.2)(react-dom@18.2.0)(react@18.2.0): + /@uiw/react-json-view@2.0.0-alpha.7(@babel/runtime@7.23.9)(react-dom@18.2.0)(react@18.2.0): resolution: {integrity: sha512-hhlDlQcszeg3IcwxIc7NLpkzOVs4A4rB0IgkVZkeUnLOEx2AMlE2hlybgxJfm4E0c9NoRmOFgrYkL1SbeXBW4w==} peerDependencies: '@babel/runtime': '>=7.10.0' react: '>=18.0.0' react-dom: '>=18.0.0' dependencies: - '@babel/runtime': 7.23.2 + '@babel/runtime': 7.23.9 react: 18.2.0 react-dom: 18.2.0(react@18.2.0) dev: false @@ -6020,6 +6024,12 @@ packages: resolution: {integrity: sha512-BGcqMMJuToF7i1rt+2PWSNVnWIkGCU78jBG3RxO/bZlnZPK2Cmi2QaffxGO/2RvWi9sL+FAiRiXMgsyxQ1DIDA==} engines: {node: ^10 || ^12 || ^13.7 || ^14 || >=15.0.1} hasBin: true + dev: true + + /nanoid@3.3.7: + resolution: {integrity: sha512-eSRppjcPIatRIMC1U6UngP8XFcz8MQWGQdt1MTBQ7NaAmvXDfvNxbvWV3x2y6CdEUciCSsDHDQZbhYaB8QEo2g==} + engines: {node: ^10 || ^12 || ^13.7 || ^14 || >=15.0.1} + hasBin: true /natural-compare-lite@1.4.0: resolution: {integrity: sha512-Tj+HTDSJJKaZnfiuw+iaF9skdPpTo2GtEly5JHnWV/hfv2Qj/9RKsGISQtLh2ox3l5EAGw487hnBee0sIJ6v2g==} @@ -6361,27 +6371,27 @@ packages: deprecated: You can find the new Popper v2 at @popperjs/core, this package is dedicated to the legacy v1 dev: false - /postcss-import@14.1.0(postcss@8.4.31): + /postcss-import@14.1.0(postcss@8.4.35): resolution: {integrity: sha512-flwI+Vgm4SElObFVPpTIT7SU7R3qk2L7PyduMcokiaVKuWv9d/U+Gm/QAd8NDLuykTWTkcrjOeD2Pp1rMeBTGw==} engines: {node: '>=10.0.0'} peerDependencies: postcss: ^8.0.0 dependencies: - postcss: 8.4.31 + postcss: 8.4.35 postcss-value-parser: 4.2.0 read-cache: 1.0.0 resolve: 1.22.4 - /postcss-js@4.0.1(postcss@8.4.31): + /postcss-js@4.0.1(postcss@8.4.35): resolution: {integrity: sha512-dDLF8pEO191hJMtlHFPRa8xsizHaM82MLfNkUHdUtVEV3tgTp5oj+8qbEqYM57SLfc74KSbw//4SeJma2LRVIw==} engines: {node: ^12 || ^14 || >= 16} peerDependencies: postcss: ^8.4.21 dependencies: camelcase-css: 2.0.1 - postcss: 8.4.31 + postcss: 8.4.35 - /postcss-load-config@3.1.4(postcss@8.4.31): + /postcss-load-config@3.1.4(postcss@8.4.35): resolution: {integrity: sha512-6DiM4E7v4coTE4uzA8U//WhtPwyhiim3eyjEMFCnUpzbrkK9wJHgKDT2mR+HbtSrd/NubVaYTOpSpjUl8NQeRg==} engines: {node: '>= 10'} peerDependencies: @@ -6394,16 +6404,16 @@ packages: optional: true dependencies: lilconfig: 2.1.0 - postcss: 8.4.31 + postcss: 8.4.35 yaml: 1.10.2 - /postcss-nested@6.0.0(postcss@8.4.31): + /postcss-nested@6.0.0(postcss@8.4.35): resolution: {integrity: sha512-0DkamqrPcmkBDsLn+vQDIrtkSbNkv5AD/M322ySo9kqFkCIYklym2xEmWkwo+Y3/qZo34tzEPNUw4y7yMCdv5w==} engines: {node: '>=12.0'} peerDependencies: postcss: ^8.2.14 dependencies: - postcss: 8.4.31 + postcss: 8.4.35 postcss-selector-parser: 6.0.13 /postcss-selector-parser@6.0.10: @@ -6435,6 +6445,15 @@ packages: nanoid: 3.3.6 picocolors: 1.0.0 source-map-js: 1.0.2 + dev: true + + /postcss@8.4.35: + resolution: {integrity: sha512-u5U8qYpBCpN13BsiEB0CbR1Hhh4Gc0zLFuedrHJKMctHCHAGrMdG0PRM/KErzAL3CU6/eckEtmHNB3x6e3c0vA==} + engines: {node: ^10 || ^12 || >=14} + dependencies: + nanoid: 3.3.7 + picocolors: 1.0.0 + source-map-js: 1.0.2 /potpack@1.0.2: resolution: {integrity: sha512-choctRBIV9EMT9WGAZHn3V7t0Z2pMQyl0EZE6pFc/6ml3ssw7Dlf/oAOvFwjm1HVsqfQN8GfeFyJ+d8tRzqueQ==} @@ -6445,9 +6464,9 @@ packages: engines: {node: '>= 0.8.0'} dev: true - /prettier@2.7.1: - resolution: {integrity: sha512-ujppO+MkdPqoVINuDFDRLClm7D78qbDt0/NR+wp5FqEZOoTNAjPHWj17QRhu7geIHJfcNhRk1XVQmF8Bp3ye+g==} - engines: {node: '>=10.13.0'} + /prettier@3.2.5: + resolution: {integrity: sha512-3/GWa9aOC0YeD7LUfvOG2NiDyhOWRvt1k+rcKhOuYnMY24iiCphgneUfJDyFXd6rZCAnuLBv6UeAULtrhT/F4A==} + engines: {node: '>=14'} hasBin: true dev: true @@ -6929,6 +6948,11 @@ packages: /regenerator-runtime@0.14.0: resolution: {integrity: sha512-srw17NI0TUWHuGa5CFGGmhfNIeja30WMBfbslPNhf6JrqQlLN5gcrvig1oqPxiVaXb0oW0XRKtH6Nngs5lKCIA==} + dev: true + + /regenerator-runtime@0.14.1: + resolution: {integrity: sha512-dYnhHh0nJoMfnkZs6GmmhFknAGRrLznOu5nc9ML+EJxGvrx6H7teuevqVqCuPcPK//3eDrrjQhehXVx9cnkGdw==} + dev: false /regexp-to-ast@0.5.0: resolution: {integrity: sha512-tlbJqcMHnPKI9zSrystikWKwHkBqu2a/Sgw01h3zFjvYrMxEDYHzzoMZnUrbIfpTFEsoRnnviOXNCzFiSc54Qw==} @@ -7437,7 +7461,7 @@ packages: resolution: {integrity: sha512-Cat63mxsVJlzYvN51JmVXIgNoUokrIaT2zLclCXjRd8boZ0004U4KCs/sToJ75C6sdlByWxpYnb5Boif1VSFew==} dev: false - /tailwindcss@3.3.1(postcss@8.4.31): + /tailwindcss@3.3.1(postcss@8.4.35): resolution: {integrity: sha512-Vkiouc41d4CEq0ujXl6oiGFQ7bA3WEhUZdTgXAhtKxSy49OmKs8rEfQmupsfF0IGW8fv2iQkp1EVUuapCFrZ9g==} engines: {node: '>=12.13.0'} hasBin: true @@ -7458,11 +7482,11 @@ packages: normalize-path: 3.0.0 object-hash: 3.0.0 picocolors: 1.0.0 - postcss: 8.4.31 - postcss-import: 14.1.0(postcss@8.4.31) - postcss-js: 4.0.1(postcss@8.4.31) - postcss-load-config: 3.1.4(postcss@8.4.31) - postcss-nested: 6.0.0(postcss@8.4.31) + postcss: 8.4.35 + postcss-import: 14.1.0(postcss@8.4.35) + postcss-js: 4.0.1(postcss@8.4.35) + postcss-load-config: 3.1.4(postcss@8.4.35) + postcss-nested: 6.0.0(postcss@8.4.35) postcss-selector-parser: 6.0.13 postcss-value-parser: 4.2.0 quick-lru: 5.1.1 @@ -7699,7 +7723,7 @@ packages: lodash.get: 4.4.2 lodash.merge: 4.6.2 postcss-selector-parser: 6.0.13 - tailwindcss: 3.3.1(postcss@8.4.31) + tailwindcss: 3.3.1(postcss@8.4.35) dev: true /type-check@0.4.0: @@ -8216,3 +8240,7 @@ packages: react: 18.2.0 use-sync-external-store: 1.2.0(react@18.2.0) dev: false + +settings: + autoInstallPeers: true + excludeLinksFromLockfile: false diff --git a/renumics/spotlight/backend/exceptions.py b/renumics/spotlight/backend/exceptions.py index f84194fa..14431cf4 100644 --- a/renumics/spotlight/backend/exceptions.py +++ b/renumics/spotlight/backend/exceptions.py @@ -138,17 +138,6 @@ def __init__(self, dtype: DType, value: Any) -> None: ) -class DatasetColumnsNotUnique(Problem): - """Dataset's columns are not unique""" - - def __init__(self) -> None: - super().__init__( - "Dataset columns not unique", - "Dataset's columns are not unique.", - status.HTTP_403_FORBIDDEN, - ) - - class InvalidLayout(Problem): """The layout could not be parsed from the given source""" diff --git a/renumics/spotlight/dataset/pandas.py b/renumics/spotlight/dataset/pandas.py index 222ee66f..47701c9b 100644 --- a/renumics/spotlight/dataset/pandas.py +++ b/renumics/spotlight/dataset/pandas.py @@ -132,7 +132,7 @@ def infer_dtype(column: pd.Series) -> dtypes.DType: if pd.api.types.is_bool_dtype(column): return dtypes.bool_dtype - if pd.api.types.is_categorical_dtype(column): + if isinstance(column.dtype, pd.CategoricalDtype): return dtypes.CategoryDType( {category: code for code, category in enumerate(column.cat.categories)} ) diff --git a/renumics/spotlight_plugins/core/pandas_data_source.py b/renumics/spotlight_plugins/core/pandas_data_source.py index a9022147..a2ad50f7 100644 --- a/renumics/spotlight_plugins/core/pandas_data_source.py +++ b/renumics/spotlight_plugins/core/pandas_data_source.py @@ -3,17 +3,15 @@ """ from pathlib import Path -from typing import Any, List, Union, cast +from typing import List, Union, cast import datasets import numpy as np import pandas as pd from renumics.spotlight import dtypes -from renumics.spotlight.backend.exceptions import DatasetColumnsNotUnique from renumics.spotlight.data_source import ColumnMetadata, DataSource, datasource from renumics.spotlight.data_source.exceptions import InvalidDataSource -from renumics.spotlight.dataset.exceptions import ColumnNotExistsError from renumics.spotlight.io import prepare_hugging_face_dict, try_literal_eval @@ -87,20 +85,34 @@ def __init__(self, source: Union[Path, pd.DataFrame]): df = cast(pd.DataFrame, source) self._name = "pd.DataFrame" - if not df.columns.is_unique: - raise DatasetColumnsNotUnique() self._generation_id = 0 self._uid = str(id(df)) self._df = df.convert_dtypes() self._intermediate_dtypes = { - # TODO: convert column name - col: _determine_intermediate_dtype(self._df[col]) - for col in self._df.columns + column_name: _determine_intermediate_dtype(self._get_column(column_name)) + for column_name in self.column_names } @property def column_names(self) -> List[str]: - return [str(column) for column in self._df.columns] + column_names: List[str] = [] + for column in self._df.columns: + if ( + isinstance(column, tuple) + and len(column) + and all(name == "" for name in column[1:]) + ): + column_name = str(column[0]) + else: + column_name = str(column) + if column_name not in column_names: + column_names.append(column_name) + continue + i = 1 + while f"{column_name} ({i})" in column_names: + i += 1 + column_names.append(f"{column_name} ({i})") + return column_names @property def df(self) -> pd.DataFrame: @@ -134,8 +146,7 @@ def get_column_values( column_name: str, indices: Union[List[int], np.ndarray, slice] = slice(None), ) -> np.ndarray: - column_index = self._parse_column_index(column_name) - column: pd.Series = self._df[column_index].iloc[indices] + column = cast(pd.Series, self._get_column(column_name).iloc[indices]) # type: ignore if pd.api.types.is_bool_dtype(column): values = column.to_numpy(na_value=pd.NA) # type: ignore na_mask = column.isna() @@ -156,7 +167,7 @@ def get_column_values( return values if pd.api.types.is_datetime64_any_dtype(column): return column.dt.tz_localize(None).to_numpy() - if pd.api.types.is_categorical_dtype(column): + if isinstance(column.dtype, pd.CategoricalDtype): return column.cat.codes.to_numpy() if pd.api.types.is_string_dtype(column): column = column.astype(object).mask(column.isna(), None) @@ -182,29 +193,21 @@ def get_column_values( def get_column_metadata(self, _: str) -> ColumnMetadata: return ColumnMetadata(nullable=True, editable=True) - def _parse_column_index(self, column_name: str) -> Any: + def _get_column(self, column_name: str) -> pd.Series: + return cast(pd.Series, self._df.iloc[:, self._get_column_index(column_name)]) + + def _get_column_index(self, column_name: str) -> int: column_names = self.column_names - try: - loc = self._df.columns.get_loc(column_name) - except KeyError: - ... - else: - if isinstance(self._df.columns, pd.MultiIndex): - return self._df.columns[loc][0] - return self._df.columns[loc] - try: - index = column_names.index(column_name) - except ValueError as e: - raise ColumnNotExistsError( - f"Column '{column_name}' doesn't exist in the dataset." - ) from e - return self._df.columns[index] + + assert column_name in column_names + + return column_names.index(column_name) def _determine_intermediate_dtype(column: pd.Series) -> dtypes.DType: if pd.api.types.is_bool_dtype(column): return dtypes.bool_dtype - if pd.api.types.is_categorical_dtype(column): + if isinstance(column.dtype, pd.CategoricalDtype): return dtypes.CategoryDType( {category: code for code, category in enumerate(column.cat.categories)} ) diff --git a/src/client/models/DataIssue.ts b/src/client/models/DataIssue.ts index 5375cc38..e093f7cc 100644 --- a/src/client/models/DataIssue.ts +++ b/src/client/models/DataIssue.ts @@ -60,7 +60,7 @@ export const DataIssueSeverityEnum = { High: 'high', } as const; export type DataIssueSeverityEnum = - typeof DataIssueSeverityEnum[keyof typeof DataIssueSeverityEnum]; + (typeof DataIssueSeverityEnum)[keyof typeof DataIssueSeverityEnum]; /** * Check if a given object implements the DataIssue interface. diff --git a/src/client/models/FileEntry.ts b/src/client/models/FileEntry.ts index a7bba400..81ac46ac 100644 --- a/src/client/models/FileEntry.ts +++ b/src/client/models/FileEntry.ts @@ -53,7 +53,7 @@ export const FileEntryTypeEnum = { Folder: 'folder', } as const; export type FileEntryTypeEnum = - typeof FileEntryTypeEnum[keyof typeof FileEntryTypeEnum]; + (typeof FileEntryTypeEnum)[keyof typeof FileEntryTypeEnum]; /** * Check if a given object implements the FileEntry interface. diff --git a/src/client/runtime.ts b/src/client/runtime.ts index ee4a012e..7a178f34 100644 --- a/src/client/runtime.ts +++ b/src/client/runtime.ts @@ -294,21 +294,30 @@ function isFormData(value: any): value is FormData { export class ResponseError extends Error { override name: 'ResponseError' = 'ResponseError'; - constructor(public response: Response, msg?: string) { + constructor( + public response: Response, + msg?: string + ) { super(msg); } } export class FetchError extends Error { override name: 'FetchError' = 'FetchError'; - constructor(public cause: Error, msg?: string) { + constructor( + public cause: Error, + msg?: string + ) { super(msg); } } export class RequiredError extends Error { override name: 'RequiredError' = 'RequiredError'; - constructor(public field: string, msg?: string) { + constructor( + public field: string, + msg?: string + ) { super(msg); } } diff --git a/src/components/ColumnSelector/ColumnListItem.tsx b/src/components/ColumnSelector/ColumnListItem.tsx index 3eea6d76..9c09d962 100644 --- a/src/components/ColumnSelector/ColumnListItem.tsx +++ b/src/components/ColumnSelector/ColumnListItem.tsx @@ -57,14 +57,16 @@ const ColumnListItem = ({ column, selected, onChangeSelected }: Props): JSX.Elem {column.name}