diff --git a/docs/user-guide.rst b/docs/user-guide.rst index cc459965..50b90971 100644 --- a/docs/user-guide.rst +++ b/docs/user-guide.rst @@ -350,7 +350,73 @@ Check out the api docs for `DataValidationRule`_ and `CondtionType`_ for more de .. _CondtionType: https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/other#ConditionType -.. _DataValidationRule: https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/cells#DataValidationRule +.. _DataValidationRule: https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/cells#DataValidationRule + +Extract table +~~~~~~~~~~~~~ + +Gspread provides a function to extract a data table. +A data table is defined as a rectangular table that stops either on the **first empty** cell or +the **edge of the sheet**. + +You can extract a table from any address by providing the top left corner of the desired table. + +Gspread provides 3 directions for searching the end of the table: + + * :attr:`~gspread.utils.TableDirection.right`: extract a single row searching on the right of the starting cell + * :attr:`~gspread.utils.TableDirection.down`: extract a single column searching on the bottom of the starting cell + * :attr:`~gspread.utils.TableDirection.table`: extract a rectangular table by first searching right from starting cell, + then searching down from starting cell. + + .. note:: + + Gspread will not look for empty cells inside the table. It only looks at the top row and first column. + +Example extracting a table from the below sample sheet: + +.. list-table:: Find table + :header-rows: 1 + + * - ID + - Name + - Universe + - Super power + * - 1 + - Batman + - DC + - Very rich + * - 2 + - DeadPool + - Marvel + - self healing + * - 3 + - Superman + - DC + - super human + * - + - \- + - \- + - \- + * - 5 + - Lavigne958 + - + - maintains Gspread + * - 6 + - Alifee + - + - maintains Gspread + +Using the below code will result in rows 2 to 4: + +.. 
code:: python + + worksheet.expand("A2") + + [ + ["1", "Batman", "DC", "Very rich"], + ["2", "DeadPool", "Marvel", "self healing"], + ["3", "Superman", "DC", "super human"], + ] diff --git a/gspread/utils.py b/gspread/utils.py index 6f3fedef..6daac861 100644 --- a/gspread/utils.py +++ b/gspread/utils.py @@ -168,6 +168,12 @@ class ValidationConditionType(StrEnum): filter_expression = "FILTER_EXPRESSION" +class TableDirection(StrEnum): + table = "TABLE" + down = "DOWN" + right = "RIGHT" + + def convert_credentials(credentials: Credentials) -> Credentials: module = credentials.__module__ cls = credentials.__class__.__name__ @@ -979,6 +985,129 @@ def to_records( return [dict(zip(headers, row)) for row in values] +def _expand_right(values: List[List[str]], start: int, end: int, row: int) -> int: + """This is a private function, returning the column index of the last non empty cell + on the given row. + + Search starts from ``start`` index column. + Search ends on ``end`` index column. + Searches only in the row pointed by ``row``. + """ + try: + return values[row].index("", start, end) - 1 + except ValueError: + return end + + +def _expand_bottom(values: List[List[str]], start: int, end: int, col: int) -> int: + """This is a private function, returning the row index of the last non empty cell + on the given column. + + Search starts from ``start`` index row. + Search ends on ``end`` index row. + Searches only in the column pointed by ``col``. + """ + for rows in range(start, end): + # in case we try to look further than last row + if rows >= len(values): + return len(values) - 1 + + # check if cell is empty (or the row => empty cell) + if col >= len(values[rows]) or values[rows][col] == "": + return rows - 1 + + return end - 1 + + +def find_table( + values: List[List[str]], + start_range: str, + direction: TableDirection = TableDirection.table, +) -> List[List[str]]: + """Expands a list of values based on non-null adjacent cells. 
+ + Expand can be done in 3 directions defined in :class:`~gspread.utils.TableDirection` + + * ``TableDirection.right``: expands right until the first empty cell + * ``TableDirection.down``: expands down until the first empty cell + * ``TableDirection.table``: expands right until the first empty cell and down until the first empty cell + + In case of empty result an empty list is returned. + + When the given ``start_range`` is outside the given matrix of values the exception + :class:`~gspread.exceptions.InvalidInputValue` is raised. + + Example:: + + values = [ + ['', '', '', '', '' ], + ['', 'B2', 'C2', '', 'E2'], + ['', 'B3', 'C3', '', 'E3'], + ['', '' , '' , '', 'E4'], + ] + >>> utils.find_table(values, 'B2', TableDirection.table) + [ + ['B2', 'C2'], + ['B3', 'C3'], + ] + + + .. note:: + + the ``TableDirection.table`` will look right from starting cell then look down from starting cell. + It will not check cells located inside the table. This could lead to + potential empty values located in the middle of the table. + + .. warning:: + + Given values must be padded with `''` empty values. + + :param list[list] values: values where to find the table. + :param gspread.utils.TableDirection direction: the expand direction. + :param str start_range: the starting cell range. 
+ :rtype list(list): the resulting matrix + """ + row, col = a1_to_rowcol(start_range) + + # a1_to_rowcol returns coordinates starting from 1 + row -= 1 + col -= 1 + + if row >= len(values): + raise InvalidInputValue( + "given row for start_range is outside given values: start range row ({}) >= rows in values {}".format( + row, len(values) + ) + ) + + if col >= len(values[row]): + raise InvalidInputValue( + "given column for start_range is outside given values: start range column ({}) >= columns in values {}".format( + col, len(values[row]) + ) + ) + + if direction == TableDirection.down: + rightMost = col + bottomMost = _expand_bottom(values, row, len(values), col) + + if direction == TableDirection.right: + bottomMost = row + rightMost = _expand_right(values, col, len(values[row]), row) + + if direction == TableDirection.table: + rightMost = _expand_right(values, col, len(values[row]), row) + bottomMost = _expand_bottom(values, row, len(values), col) + + result = [] + + # build resulting array + for rows in values[row : bottomMost + 1]: + result.append(rows[col : rightMost + 1]) + + return result + + # SHOULD NOT BE NEEDED UNTIL NEXT MAJOR VERSION # DEPRECATION_WARNING_TEMPLATE = ( # "[Deprecated][in version {v_deprecated}]: {msg_deprecated}" diff --git a/gspread/worksheet.py b/gspread/worksheet.py index 463a7278..109c44db 100644 --- a/gspread/worksheet.py +++ b/gspread/worksheet.py @@ -41,6 +41,7 @@ PasteOrientation, PasteType, T, + TableDirection, ValidationConditionType, ValueInputOption, ValueRenderOption, @@ -53,6 +54,7 @@ convert_colors_to_hex_value, convert_hex_to_colors_dict, fill_gaps, + find_table, finditem, get_a1_from_absolute_range, is_full_a1_notation, @@ -3336,3 +3338,56 @@ def add_validation( } return self.client.batch_update(self.spreadsheet_id, body) + + def expand( + self, + top_left_range_name: str = "A1", + direction: TableDirection = TableDirection.table, + ) -> List[List[str]]: + """Expands a cell range based on non-null adjacent cells. 
+ + Expand can be done in 3 directions defined in :class:`~gspread.utils.TableDirection` + + * ``TableDirection.right``: expands right until the first empty cell + * ``TableDirection.down``: expands down until the first empty cell + * ``TableDirection.table``: expands right until the first empty cell and down until the first empty cell + + In case of empty result an empty list is returned. + + When the given ``top_left_range_name`` is outside the worksheet values the exception + :class:`~gspread.exceptions.InvalidInputValue` is raised. + + Example:: + + values = [ + ['', '', '', '', '' ], + ['', 'B2', 'C2', '', 'E2'], + ['', 'B3', 'C3', '', 'E3'], + ['', '' , '' , '', 'E4'], + ] + >>> worksheet.expand('B2', TableDirection.table) + [ + ['B2', 'C2'], + ['B3', 'C3'], + ] + + + .. note:: + + the ``TableDirection.table`` will look right from starting cell then look down from starting cell. + It will not check cells located inside the table. This could lead to + potential empty values located in the middle of the table. + + .. note:: + + when it is necessary to use non-default options for :meth:`~gspread.worksheet.Worksheet.get`, + please get the data first using desired options then use the function + :func:`gspread.utils.find_table` to extract the desired table. + + :param str top_left_range_name: the top left corner of the table to expand. 
+ :param gspread.utils.TableDirection direction: the expand direction + :rtype list(list): the resulting matrix + """ + + values = self.get(pad_values=True) + return find_table(values, top_left_range_name, direction) diff --git a/tests/utils_test.py b/tests/utils_test.py index 187e74eb..f64b759a 100644 --- a/tests/utils_test.py +++ b/tests/utils_test.py @@ -503,3 +503,173 @@ def test_to_records(self): # given key are unordered # but they must match a value from the given input values self.assertIn(record[key], values[i]) + + def test_find_table_simple(self): + """Test find table with basic case""" + values = [ + ["A1", "B1", "C1", "D1"], + ["", "B2", "C2", "", "E2"], + ["", "B3", "C3", "D3", "E3"], + ["A4", "", "C4", "D4", "E4"], + ] + + table = utils.find_table( + values, + "B2", + utils.TableDirection.table, + ) + + table_max_row_max_column = utils.find_table( + values, + "D3", + utils.TableDirection.table, + ) + right = utils.find_table( + values, + "B2", + utils.TableDirection.right, + ) + down = utils.find_table( + values, + "B2", + utils.TableDirection.down, + ) + single = utils.find_table(values, "D1", utils.TableDirection.table) + no_values = utils.find_table(values, "A2", utils.TableDirection.table) + + table_values = [ + ["B2", "C2"], + ["B3", "C3"], + ] + + for rowindex, row in enumerate(table_values): + self.assertListEqual(row, table[rowindex]) + + table_max_row_max_column_values = [ + ["D3", "E3"], + ["D4", "E4"], + ] + + for rowindex, row in enumerate(table_max_row_max_column): + self.assertListEqual(row, table_max_row_max_column_values[rowindex]) + + right_values = [ + ["B2", "C2"], + ] + for rowindex, row in enumerate(right_values): + self.assertListEqual(row, right[rowindex]) + + bottom_values = [ + ["B2"], + ["B3"], + ] + for rowindex, row in enumerate(bottom_values): + self.assertListEqual(row, down[rowindex]) + + self.assertEqual(len(single), 1) + self.assertEqual(len(single[0]), 1) + self.assertEqual(single[0][0], "D1") + 
self.assertEqual(no_values, []) + + def test_find_table_inner_gap(self): + """Test find table with gap in header""" + values = [ + ["A1", "B1", "C1", ""], + ["A2", "", "C2", ""], + ["A3", "B3", "C3", ""], + ["", "", "", ""], + ] + expected_table = [ + ["A1", "B1", "C1"], + ["A2", "", "C2"], + ["A3", "B3", "C3"], + ] + + table = utils.find_table( + values, + "A1", + utils.TableDirection.table, + ) + + for rowindex, row in enumerate(expected_table): + self.assertListEqual(row, table[rowindex]) + + def test_find_table_first_row_gap(self): + """Test find table with first cell empty""" + values = [ + ["A1", "", "C1", ""], + ["A2", "B2", "C2", ""], + ["A3", "B3", "C3", ""], + ["", "", "", ""], + ] + expected_table = [ + ["A1"], + ["A2"], + ["A3"], + ] + + table = utils.find_table( + values, + "A1", + utils.TableDirection.table, + ) + + for rowindex, row in enumerate(expected_table): + self.assertListEqual(row, table[rowindex]) + + def test_find_table_first_column_gap(self): + """Test find table with a gap in first column""" + values = [ + ["A1", "B1", "C1", ""], + ["", "B2", "C2", ""], + ["A3", "B3", "C3", ""], + ["", "", "", ""], + ] + expected_table = [ + ["A1", "B1", "C1"], + ] + + table = utils.find_table( + values, + "A1", + utils.TableDirection.table, + ) + + for rowindex, row in enumerate(expected_table): + self.assertListEqual(row, table[rowindex]) + + def test_find_table_last_column_gap(self): + """Test find table with a gap in last column""" + values = [ + ["A1", "B1", "C1", ""], + ["A2", "B2", "", ""], + ["A3", "B3", "C3", ""], + ["", "", "", ""], + ] + expected_table = [ + ["A1", "B1", "C1"], + ["A2", "B2", ""], + ["A3", "B3", "C3"], + ] + + table = utils.find_table( + values, + "A1", + utils.TableDirection.table, + ) + + for rowindex, row in enumerate(expected_table): + self.assertListEqual(row, table[rowindex]) + + def test_find_table_empty_top_left_corner(self): + """Test find table with an empty top left cell and empty adjacent cells""" + + values = [ + 
["", "", "C1", ""], + ["", "B2", "C2", ""], + ["", "B3", "C3", ""], + ] + + table = utils.find_table(values, "A1", utils.TableDirection.table) + + self.assertListEqual(table, [], "resulting table should be empty")