From c4979e55225f5f663fb930679d08b3a93d2ec3ce Mon Sep 17 00:00:00 2001 From: Markus Klein Date: Sun, 13 Oct 2024 21:59:08 +0200 Subject: [PATCH] remove deprecated method fetch_concurrently --- Changelog.md | 1 + python/arrow_odbc/reader.py | 53 ------------------------------------- tests/test_arrow_odbc.py | 14 ++++------ 3 files changed, 6 insertions(+), 62 deletions(-) diff --git a/Changelog.md b/Changelog.md index 77dbb21..fec5872 100644 --- a/Changelog.md +++ b/Changelog.md @@ -3,6 +3,7 @@ ## 8.0.0 - Parameter `fetch_concurrently` now defaults to `True`. This causes `arrow-odbc-py` to use more memory by default, but enables fetching concurrently with the rest of the application logic. You can set this parameter explicitly to `False` to get the old behaviour. +- Removed deprecated method `fetch_concurrently`. Use the parameter instead. ## 7.1.0 diff --git a/python/arrow_odbc/reader.py b/python/arrow_odbc/reader.py index 2ec07c7..107f4e2 100644 --- a/python/arrow_odbc/reader.py +++ b/python/arrow_odbc/reader.py @@ -1,5 +1,4 @@ from typing import List, Optional, Callable -from typing_extensions import deprecated from cffi.api import FFI # type: ignore import pyarrow @@ -321,58 +320,6 @@ def into_pyarrow_record_batch_reader(self): tmp.schema, self.schema = self.schema, tmp.schema return pyarrow.RecordBatchReader.from_batches(tmp.schema, tmp) - @deprecated( - "Please use the fetch_concurrently argument on read_arrow_batches_from_odbc or " \ - "more_results instead." - ) - def fetch_concurrently(self): - """ - Allocate another transit buffer and use it to fetch row set groups (aka. batches) from the - ODBC data source in a dedicated system thread, while the main thread converts the previous - batch to arrow arrays and executes the application logic. - - If you extract more than one result set from the cursor, you need to call these method for - each result set you want to extract concurrently.
This has been done so it is possible to - skip result sets without worrying about the fetching thread to start fetching row groups - from a result set you intended to skip. - - Calling this method on an already concurrent reader has no effect. - - Example: - - .. code-block:: python - - from arrow_odbc import read_arrow_batches_from_odbc - - connection_string="Driver={ODBC Driver 17 for SQL Server};Server=localhost;" - - reader = read_arrow_batches_from_odbc( - query=f"SELECT * FROM MyTable, - connection_string=connection_string, - batch_size=1000, - user="SA", - password="My@Test@Password", - ) - # Trade memory for speed. For the price of an additional transit buffer and a native - # system thread we fetch batches now concurrent to our application logic. - reader.fetch_concurrently() - - for batch in reader: - # Process arrow batches - df = batch.to_pandas() - # ... - """ - try: - self.reader.into_concurrent() - except Exception: - # Making a reader concurrent will not change its schema, yet if there is an error the - # reader is destroyed and its schema is empty. - # self.schema == self.reader.schema() - # should always be true and so asigning it never would make the code incorrect. Yet we - # only need to do so if it actually changes. - self.schema = self.reader.schema() - raise - def read_arrow_batches_from_odbc( query: str, diff --git a/tests/test_arrow_odbc.py b/tests/test_arrow_odbc.py index aaf6014..dad0b0f 100644 --- a/tests/test_arrow_odbc.py +++ b/tests/test_arrow_odbc.py @@ -171,9 +171,7 @@ def test_making_an_empty_reader_concurrent_is_no_error(): reader = read_arrow_batches_from_odbc(query=query, batch_size=100, connection_string=MSSQL) # Move to a second result set, which does not exist - reader.more_results(batch_size=100) - # Fetch the non-existing result set concurrently.
This should leave the reader unchanged - reader.fetch_concurrently() + reader.more_results(batch_size=100, fetch_concurrently=True) # Assert schema and batches are empty assert reader.schema == pa.schema([]) @@ -242,20 +240,18 @@ def test_fetch_concurrently(): next(it) -def test_concurrent_reader_into_concurrent(): +def test_fetch_sequential(): """ - Turning an already concurrent reader into a concurrent reader has no additional effect and - leaves the reader valid. + Use a sequential batch reader to fetch one row """ - table = "FetchAlreadyConcurrently" + table = "FetchSequential" setup_table(table=table, column_type="int", values=["42"]) query = f"SELECT * FROM {table}" reader = read_arrow_batches_from_odbc( - query=query, batch_size=100, connection_string=MSSQL, fetch_concurrently=True + query=query, batch_size=100, connection_string=MSSQL, fetch_concurrently=False ) - reader.fetch_concurrently() # Transforming already concurrent reader into concurrent reader it = iter(reader) actual = next(it)