Skip to content

Commit

Permalink
remove deprecated method fetch_concurrently
Browse files Browse the repository at this point in the history
  • Loading branch information
pacman82 committed Oct 13, 2024
1 parent 727c8ff commit c4979e5
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 62 deletions.
1 change: 1 addition & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## 8.0.0

- Parameter `fetch_concurrently` now defaults to `True`. This causes `arrow-odbc-py` to use more memory by default, but enables fetching concurrently with the rest of the application logic. You can set this parameter explicitly to `False` to get the old behaviour.
- Removed deprecated method `fetch_concurrently`. Use the parameter instead.

## 7.1.0

Expand Down
53 changes: 0 additions & 53 deletions python/arrow_odbc/reader.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from typing import List, Optional, Callable
from typing_extensions import deprecated
from cffi.api import FFI # type: ignore

import pyarrow
Expand Down Expand Up @@ -321,58 +320,6 @@ def into_pyarrow_record_batch_reader(self):
tmp.schema, self.schema = self.schema, tmp.schema
return pyarrow.RecordBatchReader.from_batches(tmp.schema, tmp)

@deprecated(
    "Please use the fetch_concurrently argument on read_arrow_batches_from_odbc or " \
    "more_results instead."
)
def fetch_concurrently(self):
    """
    Fetch batches in a dedicated system thread, concurrently with application logic.

    Allocates another transit buffer and uses it to fetch row set groups (aka.
    batches) from the ODBC data source in a dedicated system thread, while the main
    thread converts the previous batch to arrow arrays and executes the application
    logic.

    .. deprecated::
        Use the ``fetch_concurrently`` argument of
        :func:`read_arrow_batches_from_odbc` or ``more_results`` instead.

    If you extract more than one result set from the cursor, you need to call this
    method for each result set you want to extract concurrently. This has been done
    so it is possible to skip result sets without worrying about the fetching thread
    starting to fetch row groups from a result set you intended to skip.

    Calling this method on an already concurrent reader has no effect.

    Example:

    .. code-block:: python

        from arrow_odbc import read_arrow_batches_from_odbc

        connection_string="Driver={ODBC Driver 17 for SQL Server};Server=localhost;"

        reader = read_arrow_batches_from_odbc(
            query="SELECT * FROM MyTable",
            connection_string=connection_string,
            batch_size=1000,
            user="SA",
            password="My@Test@Password",
        )
        # Trade memory for speed. For the price of an additional transit buffer and a
        # native system thread we fetch batches now concurrent to our application logic.
        reader.fetch_concurrently()

        for batch in reader:
            # Process arrow batches
            df = batch.to_pandas()
            # ...
    """
    try:
        self.reader.into_concurrent()
    except Exception:
        # Making a reader concurrent will not change its schema, yet if there is an error the
        # reader is destroyed and its schema is empty.
        # self.schema == self.reader.schema()
        # should always be true and so asigning it never would make the code incorrect. Yet we
        # only need to do so if it actually changes.
        self.schema = self.reader.schema()
        raise


def read_arrow_batches_from_odbc(
query: str,
Expand Down
14 changes: 5 additions & 9 deletions tests/test_arrow_odbc.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,7 @@ def test_making_an_empty_reader_concurrent_is_no_error():

reader = read_arrow_batches_from_odbc(query=query, batch_size=100, connection_string=MSSQL)
# Move to a second result set, which does not exist
reader.more_results(batch_size=100)
# Fetch the non-existing result set concurrently. This should leave the reader unchanged
reader.fetch_concurrently()
reader.more_results(batch_size=100, fetch_concurrently=True)

# Assert schema and batches are empty
assert reader.schema == pa.schema([])
Expand Down Expand Up @@ -242,20 +240,18 @@ def test_fetch_concurrently():
next(it)


def test_concurrent_reader_into_concurrent():
def test_fetch_sequential():
"""
Turning an already concurrent reader into a concurrent reader has no additional effect and
leaves the reader valid.
Use a sequential batch reader to fetch one row
"""
table = "FetchAlreadyConcurrently"
table = "FetchConcurrently"
setup_table(table=table, column_type="int", values=["42"])

query = f"SELECT * FROM {table}"

reader = read_arrow_batches_from_odbc(
query=query, batch_size=100, connection_string=MSSQL, fetch_concurrently=True
query=query, batch_size=100, connection_string=MSSQL, fetch_concurrently=False
)
reader.fetch_concurrently() # Transforming already concurrent reader into concurrent reader
it = iter(reader)

actual = next(it)
Expand Down

0 comments on commit c4979e5

Please sign in to comment.