Hi team, it would be helpful to expose a page-limit parameter so that broad queries like 'climate' can stop after a certain number of pages, if the user so chooses. This could be the expected behaviour:
# Imports assumed to be present in the module already
import itertools
from datetime import date
from typing import Any, Dict, Iterator, List, Optional


class EdgarTextSearcher:

    # ---> perhaps adjust like this?
    def _fetch_search_request_results(
        self,
        search_request_url_args: str,
        min_wait_seconds: float,
        max_wait_seconds: float,
        retries: int,
        max_pages: Optional[int] = None,  # New parameter
    ) -> Iterator[Iterator[Dict[str, Any]]]:
        """
        Fetches the results for the given search request and paginates through the results.

        :param search_request_url_args: URL-encoded request arguments string to concatenate to the SEC website URL
        :param min_wait_seconds: minimum number of seconds to wait for the request to complete
        :param max_wait_seconds: maximum number of seconds to wait for the request to complete
        :param retries: number of times to retry the request before failing
        :param max_pages: maximum number of pages to fetch (optional)
        :return: Iterator of iterators of dictionaries representing the parsed table rows
        """
        # Fetch the first page and verify that the request succeeded by
        # checking that the results table appears in the response
        self.json_response = fetch_page(
            f"{TEXT_SEARCH_BASE_URL}{search_request_url_args}",
            min_wait_seconds,
            max_wait_seconds,
            retries,
        )(
            lambda json_response: json_response.get("error") is None
            and json_response.get("hits", {}).get("hits", 0) != 0,
            f"First search request failed for URL {TEXT_SEARCH_BASE_URL}{search_request_url_args} ...",
        )

        # Get the total number of pages
        num_pages = self._compute_number_of_pages()

        # Cap the number of pages if max_pages is specified
        if max_pages is not None:
            num_pages = min(num_pages, max_pages)
            print(f"Limiting search to {num_pages} pages")

        for i in range(1, num_pages + 1):
            # The from= offset assumes 100 results per page
            paginated_url = f"{TEXT_SEARCH_BASE_URL}{search_request_url_args}&page={i}&from={100 * (i - 1)}"
            try:
                self.json_response = fetch_page(
                    paginated_url,
                    min_wait_seconds,
                    max_wait_seconds,
                    retries,
                )(
                    lambda json_response: json_response.get("error") is None,
                    f"Search request failed for page {i} at URL {paginated_url}, skipping page...",
                )
                if self.json_response.get("hits", {}).get("hits", 0) == 0:
                    raise ResultsTableNotFoundError()
                page_results = self._parse_table_rows(paginated_url)
                yield page_results
            except PageCheckFailedError as e:
                print(e)
                continue
            except ResultsTableNotFoundError:
                print(
                    f"Could not find results table on page {i} at URL {paginated_url}, skipping page..."
                )
                continue
            except Exception as e:
                print(
                    f"Unexpected {e.__class__.__name__} error occurred while "
                    f"fetching page {i} at URL {paginated_url}, skipping page: {e}"
                )
                continue
    def text_search(
        self,
        keywords: List[str],
        entity_id: Optional[str],
        filing_form: Optional[str],
        single_forms: Optional[List[str]],
        start_date: date,
        end_date: date,
        min_wait_seconds: float,
        max_wait_seconds: float,
        retries: int,
        destination: str,
        peo_in: Optional[str],
        inc_in: Optional[str],
        max_pages: Optional[int] = None,  # New parameter
    ) -> None:
        """
        Searches the SEC website for filings based on the given parameters.

        :param keywords: Search keywords to input in the "Document word or phrase" field
        :param entity_id: Entity/Person name, ticker, or CIK number to input in the "Company name, ticker, or CIK" field
        :param filing_form: Group to select within the filing category dropdown menu, defaults to None
        :param single_forms: List of single forms to search for (e.g. ['10-K', '10-Q']), defaults to None
        :param start_date: Start date for the custom date range
        :param end_date: End date for the custom date range
        :param min_wait_seconds: Minimum number of seconds to wait for the request to complete
        :param max_wait_seconds: Maximum number of seconds to wait for the request to complete
        :param retries: Number of times to retry the request before failing
        :param destination: Name of the CSV file to write the results to
        :param peo_in: Search principal executive offices in a location (e.g. "NY,OH")
        :param inc_in: Search incorporated in a location (e.g. "NY,OH")
        :param max_pages: Maximum number of pages to fetch (optional)
        """
        self._generate_search_requests(
            keywords=keywords,
            entity_id=entity_id,
            filing_form=filing_form,
            single_forms=single_forms,
            start_date=start_date,
            end_date=end_date,
            min_wait_seconds=min_wait_seconds,
            max_wait_seconds=max_wait_seconds,
            retries=retries,
            peo_in=peo_in,
            inc_in=inc_in,
        )
        search_requests_results: List[Iterator[Iterator[Dict[str, Any]]]] = []
        for r in self.search_requests:
            # Run generated search requests and paginate through results
            try:
                all_pages_results: Iterator[Iterator[Dict[str, Any]]] = (
                    self._fetch_search_request_results(
                        search_request_url_args=r,
                        min_wait_seconds=min_wait_seconds,
                        max_wait_seconds=max_wait_seconds,
                        retries=retries,
                        max_pages=max_pages,  # Pass the new parameter
                    )
                )
                search_requests_results.append(all_pages_results)
            except Exception as e:
                print(
                    f"Skipping search request due to an unexpected "
                    f"{e.__class__.__name__} for request parameters '{r}': {e}"
                )
        if not search_requests_results:
            raise NoResultsFoundError("No results found for the search query")
        write_results_to_file(
            itertools.chain(*search_requests_results),
            destination,
            TEXT_SEARCH_CSV_FIELDS_NAMES,
        )
And it could then be called like this:
edgar_searcher = EdgarTextSearcher()
edgar_searcher.text_search(
    keywords=["Resignations"],
    entity_id=None,
    filing_form=None,
    single_forms=None,
    start_date=date(2024, 1, 1),
    end_date=date(2024, 12, 31),
    min_wait_seconds=0.1,
    max_wait_seconds=0.5,
    retries=3,
    destination="results.csv",
    peo_in=None,
    inc_in=None,
    max_pages=5,  # This will limit the search to the first 5 pages
)
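For what it's worth, since _fetch_search_request_results is a generator and fetches pages lazily, the same cap could also be applied from the outside with itertools.islice, without changing the method's signature at all. A minimal, self-contained sketch of that alternative; the fetch_pages generator here is a hypothetical stand-in for _fetch_search_request_results, not the real method:

import itertools
from typing import Any, Dict, Iterator


def fetch_pages(num_pages: int) -> Iterator[Iterator[Dict[str, Any]]]:
    """Stand-in for _fetch_search_request_results: yields one row iterator per page."""
    for i in range(1, num_pages + 1):
        yield iter([{"page": i, "row": j} for j in range(3)])


max_pages = 5

# islice stops pulling from the generator after max_pages pages, so in the
# real searcher no HTTP requests would be issued for the remaining pages.
capped_pages = itertools.islice(fetch_pages(50), max_pages)

for page in capped_pages:
    for row in page:
        print(row)

The in-method max_pages parameter is probably the friendlier API for end users, though; the islice approach is just a way to get the same behaviour without touching the class.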
Thanks for the nice tool!