diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml new file mode 100644 index 0000000..fb79665 --- /dev/null +++ b/.github/workflows/build.yaml @@ -0,0 +1,39 @@ +# This workflow will upload a Python Package using Twine when a release is created +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +name: Upload Python Package + +on: + release: + types: [published] + +permissions: + contents: read + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Build package + run: python -m build + - name: Publish package + uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 + with: + user: ${{ secrets.pypi_username }} + password: ${{ secrets.pypi_password }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..86db358 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,25 @@ +name: Lint and Test + +on: [push] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10"] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pytest pytest-asyncio + pip install -r requirements.txt + - name: Running tests + run: | + pytest agentbrowser/test.py + pytest agentbrowser/test_async.py \ No newline at end of file diff --git a/agentbrowser/__init__.py b/agentbrowser/__init__.py index 53c748f..a547260 100644 --- a/agentbrowser/__init__.py +++ b/agentbrowser/__init__.py @@ -1,33 +1 @@ -""" -agentbrowser - -A browser for your agent, built on Chrome and Pyppeteer. -""" - -__version__ = "0.1.1" -__author__ = "Moon (https://github.com/lalalune)" -__credits__ = "https://github.com/lalalune/agentbrowser" - -from .browser import ( - get_browser, - init_browser, - create_page, - close_page, - navigate_to, - get_body_text, - get_document_html, - get_body_html, - evaluate_javascript -) - -__all__ = [ - "get_browser", - "init_browser", - "create_page", - "navigate_to", - "close_page", - "get_document_html", - "get_body_text", - "get_body_html", - "evaluate_javascript", -] +from .browser import * \ No newline at end of file diff --git a/agentbrowser/browser.py b/agentbrowser/browser.py index 0a54448..4522db1 100644 --- a/agentbrowser/browser.py +++ b/agentbrowser/browser.py @@ -7,59 +7,93 @@ browser = None - def get_browser(): - check_browser_inited() + if browser is None: + init_browser() + return browser + +async def async_get_browser(): + if browser is None: + await async_init_browser() return browser def init_browser(headless=True, executable_path=None): - if browser is not None: - asyncio.get_event_loop().run_until_complete(browser.close()) + asyncio.get_event_loop().run_until_complete( + async_init_browser(headless, executable_path) + ) - if executable_path is None: - executable_path = find_chrome() - async def init(): - global browser +def create_page(site=None): + return asyncio.get_event_loop().run_until_complete(async_create_page(site)) - def handle_interrupt(): - asyncio.ensure_future(browser.close()) - asyncio.get_event_loop().stop() - browser = await launch(headless=headless, executablePath=executable_path) - signal.signal(signal.SIGINT, handle_interrupt) +def close_page(page): + asyncio.get_event_loop().run_until_complete(async_close_page(page)) - asyncio.get_event_loop().run_until_complete(init()) +def navigate_to(url, page): + return asyncio.get_event_loop().run_until_complete(async_navigate_to(url, page)) -def check_browser_inited(): - if browser is None: - init_browser() +def get_document_html(page): + return asyncio.get_event_loop().run_until_complete(async_get_document_html(page)) -def create_page(site=None): - check_browser_inited() - page = asyncio.get_event_loop().run_until_complete(browser.newPage()) + +def get_body_text(page): + return asyncio.get_event_loop().run_until_complete(async_get_body_text(page)) + + +def get_body_text_raw(page): + return asyncio.get_event_loop().run_until_complete(async_get_body_text_raw(page)) + + +def get_body_html(page): + return asyncio.get_event_loop().run_until_complete(async_get_body_html(page)) + + +def evaluate_javascript(code, page): + return asyncio.get_event_loop().run_until_complete( + async_evaluate_javascript(code, page) + ) + + +# async version of init_browser +async def async_init_browser(headless=True, executable_path=None): + global browser + + if executable_path is None: + executable_path = find_chrome() + + if browser is None: + browser = await launch(headless=headless, executablePath=executable_path, autoClose=False) + return browser + +# async version of create_page +async def async_create_page(site=None): + global browser + new_browser = None + if browser is None: + new_browser = await async_init_browser() + else: + new_browser = browser + page = await new_browser.newPage() if site: - asyncio.get_event_loop().run_until_complete( - page.goto(site, {"waitUntil": ["domcontentloaded", "networkidle0"]}) - ) + await page.goto(site, {"waitUntil": ["domcontentloaded", "networkidle0"]}) return page -def close_page(page): - asyncio.get_event_loop().run_until_complete(page.close()) +# async version of close_page +async def async_close_page(page): + await page.close() -def navigate_to(url, page): - check_browser_inited() +# async version of navigate_to +async def async_navigate_to(url, page): if not page: - page = create_page(None) + page = await async_create_page(None) try: - asyncio.get_event_loop().run_until_complete( - page.goto(url, {"waitUntil": ["domcontentloaded", "networkidle0"]}) - ) + await page.goto(url, {"waitUntil": ["domcontentloaded", "networkidle0"]}) except Exception as e: print("Error navigating to: " + url) print(e) @@ -67,80 +101,28 @@ def navigate_to(url, page): return page -def get_document_html(page): - return asyncio.get_event_loop().run_until_complete(page.content()) - +# async version of get_document_html +async def async_get_document_html(page): + return await page.content() -def get_body_text(page): - # get the body, but remove some junk first - output = asyncio.get_event_loop().run_until_complete( - page.Jeval( - "body", - """ - (element) => { - const element_blacklist = [ - "sidebar", - "footer", - "account", - "login", - "signup", - "search", - "advertisement", - "masthead", - "popup", - "floater", - "modal", - ]; - // first, filter out all the script tags, noscript tags,