From 56b78cc6ce8e71bf862f46b811ad5b9dd88dcb7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20Gardfj=C3=A4ll?= Date: Sun, 1 Oct 2023 15:26:54 +0200 Subject: [PATCH] add impersonate-browser extra: uses curl_cffi for browser impersonation This commit tries to more closely mimic web browser behavior in an attempt to circumvent Cloudflare's bot protection. If built with the 'impersonate-browser' extra, garminexport will use curl_cffi and a patched libcurl to produce TLS fingerprints that are identical to those of a real web browser. The prior 'cloudflare' extra is dropped in favor of the new 'impersonate-browser' extra. --- Makefile | 2 +- README.md | 127 ++++++++++-------- garminexport/cli/backup.py | 9 -- garminexport/garminclient.py | 201 ++++++++++++++--------------- garminexport/incremental_backup.py | 8 +- setup.cfg | 8 +- 6 files changed, 179 insertions(+), 176 deletions(-) diff --git a/Makefile b/Makefile index 52b6e0cd..68be719b 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ venv: $(VENV_DIR) # install pinned dependencies and package itself in editable mode dep-sync: $(VENV_DIR) - (source $(VENV_ACTIVATE); pip install -r requirements-dev.txt ; pip install -e . --no-deps) + (source $(VENV_ACTIVATE); pip install -r requirements-dev.txt ; pip install -e .[impersonate_browser] --no-deps) # creates a virtualenv with development dependencies installed dev-init: dep-sync diff --git a/README.md b/README.md index a297811b..2dde0e69 100644 --- a/README.md +++ b/README.md @@ -5,50 +5,69 @@ # About -`garminexport` is both a library and a tool for downloading/backing up [Garmin -Connect](http://connect.garmin.com/) activities to a local disk. +`garminexport` is both a library and a tool for downloading/backing up +[Garmin Connect](http://connect.garmin.com/) activities to a local disk. The main utility script is called `garmin-backup` and performs incremental backups of your Garmin account to a local directory. 
The first time -`garmin-backup` is run, it will download *all* activities. After that, it will +`garmin-backup` is run, it will download _all_ activities. After that, it will do incremental backups of your account. That is, the script will only download activities that haven't already been downloaded to the backup directory. - # Installation `garminexport` is available on [PyPi](https://pypi.org/) and can be installed -with [pip](http://pip.readthedocs.org): +with [pip](http://pip.readthedocs.org). + +> **WARNING** +> +> Garmin Connect employs Cloudflare's bot protection to prevent scripted access +> to their services. Therefore a [vanilla-installation](#vanilla-installation) +> is no longer likely to work. Instead, try the [browser-impersonating +> installation](#browser-impersonating-installation). + +## Vanilla installation + +To only install `garminexport` and required dependencies run: -``` bash +```bash pip install garminexport ``` +## Browser-impersonating installation + To install `garminexport` with support to circumvent Cloudflare's bot protection -(which has been known to impact some users) you can install the module with the -`cloudflare` -[extra](https://setuptools.pypa.io/en/latest/userguide/dependency_management.html#optional-dependencies) like so: +you should install the module with the `impersonate_browser` +[extra](https://setuptools.pypa.io/en/latest/userguide/dependency_management.html#optional-dependencies) +like so: -``` bash -pip install garminexport[cloudflare] +```bash +pip install garminexport[impersonate_browser] ``` This replaces the default [requests](https://github.com/psf/requests) library -with [cloudscraper](https://github.com/VeNoMouS/cloudscraper) for HTTP request. +with [curl_cffi](https://github.com/yifeikong/curl_cffi) for HTTP session +handling. 
+ +When `curl_cffi` is used, the `GARMINEXPORT_IMPERSONATE_BROWSER` environment +variable can be used to control which browser is impersonated (default is +`chrome110`, see +[full list](https://github.com/lwthiker/curl-impersonate#supported-browsers)). # Usage ## Prerequisites -To be of any use you need to register an account at [Garmin -Connect](http://connect.garmin.com/) and populate it with some activities. +To be of any use you need to register an account at +[Garmin Connect](http://connect.garmin.com/) and populate it with some +activities. ## As a command-line tool (garmin-backup) The backup program is run as follows (use the `--help` flag for a full list of available options): -``` bash +```bash garmin-backup --backup-dir=activities ``` @@ -57,54 +76,52 @@ in to your Garmin Connect account to download activities to the specified backup directory on your machine. The program will only download activities that aren't already in the backup directory. -Activities can be exported in any of the formats outlined below. Note that -by default, the program downloads all formats for every activity. Use the +Activities can be exported in any of the formats outlined below. Note that by +default, the program downloads all formats for every activity. Use the `--format` option to narrow the selection. Supported export formats: +- `gpx`: activity GPX file (XML). - - `gpx`: activity GPX file (XML). + [GPX](https://en.wikipedia.org/wiki/GPS_Exchange_Format) is an open + format, mainly for storing GPS routes/tracks. It does support extensions and + Garmin appears to annotate the GPS data with, for example, heart-rate and + cadence, when available on your device. - [GPX](https://en.wikipedia.org/wiki/GPS_Exchange_Format) is an open - format, mainly for storing GPS routes/tracks. It does support extensions - and Garmin appears to annotate the GPS data with, for example, heart-rate - and cadence, when available on your device. +- `tcx`: an activity TCX file (XML). 
_Note: a `.tcx` file may not always be + possible to export, for example if an activity was uploaded in gpx format. In + that case, Garmin won't try to synthesize a tcx file._ - - `tcx`: an activity TCX file (XML). - *Note: a `.tcx` file may not always be possible to export, for example - if an activity was uploaded in gpx format. In that case, Garmin won't try - to synthesize a tcx file.* + [TCX](https://en.wikipedia.org/wiki/Training_Center_XML) (Training Center + XML) is Garmin's own XML format. It is, essentially, an extension of GPX which + includes more metrics and divides the GPS track into "laps" as recorded by + your device (with "lap summaries" for each metric). - [TCX](https://en.wikipedia.org/wiki/Training_Center_XML) (Training - Center XML) is Garmin's own XML format. It is, essentially, an extension - of GPX which includes more metrics and divides the GPS track into "laps" - as recorded by your device (with "lap summaries" for each metric). +- `fit`: activity FIT file (binary format). _Note: a `.fit` file may not always + be possible to export, for example if an activity was entered manually rather + than imported from a Garmin device._ - - `fit`: activity FIT file (binary format). - *Note: a `.fit` file may not always be possible to export, for example - if an activity was entered manually rather than imported from a Garmin device.* + The [FIT](https://www.thisisant.com/resources/fit/) format is the "raw + data type" stored in your Garmin device and should contain all metrics your + device is capable of tracking (GPS, heart rate, cadence, etc). It's a binary + format, so tools are needed to read its content. - The [FIT](https://www.thisisant.com/resources/fit/) format is the - "raw data type" stored in your Garmin device and should contain all - metrics your device is capable of tracking (GPS, heart rate, cadence, - etc). It's a binary format, so tools are needed to read its content. +- `json_summary`: activity summary file (JSON). 
- - `json_summary`: activity summary file (JSON). + Provides summary data for an activity. Seems to lack a formal schema and + should not be counted on as a stable data format (it may change at any time). + Only included since it _may_ contain additional data that could be useful for + developers of analysis tools. - Provides summary data for an activity. Seems to lack a formal schema - and should not be counted on as a stable data format (it may change at any - time). Only included since it *may* contain additional data that could be - useful for developers of analysis tools. +- `json_details`: activity details file (JSON). - - `json_details`: activity details file (JSON). + Provides detailed activity data in a JSON format. Seems to lack a formal + schema and should not be counted on as a stable data format (it may change at + any time). Only included since it _may_ contain additional data that could be + useful for developers of analysis tools. - Provides detailed activity data in a JSON format. Seems to lack a - formal schema and should not be counted on as a stable data format (it may - change at any time). Only included since it *may* contain additional data - that could be useful for developers of analysis tools. - -All files are written to the same directory (`activities/` by default). Each +All files are written to the same directory (`activities/` by default). Each activity file is prefixed by its upload timestamp and its activity id. `garminexport` also contains a few smaller utility programs: @@ -121,14 +138,14 @@ To build your own tools around the Garmin Connect API you can import the with Garmin Connect. For example use, have a look at the command-line tools under [garminexport/cli](garminexport/cli). 
-For example, in your `setup.py`, `setup.cfg`, `pyproject.toml` ([PEP -631](https://peps.python.org/pep-0631/)) add something like: +For example, in your `setup.py`, `setup.cfg`, `pyproject.toml` +([PEP 631](https://peps.python.org/pep-0631/)) add something like: -``` python +```python install_requires=[ 'garminexport', - # also installs 'cloudscraper' as a dependency - # 'garminexport[cloudflare]', + # also installs 'curl_cffi' as a dependency + # 'garminexport[impersonate_browser]', ... ] ``` @@ -148,7 +165,5 @@ development environment) and install the required dependencies like so: # activate virtualenv source .venv/bin/activate - # code ... - # test make test diff --git a/garminexport/cli/backup.py b/garminexport/cli/backup.py index 66a563e2..87ab0619 100644 --- a/garminexport/cli/backup.py +++ b/garminexport/cli/backup.py @@ -18,11 +18,6 @@ DEFAULT_MAX_RETRIES = 7 """The default maximum number of retries to make when fetching a single activity.""" -DEFAULT_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36' -"""The default `User-Agent` to use for HTTP requests when none is supplied by -the user. -""" - def parse_args() -> argparse.Namespace: """Parse CLI arguments. @@ -63,9 +58,6 @@ def parse_args() -> argparse.Namespace: help=("The maximum number of retries to make on failed attempts to fetch an activity. " "Exponential backoff will be used, meaning that the delay between successive attempts " "will double with every retry, starting at one second. DEFAULT: {}").format(DEFAULT_MAX_RETRIES)) - parser.add_argument( - "--user-agent", type=str, default=DEFAULT_USER_AGENT, - help="A value to use for the `User-Agent` request header. Use an authentic browser agent string to prevent being blocked by Garmin. 
A tool such as `user_agent` (`ua`) can be used to generate such values.") return parser.parse_args() @@ -77,7 +69,6 @@ def main(): try: incremental_backup(username=args.username, password=args.password, - user_agent_fn=lambda:DEFAULT_USER_AGENT, backup_dir=args.backup_dir, export_formats=args.format, ignore_errors=args.ignore_errors, diff --git a/garminexport/garminclient.py b/garminexport/garminclient.py index d9628399..2f6ae0a6 100755 --- a/garminexport/garminclient.py +++ b/garminexport/garminclient.py @@ -2,57 +2,30 @@ """A module for authenticating against and communicating with selected parts of the Garmin Connect REST API. """ - +from builtins import range +from datetime import timedelta, datetime +import dateutil +import dateutil.parser +from functools import partial, wraps +from io import BytesIO import json import logging import os import os.path -import re +import requests import sys import zipfile -from datetime import timedelta, datetime -from builtins import range -from functools import wraps -from io import BytesIO - -import dateutil -import dateutil.parser - -# -# By default we use the requests library to create http clients. If built with -# the 'cloudflare' extra, we use cloudscraper to circumvent CloudFlare's -# anti-bot protection pages. -# -import requests -session_factory = requests.session -try: - import cloudscraper - session_factory = cloudscraper.create_scraper -except (ImportError): - pass from garminexport.retryer import Retryer, ExponentialBackoffDelayStrategy, MaxRetriesStopStrategy -# -# Note: For more detailed information about the API services -# used by this module, log in to your Garmin Connect account -# through the web browser and visit the API documentation page -# for the REST service of interest. 
For example: -# https://connect.garmin.com/proxy/activity-service-1.3/index.html -# https://connect.garmin.com/proxy/activity-search-service-1.2/index.html -# - -# -# Other useful references: -# https://github.com/cpfair/tapiriik/blob/master/tapiriik/services/GarminConnect/garminconnect.py -# https://forums.garmin.com/showthread.php?72150-connect-garmin-com-signin-question/page2 -# log = logging.getLogger(__name__) - # reduce logging noise from requests library logging.getLogger("requests").setLevel(logging.ERROR) + +PORTAL_LOGIN_URL = "https://sso.garmin.com/portal/api/login" +"""Garmin Connect's Single-Sign On portal login API URL.""" SSO_LOGIN_URL = "https://sso.garmin.com/sso/login" """Garmin Connect's Single-Sign On login URL.""" SSO_SIGNIN_URL = "https://sso.garmin.com/sso/signin" @@ -93,23 +66,16 @@ class GarminClient(object): """ - def __init__(self, username, password, user_agent_fn=None): + def __init__(self, username, password): """Initialize a :class:`GarminClient` instance. :param username: Garmin Connect user name or email address. :type username: str :param password: Garmin Connect account password. :type password: str - :keyword user_agent_fn: A function that, when called, produces a - `User-Agent` string to be used as `User-Agent` for the remainder of the - session. If set to None, the default user agent of the http request - library is used. - :type user_agent_fn: Callable[[], str] - """ self.username = username self.password = password - self._user_agent_fn = user_agent_fn self.session = None @@ -122,8 +88,7 @@ def __exit__(self, exc_type, exc_value, traceback): self.disconnect() def connect(self): - log.debug("using session factory: %s", session_factory.__module__) - self.session = session_factory() + self.session = new_http_session() self._authenticate() def disconnect(self): @@ -132,68 +97,77 @@ def disconnect(self): self.session = None def _authenticate(self): + """ + Authenticates using a Garmin Connect username and password. 
+ + The procedure has changed over the years. A good approach for figuring + it out is to use the browser development tools to trace all requests + following a sign-in. + """ log.info("authenticating user ...") - form_data = { - "username": self.username, - "password": self.password, - "embed": "false", - "_csrf": self._get_csrf_token(), - } - headers = { - 'origin': 'https://sso.garmin.com', - } - if self._user_agent_fn: - user_agent = self._user_agent_fn() - if not user_agent: - raise ValueError("user_agent_fn didn't produce a value") - headers['User-Agent'] = user_agent - - auth_response = self.session.post( - SSO_SIGNIN_URL, headers=headers, params=self._auth_params(), data=form_data) - log.debug("got auth response: %s", auth_response.text) - if auth_response.status_code != 200: - raise ValueError("authentication failure: did you enter valid credentials?") - auth_ticket_url = self._extract_auth_ticket_url(auth_response.text) + auth_ticket_url = self._login(self.username, self.password) log.debug("auth ticket url: '%s'", auth_ticket_url) - log.info("claiming auth ticket ...") - response = self.session.get(auth_ticket_url) - if response.status_code != 200: - raise RuntimeError( - "auth failure: failed to claim auth ticket: {}: {}\n{}".format( - auth_ticket_url, response.status_code, response.text)) + self._claim_auth_ticket(auth_ticket_url) - # appears like we need to touch base with the main page to complete the - # login ceremony. + # we need to touch base with the main page to complete the login ceremony. self.session.get('https://connect.garmin.com/modern') # This header appears to be needed on subsequent session requests or we # end up with a 402 response from Garmin. self.session.headers.update({'NK': 'NT'}) - def _get_csrf_token(self): - """Retrieves a Cross-Site Request Forgery (CSRF) token from Garmin's login - page. 
The token is passed along in the login form for increased - security.""" - log.info("fetching CSRF token ...") - resp = self.session.get(SSO_LOGIN_URL, params=self._auth_params()) - if resp.status_code != 200: - raise ValueError("auth failure: could not load {}".format(SSO_LOGIN_URL)) - # extract CSRF token - csrf_token = re.search(r'