Skip to content

Commit

Permalink
Handle version/name UA strings
Browse files Browse the repository at this point in the history
  • Loading branch information
thinkwelltwd committed Apr 4, 2020
1 parent bf613ff commit b07f6b5
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 15 deletions.
4 changes: 2 additions & 2 deletions device_detector/parser/client/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ class BaseClientParser(Parser):

def name_version_pairs(self) -> list:

cached = DDCache['user_agents'][self.ua_hash].get('name_version_pairs', [])
if cached:
cached = DDCache['user_agents'][self.ua_hash].get('name_version_pairs', None)
if cached is not None:
return cached

name_version_pairs = key_value_pairs(ua=self.user_agent)
Expand Down
85 changes: 72 additions & 13 deletions device_detector/parser/key_value_pairs.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,27 @@
re.IGNORECASE,
)

REGEXES = (

# Extract version / name from UAs where the version comes BEFORE the name.
# Every pattern exposes the named groups `version` and `name`.
VERSION_NAME_REGEXES = (
    # Unanchored: a run of digits/dots, then one or more spaces or hyphens,
    # then the literal word LIVE (matched case-insensitively). Examples:
    # 1.172.0.1 - LIVE - Mar 5 2020
    # 17build 113411 LIVE Sep 17 20180
    re.compile(
        r'(?P<version>[\d\.]+)[ \-]+(?P<name>LIVE)',
        re.IGNORECASE,
    ),

    # Anchored with ^...$ so the whole string must be exactly
    # <version><separator><single word>; the separator is one or more
    # spaces, hyphens or slashes. Examples:
    # 15.5.53 Boxcar
    # 165 CandyCanes
    re.compile(
        r'^(?P<version>[\d\.]+)[ \-/]+(?P<name>\w+)$',
        re.IGNORECASE,
    ),
)


# Extract name / version from UAs
NAME_VERSION_REGEXES = (

# Get ALL <key>/<value> pairs from the regex
re.compile(
Expand Down Expand Up @@ -72,23 +92,21 @@ def name_matches_regex(name) -> bool:
return False


def extract_pairs(regex, ua):
def scrub_name_version_pairs(matches: list) -> list:
"""
Extract all key/value pairs of the specified regex,
and return pairs along with unmatched portion of ua string.
Takes list of (name,version) tuples.
Remove all pairs where name matches SKIP patterns
"""
matches = regex.findall(ua)
substring = ua

if matches:
substring = regex.sub(' ', ua)

pairs = []
for name, version in matches:
name = name.strip(' -,')
if not name:
continue

# does this look like base64 encoded data?
if name.endswith('=='):
continue

name_lower = name.lower()
if name_lower in SKIP_PREFIXES:
continue
Expand All @@ -99,6 +117,37 @@ def extract_pairs(regex, ua):
code = name_lower.replace(' ', '')
pairs.append((code, name, version.strip()))

return pairs


def extract_version_name_pairs(regex, ua):
    """
    Look for the first <version> <name> occurrence of ``regex`` in ``ua``.

    ``regex`` must define the named groups ``name`` and ``version``.

    Returns whatever ``scrub_name_version_pairs`` yields for the single
    (name, version) tuple that was found, or an empty list when the
    regex does not match. Unlike ``extract_name_version_pairs``, no
    leftover portion of the ua string is returned.
    """
    found = regex.search(ua)
    if not found:
        return []

    # Regex captures version first; the scrubber expects (name, version).
    candidate = (found.group('name'), found.group('version'))
    return scrub_name_version_pairs([candidate])


def extract_name_version_pairs(regex, ua):
    """
    Collect every <name> <version> occurrence of ``regex`` in ``ua``.

    All matches found by ``regex.findall`` are passed through
    ``scrub_name_version_pairs``; that helper unpacks each match as a
    (name, version) tuple.

    Returns a 2-tuple ``(pairs, substring)``: the scrubbed pairs, and
    the ua string with every match replaced by a single space (the ua
    is returned unchanged when nothing matched).
    """
    found = regex.findall(ua)

    # Only rewrite the ua when something was actually matched.
    remainder = regex.sub(' ', ua) if found else ua

    return scrub_name_version_pairs(found), remainder


Expand All @@ -111,14 +160,24 @@ def key_value_pairs(ua):

all_pairs = []

for rgx in REGEXES:
pairs, substring = extract_pairs(rgx, substring)
for rgx in VERSION_NAME_REGEXES:
pairs = extract_version_name_pairs(rgx, substring)
if pairs:
all_pairs.extend(pairs)

# <version>/<name> regexes will be much less common,
# so if we found such entries, return them immediately
if all_pairs:
return all_pairs

for rgx in NAME_VERSION_REGEXES:
pairs, substring = extract_name_version_pairs(rgx, substring)
all_pairs.extend(pairs)

return all_pairs


__all__ = (
'extract_pairs',
'extract_name_version_pairs',
'key_value_pairs',
)
25 changes: 25 additions & 0 deletions device_detector/tests/fixtures/local/app_names.yml
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,31 @@
client:
name: Akamai NetSession C-API
version: ''
-
user_agent: 1.172.0.1 - LIVE - Mar 5 2020
client:
name: LIVE
version: 1.172.0.1
-
user_agent: 17build 113411 LIVE Sep 17 20180
client:
name: LIVE
version: 113411
-
user_agent: 11.3.27 Boxcar
client:
name: Boxcar
version: 11.3.27
-
user_agent: 1.3.7 CoalCar
client:
name: CoalCar
version: 1.3.7
-
user_agent: 12.31.79/Caboose
client:
name: Caboose
version: 12.31.79
# --------------------------------------------------------------------------------
# UA Strings with no interesting pairs should fall back to browser details
-
Expand Down

0 comments on commit b07f6b5

Please sign in to comment.