Skip to content

Commit

Permalink
Enable ApplicationIDExtractor
Browse files Browse the repository at this point in the history
Check long UAs for gibberish
Add extra UA to normalize
  • Loading branch information
thinkwelltwd committed Sep 23, 2018
1 parent 5f1e1f1 commit e764c99
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 7 deletions.
48 changes: 41 additions & 7 deletions device_detector/device_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
Browser,
FeedReader,
Game,
SlashedNameExtractor,
Library,
MediaPlayer,
Messaging,
Expand All @@ -25,6 +24,10 @@
P2P,
PIM,
VPNProxy,

# Generic name extractors
ApplicationIDExtractor,
SlashedNameExtractor,
WholeNameExtractor,
)
from .settings import DDCache, WORTHLESS_UA_TYPES
Expand All @@ -37,6 +40,7 @@
}

# Translation table that deletes every character found in `punctuation`
# (each one is mapped to the empty string). is_digit() applies it before
# testing whether the remaining characters are all digits.
trans_tbl = str.maketrans({p: '' for p in punctuation})
# Translation table that deletes only Space, Slash and Dot. is_gibberish()
# applies it to detect long strings containing none of those separators.
punctuation_tbl = str.maketrans({p: '' for p in ' /.'})


class DeviceDetector(RegexLoader):
Expand All @@ -45,9 +49,6 @@ class DeviceDetector(RegexLoader):
'local/device/normalize.yml',
]

# All registered Client Types
client_types = []

CLIENT_PARSERS = (
FeedReader,
Game,
Expand All @@ -60,8 +61,8 @@ class DeviceDetector(RegexLoader):
DesktopApp,
Browser,
Library,
# SlashedNameExtractor,
# WholeNameExtractor,
SlashedNameExtractor,
WholeNameExtractor,
)

DEVICE_PARSERS = (
Expand Down Expand Up @@ -117,8 +118,19 @@ def is_digit(self) -> bool:
21/4.35.1.2
5.0.6
Or if entire string is mostly numeric, discard
15B93
"""
return self.user_agent.translate(trans_tbl).isdigit()
if self.user_agent.translate(trans_tbl).isdigit():
return True

alphabetic_chars = 0
for char in self.user_agent:
if not char.isnumeric():
alphabetic_chars += 1

return alphabetic_chars < 2

def is_uuid(self) -> bool:
"""
Expand All @@ -141,6 +153,16 @@ def is_uuid(self) -> bool:
except (ValueError, AttributeError):
return False

def is_gibberish(self):
    """
    Report whether the UserAgent looks like one long meaningless token.

    A UserAgent of 65 or more characters that contains no Space, Dot
    or Slash is considered gibberish. Shorter strings are never flagged.
    """
    ua = self.user_agent
    if len(ua) >= 65:
        # If stripping Space / Dot / Slash leaves the string unchanged,
        # none of those separators were present to begin with.
        return ua.translate(punctuation_tbl) == ua
    return False

def normalize(self):
"""
Check for common worthless features that preclude the need for any further processing.
Expand All @@ -158,6 +180,8 @@ def normalize(self):
self.all_details['normalized'] = 'Numeric'
elif self.is_uuid():
self.all_details['normalized'] = 'UUID'
elif self.is_gibberish():
self.all_details['normalized'] = 'Gibberish'
else:
for nr in self.normalized_regex_list:
regex = nr['regex']
Expand Down Expand Up @@ -205,13 +229,23 @@ def parse_client(self) -> None:
if self.client:
return

app_id = ApplicationIDExtractor(self.user_agent).extract()

for Parser in self.CLIENT_PARSERS:
parser = Parser(self.user_agent).parse()
if parser.ua_data:
self.client = parser
self.all_details['client'] = parser.ua_data
self.all_details['client']['app_id'] = app_id
return

# if no client matched, still add name / app_id values
if app_id:
self.all_details['client'] = {
'name': app_id,
'app_id': app_id,
}

def parse_device(self) -> None:
"""
Parses the UA for Device information using the Device or Bot parsers
Expand Down
4 changes: 4 additions & 0 deletions device_detector/regexes/local/device/normalize.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@
- regex: '^(A\/[\d\.]+\/\w+\/[\w\d\-\#\(\)]+)(?:\/(?:.*)|$)'
groups: '\g<1>'

# 1.0,win/6.3.9600,AV/18.4.2338,avl/push/18.4.3895.325,ffl
- regex: '(\d\.\d,win\/[\d+\.]+,AV)(\/[\d+\.]+)(,avl\/[\w]+)\/.*'
groups: '\g<1>\g<3>'

# Remove repeated characters like "baaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
# match 12+ repetitions to avoid UUIDs like "00000000-0000-0000-0000-000000000000"
- regex: (?:.*)(.)\1{12,}(.*)
Expand Down
9 changes: 9 additions & 0 deletions device_detector/tests/fixtures/local/normalize.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,15 @@
-
user_agent: Symantec/3.00.10.2737 MID/{00000000-0000-0000-0000-000000000000} SID/zIRxWwAAAAA LUE/1.12.5.5 (Windows;6.2;SP0.0;X64;ENC)
normalized: Symantec/3.00.10.2737 (Windows;6.2;SP0.0;X64;ENC)
-
user_agent: 1.0,win/6.1.7601,AV/18.5.3059,avl/devcontrol/18.5.3931.338,ffl
normalized: 1.0,win/6.1.7601,AV,avl/devcontrol
-
user_agent: 1.0,win/6.1.7601,AV/18.6.3066,avl/myavast/18.6.3983.0,ffl
normalized: 1.0,win/6.1.7601,AV,avl/myavast
-
user_agent: 1.0,win/6.3.9600,AV/18.6.2540,avl/devcontrol/18.5.3931.361,ffl
normalized: 1.0,win/6.3.9600,AV,avl/devcontrol
-
user_agent: 'EAV Update (Windows; U; 64bit; BPC 11.2.49.0; OS: 6.1.7601 SP 1.0 NT; TDB 38379; CL 1.0.0; x64c; APP eav; ASP 0.0; PX 1; PUA 1; CD 1; RA 0; PEV 0; HWF: 0100CDD9-B875-9C8B-3107-CA8A12DC46E5; PLOC en_us; PCODE 106.0.0; PAR -1; ATH -1; DC 0; PLID 3AA-5RV-KST; SEAT db67f021; RET 2101)'
normalized: EAV Update (Windows; U; 64bit)
Expand Down

0 comments on commit e764c99

Please sign in to comment.