Skip to content

Commit

Permalink
feat: Use NavigatorUAData and navigator.webdriver to improve bot dete…
Browse files Browse the repository at this point in the history
…ction (#1359)

* Use NavigatorUAData and navigator.webdriver to improve bot detection

* Use real values of brands from headless and regular chrome in tests

* Reduce bundle size slightly

* Remove global

* Fix test setup

* Opt out of bot filtering in cypress

* Make _is_likely_bot an instance function on posthog, use it in cypress tests

* Fix cypress imports

* Code golf

* More code golf

* Fix testcafe tests

* Treat cypress as a bot

* Fix cypress chaining logic
  • Loading branch information
robbie-c authored Aug 20, 2024
1 parent 57c3b46 commit 990e323
Show file tree
Hide file tree
Showing 15 changed files with 246 additions and 43 deletions.
14 changes: 14 additions & 0 deletions cypress/e2e/ua.cy.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/// <reference types="cypress" />
import { start } from '../support/setup'

describe('User Agent Blocking', () => {
    // The callback is deliberately NOT `async`: every step below is a Cypress command that Cypress
    // queues and runs itself, and returning a promise from a test that also issues cy commands is
    // reported by Cypress as an anti-pattern.
    it('should pick up that our automated cypress tests are indeed bot traffic', () => {
        cy.skipOn('windows')
        // load the test page via the shared setup helper with its default options
        start({})

        cy.window().then((win) => {
            // evaluate inside the application window so we query the posthog instance loaded by the page
            const isLikelyBot = win.eval('window.posthog._is_bot()')
            expect(isLikelyBot).to.eql(true)
        })
    })
})
1 change: 1 addition & 0 deletions cypress/support/commands.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Cypress.Commands.add('posthogInit', (options) => {
$captures.push(event)
$fullCaptures.push(eventData)
},
opt_out_useragent_filter: true,
...options,
})
})
Expand Down
1 change: 1 addition & 0 deletions cypress/support/e2e.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import './commands'
import '@cypress/skip-test/support'

// Add console errors into cypress logs.
Cypress.on('window:before:load', (win) => {
Expand Down
1 change: 1 addition & 0 deletions cypress/support/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import { PostHog } from '../../src/posthog-core'
import { PostHogConfig } from '../../src/types'
import '@cypress/skip-test'

declare global {
// eslint-disable-next-line @typescript-eslint/no-namespace
Expand Down
5 changes: 4 additions & 1 deletion cypress/support/setup.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@ export const start = ({
cy.visit(url)

if (initPosthog) {
cy.posthogInit(options)
cy.posthogInit({
opt_out_useragent_filter: true, // we ARE a bot, so we need to enable this opt-out
...options,
})
}

if (resetOnInit) {
Expand Down
2 changes: 1 addition & 1 deletion cypress/tsconfig.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"compilerOptions": {
"target": "es2015",
"lib": ["es5", "dom"],
"lib": ["es5", "dom", "es2015"],
"types": ["cypress", "node"],
"moduleResolution": "node"
},
Expand Down
5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
"@babel/plugin-transform-react-jsx": "^7.23.4",
"@babel/preset-env": "7.18.9",
"@babel/preset-typescript": "^7.18.6",
"@cypress/skip-test": "^2.6.1",
"@jest/globals": "^27.5.1",
"@rollup/plugin-babel": "^6.0.4",
"@rollup/plugin-json": "^6.1.0",
Expand All @@ -67,8 +68,8 @@
"babel-eslint": "10.1.0",
"babel-jest": "^26.6.3",
"compare-versions": "^6.1.0",
"cypress": "13.6.3",
"cypress-localstorage-commands": "^2.2.5",
"cypress": "13.13.2",
"cypress-localstorage-commands": "^2.2.6",
"date-fns": "^3.6.0",
"eslint": "8.56.0",
"eslint-config-posthog-js": "link:eslint-rules",
Expand Down
2 changes: 1 addition & 1 deletion playground/nextjs/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"private": true,
"scripts": {
"clean-react": "cd ../../react && rm -rf ./node_modules/",
"dev": "pnpm run link-posthog-js && pnpm run clean-react && next dev --experimental-https",
"dev": "pnpm run link-posthog-js && pnpm run clean-react && next dev",
"dev-crossdomain": "pnpm run link-posthog-js && pnpm run clean-react && NEXT_PUBLIC_CROSSDOMAIN=1 next dev --experimental-https",
"build": "pnpm run build-posthog-js && pnpm run link-posthog-js && pnpm run clean-react && next build",
"start": "next start",
Expand Down
32 changes: 32 additions & 0 deletions playground/nextjs/pages/ua.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import { useEffect, useState } from 'react'

// Try this page with some of the following commands:
// chrome --headless --disable-gpu --print-to-pdf http://localhost:3000/ua --virtual-time-budget=10000
// chrome --headless --disable-gpu --print-to-pdf http://localhost:3000/ua --virtual-time-budget=10000 --user-agent="RealHuman"

/**
 * Playground page that prints the signals used for bot detection as seen by the current browser:
 * the user agent string, `navigator.webdriver`, and the NavigatorUAData brands.
 */
export default function Home() {
    // Starts false and is flipped by the effect below; until then a placeholder is rendered
    // instead of reading `navigator` (presumably because it is unavailable during server rendering).
    const [mounted, setMounted] = useState(false)

    useEffect(() => {
        setMounted(true)
    }, [])

    return mounted ? (
        <dl>
            <dt>UA</dt>
            <dd>
                <code>{navigator.userAgent}</code>
            </dd>
            <dt>WebDriver</dt>
            <dd>
                <code>{String(navigator.webdriver)}</code>
            </dd>
            <dt>NavigatorUAData brands</dt>
            <dd>
                {/* eslint-disable-next-line compat/compat */}
                <code>{JSON.stringify((navigator as any).userAgentData?.brands)}</code>
            </dd>
        </dl>
    ) : (
        <pre>Not client</pre>
    )
}
35 changes: 20 additions & 15 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 14 additions & 10 deletions src/__tests__/posthog-core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -145,24 +145,28 @@ describe('posthog core', () => {
})

// With the default config and overrides, a capture made while the user agent is a Googlebot
// string is expected not to be delivered to the capture hook.
it('respects opt_out_useragent_filter (default: false)', () => {
// NOTE(review): this span contains both a `globals.userAgent` override and a `globals.navigator`
// override; it looks like the removed and added sides of a diff rendered together — confirm which applies.
const originalUseragent = globals.userAgent
;(globals as any)['userAgent'] =
'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36'

// keep the real navigator for restoration, then swap in a copy whose userAgent is a Googlebot string
const originalNavigator = globals.navigator
;(globals as any).navigator = {
...globals.navigator,
userAgent:
'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36',
}
const hook = jest.fn()
const posthog = posthogWith(defaultConfig, defaultOverrides)
posthog._addCaptureHook(hook)

posthog.capture(eventName, {}, {})
// NOTE(review): the assertion uses the literal '$event' rather than `eventName` — confirm they are the same value
expect(hook).not.toHaveBeenCalledWith('$event')
// restore the patched globals; NOTE(review): not inside a finally, so a failing assertion above skips the restore
;(globals as any)['userAgent'] = originalUseragent
;(globals as any)['navigator'] = originalNavigator
})

it('respects opt_out_useragent_filter', () => {
const originalUseragent = globals.userAgent

;(globals as any)['userAgent'] =
'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36'
const originalNavigator = globals.navigator
;(globals as any).navigator = {
...globals.navigator,
userAgent:
'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36',
}

const hook = jest.fn()
const posthog = posthogWith(
Expand All @@ -185,7 +189,7 @@ describe('posthog core', () => {
})
)
expect(event.properties['$browser_type']).toEqual('bot')
;(globals as any)['userAgent'] = originalUseragent
;(globals as any)['navigator'] = originalNavigator
})

it('truncates long properties', () => {
Expand Down
90 changes: 87 additions & 3 deletions src/__tests__/utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@

import { _copyAndTruncateStrings, isCrossDomainCookie, _base64Encode } from '../utils'
import { Info } from '../utils/event-utils'
import { isBlockedUA, DEFAULT_BLOCKED_UA_STRS } from '../utils/blocked-uas'
import { isLikelyBot, DEFAULT_BLOCKED_UA_STRS, isBlockedUA, NavigatorUAData } from '../utils/blocked-uas'
import { expect } from '@jest/globals'

function userAgentFor(botString: string) {
const randOne = (Math.random() + 1).toString(36).substring(7)
Expand Down Expand Up @@ -103,13 +104,13 @@ describe('utils', () => {
})
})

describe('user agent blocking', () => {
describe('isLikelyBot', () => {
// Parameterised over every entry of DEFAULT_BLOCKED_UA_STRS plus the custom 'testington' string,
// which is also passed below as the only custom blocked user agent.
it.each(DEFAULT_BLOCKED_UA_STRS.concat('testington'))(
'blocks a bot based on the user agent %s',
(botString) => {
// presumably embeds botString in an otherwise random user agent string — verify against userAgentFor
const randomisedUserAgent = userAgentFor(botString)

// NOTE(review): the two assertions below look like the removed (isBlockedUA) and added (isLikelyBot)
// sides of a diff rendered together — confirm which one is current.
expect(isBlockedUA(randomisedUserAgent, ['testington'])).toBe(true)
expect(isLikelyBot({ userAgent: randomisedUserAgent } as Navigator, ['testington'])).toBe(true)
}
)

Expand All @@ -125,10 +126,93 @@ describe('utils', () => {
[
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.6422.175 Safari/537.36 (compatible; Google-HotelAdsVerifier/2.0)',
],
[
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/122.0.0.0 Safari/537.36',
],
[
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Cypress/13.6.3 Chrome/114.0.5735.289 Electron/25.8.4 Safari/537.36',
],
])('blocks based on user agent', (botString) => {
expect(isBlockedUA(botString, [])).toBe(true)
expect(isBlockedUA(botString.toLowerCase(), [])).toBe(true)
expect(isBlockedUA(botString.toUpperCase(), [])).toBe(true)
expect(isLikelyBot({ userAgent: botString } as Navigator, [])).toBe(true)
expect(isLikelyBot({ userAgent: botString.toLowerCase() } as Navigator, [])).toBe(true)
expect(isLikelyBot({ userAgent: botString.toUpperCase() } as Navigator, [])).toBe(true)
})

// Real-world user agents (Firefox, Chrome, Safari and an Electron app) that must never be classified
// as bots, whatever the casing of the string.
it.each([
    ['Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:129.0) Gecko/20100101 Firefox/129.0'],
    [
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
    ],
    [
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15',
    ],
    [
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) elec/1.0.0 Chrome/126.0.6478.127 Electron/31.2.1 Safari/537.36',
    ],
])('does not block based on non-bot user agent', (userAgent) => {
    // as-is, lower-cased and upper-cased variants, checked in that order
    const casings = [userAgent, userAgent.toLowerCase(), userAgent.toUpperCase()]

    // first the raw string matcher...
    casings.forEach((candidate) => {
        expect(isBlockedUA(candidate, [])).toBe(false)
    })
    // ...then the navigator-level check, fed a stub that only carries the user agent
    casings.forEach((candidate) => {
        expect(isLikelyBot({ userAgent: candidate } as Navigator, [])).toBe(false)
    })
})

it('blocks based on the webdriver property being set to true', () => {
    // a navigator stub exposing nothing but `webdriver: true`, with no custom blocked UA strings
    const automatedNavigator = { webdriver: true } as Navigator
    const verdict = isLikelyBot(automatedNavigator, [])

    expect(verdict).toBe(true)
})

it('blocks based on userAgentData', () => {
    // brand list containing a 'HeadlessChrome' entry, and no userAgent string at all
    const uaData: NavigatorUAData = {
        brands: [
            { brand: 'Not)A;Brand', version: '99' },
            { brand: 'HeadlessChrome', version: '127' },
            { brand: 'Chromium', version: '127' },
        ],
    }
    const headlessNavigator = { userAgentData: uaData } as Navigator

    const verdict = isLikelyBot(headlessNavigator, [])

    expect(verdict).toBe(true)
})

// Counterpart of the headless case: a brand list with 'Google Chrome' and no 'HeadlessChrome'
// entry must not be classified as a bot.
it('does not block a normal browser based on userAgentData', () => {
    const realUserAgentData: NavigatorUAData = {
        brands: [
            { brand: 'Not)A;Brand', version: '99' },
            { brand: 'Google Chrome', version: '127' },
            { brand: 'Chromium', version: '127' },
        ],
    }
    expect(
        isLikelyBot(
            {
                // only userAgentData is provided; no userAgent string and no webdriver flag
                userAgentData: realUserAgentData,
            } as Navigator,
            []
        )
    ).toBe(false)
})

// Robustness check: feeds isLikelyBot a series of malformed `userAgentData` values.
// The test passes as long as none of the calls throws.
it('does not crash if the type of navigatorUAData changes', () => {
// we're not checking the return values of these, only that they don't crash
// brands entries are bare strings rather than { brand, version } objects
// @ts-expect-error testing invalid data
isLikelyBot({ userAgentData: { brands: ['HeadlessChrome'] } } as Navigator, [])
// brands entries are functions
// @ts-expect-error testing invalid data
isLikelyBot({ userAgentData: { brands: [() => 'HeadlessChrome'] } } as Navigator, [])
// brands itself is a function instead of an array
isLikelyBot({ userAgentData: { brands: () => ['HeadlessChrome'] } } as unknown as Navigator, [])
// userAgentData is a string, an empty object, null, a function, and a boolean respectively
isLikelyBot({ userAgentData: 'HeadlessChrome' } as unknown as Navigator, [])
isLikelyBot({ userAgentData: {} } as unknown as Navigator, [])
isLikelyBot({ userAgentData: null } as unknown as Navigator, [])
isLikelyBot({ userAgentData: () => ['HeadlessChrome'] } as unknown as Navigator, [])
isLikelyBot({ userAgentData: true } as unknown as Navigator, [])
})
})

Expand Down
Loading

0 comments on commit 990e323

Please sign in to comment.