diff --git a/cypress/e2e/ua.cy.ts b/cypress/e2e/ua.cy.ts
new file mode 100644
index 000000000..6df99b8ec
--- /dev/null
+++ b/cypress/e2e/ua.cy.ts
@@ -0,0 +1,14 @@
+///
+import { start } from '../support/setup'
+
+describe('User Agent Blocking', () => {
+    // Synchronous callback on purpose: cy commands are queued, and an async callback would also hand Cypress a promise.
+    it('should pick up that our automated cypress tests are indeed bot traffic', () => {
+        cy.skipOn('windows')
+        start({})
+        cy.window().then((win) => {
+            const isLikelyBot = win.eval('window.posthog._is_bot()')
+            expect(isLikelyBot).to.eql(true)
+        })
+    })
+})
diff --git a/cypress/support/commands.ts b/cypress/support/commands.ts
index 2c82d7f45..11a219bb2 100644
--- a/cypress/support/commands.ts
+++ b/cypress/support/commands.ts
@@ -13,6 +13,7 @@ Cypress.Commands.add('posthogInit', (options) => {
$captures.push(event)
$fullCaptures.push(eventData)
},
+ opt_out_useragent_filter: true,
...options,
})
})
diff --git a/cypress/support/e2e.ts b/cypress/support/e2e.ts
index 429a80322..abff144f2 100644
--- a/cypress/support/e2e.ts
+++ b/cypress/support/e2e.ts
@@ -1,4 +1,5 @@
import './commands'
+import '@cypress/skip-test/support'
// Add console errors into cypress logs.
Cypress.on('window:before:load', (win) => {
diff --git a/cypress/support/index.ts b/cypress/support/index.ts
index 22e18cfb6..82dec1097 100644
--- a/cypress/support/index.ts
+++ b/cypress/support/index.ts
@@ -2,6 +2,7 @@
import { PostHog } from '../../src/posthog-core'
import { PostHogConfig } from '../../src/types'
+import '@cypress/skip-test'
declare global {
// eslint-disable-next-line @typescript-eslint/no-namespace
diff --git a/cypress/support/setup.ts b/cypress/support/setup.ts
index 2e31e880c..655b3e070 100644
--- a/cypress/support/setup.ts
+++ b/cypress/support/setup.ts
@@ -39,7 +39,10 @@ export const start = ({
cy.visit(url)
if (initPosthog) {
- cy.posthogInit(options)
+ cy.posthogInit({
+ opt_out_useragent_filter: true, // we ARE a bot, so we need to enable this opt-out
+ ...options,
+ })
}
if (resetOnInit) {
diff --git a/cypress/tsconfig.json b/cypress/tsconfig.json
index 02963d41b..3592f862f 100644
--- a/cypress/tsconfig.json
+++ b/cypress/tsconfig.json
@@ -1,7 +1,7 @@
{
"compilerOptions": {
"target": "es2015",
- "lib": ["es5", "dom"],
+ "lib": ["es5", "dom", "es2015"],
"types": ["cypress", "node"],
"moduleResolution": "node"
},
diff --git a/package.json b/package.json
index 85b037b67..3b3dcff55 100644
--- a/package.json
+++ b/package.json
@@ -45,6 +45,7 @@
"@babel/plugin-transform-react-jsx": "^7.23.4",
"@babel/preset-env": "7.18.9",
"@babel/preset-typescript": "^7.18.6",
+ "@cypress/skip-test": "^2.6.1",
"@jest/globals": "^27.5.1",
"@rollup/plugin-babel": "^6.0.4",
"@rollup/plugin-json": "^6.1.0",
@@ -67,8 +68,8 @@
"babel-eslint": "10.1.0",
"babel-jest": "^26.6.3",
"compare-versions": "^6.1.0",
- "cypress": "13.6.3",
- "cypress-localstorage-commands": "^2.2.5",
+ "cypress": "13.13.2",
+ "cypress-localstorage-commands": "^2.2.6",
"date-fns": "^3.6.0",
"eslint": "8.56.0",
"eslint-config-posthog-js": "link:eslint-rules",
diff --git a/playground/nextjs/package.json b/playground/nextjs/package.json
index 557149376..4811df568 100644
--- a/playground/nextjs/package.json
+++ b/playground/nextjs/package.json
@@ -4,7 +4,7 @@
"private": true,
"scripts": {
"clean-react": "cd ../../react && rm -rf ./node_modules/",
- "dev": "pnpm run link-posthog-js && pnpm run clean-react && next dev --experimental-https",
+ "dev": "pnpm run link-posthog-js && pnpm run clean-react && next dev",
"dev-crossdomain": "pnpm run link-posthog-js && pnpm run clean-react && NEXT_PUBLIC_CROSSDOMAIN=1 next dev --experimental-https",
"build": "pnpm run build-posthog-js && pnpm run link-posthog-js && pnpm run clean-react && next build",
"start": "next start",
diff --git a/playground/nextjs/pages/ua.tsx b/playground/nextjs/pages/ua.tsx
new file mode 100644
index 000000000..96f749112
--- /dev/null
+++ b/playground/nextjs/pages/ua.tsx
@@ -0,0 +1,32 @@
+import { useEffect, useState } from 'react'
+
+// Try this page with some of the following commands:
+// chrome --headless --disable-gpu --print-to-pdf http://localhost:3000/ua --virtual-time-budget=10000
+// chrome --headless --disable-gpu --print-to-pdf http://localhost:3000/ua --virtual-time-budget=10000 --user-agent="RealHuman"
+
+export default function Home() {
+ const [isClient, setIsClient] = useState(false)
+ useEffect(() => {
+ setIsClient(true)
+ }, [])
+ if (!isClient) {
+ return
Not client
+ }
+ return (
+
+ - UA
+ -
+
{navigator.userAgent}
+
+ - WebDriver
+ -
+
{String(navigator.webdriver)}
+
+ - NavigatorUAData brands
+ -
+ {/* eslint-disable-next-line compat/compat */}
+
{JSON.stringify((navigator as any).userAgentData?.brands)}
+
+
+ )
+}
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 1d1c66c27..5ae54122d 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -36,6 +36,9 @@ devDependencies:
'@babel/preset-typescript':
specifier: ^7.18.6
version: 7.18.6(@babel/core@7.18.9)
+ '@cypress/skip-test':
+ specifier: ^2.6.1
+ version: 2.6.1
'@jest/globals':
specifier: ^27.5.1
version: 27.5.1
@@ -103,11 +106,11 @@ devDependencies:
specifier: ^6.1.0
version: 6.1.0
cypress:
- specifier: 13.6.3
- version: 13.6.3
+ specifier: 13.13.2
+ version: 13.13.2
cypress-localstorage-commands:
- specifier: ^2.2.5
- version: 2.2.5(cypress@13.6.3)
+ specifier: ^2.2.6
+ version: 2.2.6(cypress@13.13.2)
date-fns:
specifier: ^3.6.0
version: 3.6.0
@@ -1659,6 +1662,10 @@ packages:
uuid: 8.3.2
dev: true
+ /@cypress/skip-test@2.6.1:
+ resolution: {integrity: sha512-X+ibefBiuOmC5gKG91wRIT0/OqXeETYvu7zXktjZ3yLeO186Y8ia0K7/gQUpAwuUi28DuqMd1+7tBQVtPkzbPA==}
+ dev: true
+
/@cypress/xvfb@1.2.4(supports-color@8.1.1):
resolution: {integrity: sha512-skbBzPggOVYCbnGgV+0dmBdW/s77ZkAOXIC1knS8NagwDjBrNC1LuXtQJeiN6l+m7lzmHtaoUw/ctJKdqkG57Q==}
dependencies:
@@ -4481,17 +4488,17 @@ packages:
resolution: {integrity: sha512-d4ZVpCW31eWwCMe1YT3ur7mUDnTXbgwyzaL320DrcRT45rfjYxkt5QWLrmOJ+/UEAI2+fQgKe/fCjR8l4TpRgw==}
dev: true
- /cypress-localstorage-commands@2.2.5(cypress@13.6.3):
- resolution: {integrity: sha512-07zpwzWdY+uPi1NEHFhWQNylIJqRxR78Ile05L6WT8h1Gz0OaxgBSZRuzp+pqUni/3Pk4d2ieq/cSh++ZmujEA==}
+ /cypress-localstorage-commands@2.2.6(cypress@13.13.2):
+ resolution: {integrity: sha512-l3nZ+Lu6YbBWk4UIfNrRkNK56BkF8zVbCrqzCf35x4Nlx2wA2r0spBOZXnKjbutQZgo6qDqtS8uXoSqV36YM1Q==}
engines: {node: '>=14.0.0'}
peerDependencies:
cypress: '>=2.1.0'
dependencies:
- cypress: 13.6.3
+ cypress: 13.13.2
dev: true
- /cypress@13.6.3:
- resolution: {integrity: sha512-d/pZvgwjAyZsoyJ3FOsJT5lDsqnxQ/clMqnNc++rkHjbkkiF2h9s0JsZSyyH4QXhVFW3zPFg82jD25roFLOdZA==}
+ /cypress@13.13.2:
+ resolution: {integrity: sha512-PvJQU33933NvS1StfzEb8/mu2kMy4dABwCF+yd5Bi7Qly1HOVf+Bufrygee/tlmty/6j5lX+KIi8j9Q3JUMbhA==}
engines: {node: ^16.0.0 || ^18.0.0 || >=20.0.0}
hasBin: true
requiresBuild: true
@@ -4535,7 +4542,7 @@ packages:
request-progress: 3.0.0
semver: 7.5.4
supports-color: 8.1.1
- tmp: 0.2.1
+ tmp: 0.2.3
untildify: 4.0.0
yauzl: 2.10.0
dev: true
@@ -10250,11 +10257,9 @@ packages:
rimraf: 2.7.1
dev: true
- /tmp@0.2.1:
- resolution: {integrity: sha512-76SUhtfqR2Ijn+xllcI5P1oyannHNHByD80W1q447gU3mp9G9PSpGdWmjUOHRDPiHYacIk66W7ubDTuPF3BEtQ==}
- engines: {node: '>=8.17.0'}
- dependencies:
- rimraf: 3.0.2
+ /tmp@0.2.3:
+ resolution: {integrity: sha512-nZD7m9iCPC5g0pYmcaxogYKggSfLsdxl8of3Q/oIbqCqLLIO9IAF0GWjX1z9NZRHPiXv8Wex4yDCaZsgEw0Y8w==}
+ engines: {node: '>=14.14'}
dev: true
/tmpl@1.0.5:
diff --git a/src/__tests__/posthog-core.ts b/src/__tests__/posthog-core.ts
index 2aa0307df..a0dea04e0 100644
--- a/src/__tests__/posthog-core.ts
+++ b/src/__tests__/posthog-core.ts
@@ -145,24 +145,28 @@ describe('posthog core', () => {
})
it('respects opt_out_useragent_filter (default: false)', () => {
- const originalUseragent = globals.userAgent
- ;(globals as any)['userAgent'] =
- 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36'
-
+ const originalNavigator = globals.navigator
+ ;(globals as any).navigator = {
+ ...globals.navigator,
+ userAgent:
+ 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36',
+ }
const hook = jest.fn()
const posthog = posthogWith(defaultConfig, defaultOverrides)
posthog._addCaptureHook(hook)
posthog.capture(eventName, {}, {})
expect(hook).not.toHaveBeenCalledWith('$event')
- ;(globals as any)['userAgent'] = originalUseragent
+ ;(globals as any)['navigator'] = originalNavigator
})
it('respects opt_out_useragent_filter', () => {
- const originalUseragent = globals.userAgent
-
- ;(globals as any)['userAgent'] =
- 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36'
+ const originalNavigator = globals.navigator
+ ;(globals as any).navigator = {
+ ...globals.navigator,
+ userAgent:
+ 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36',
+ }
const hook = jest.fn()
const posthog = posthogWith(
@@ -185,7 +189,7 @@ describe('posthog core', () => {
})
)
expect(event.properties['$browser_type']).toEqual('bot')
- ;(globals as any)['userAgent'] = originalUseragent
+ ;(globals as any)['navigator'] = originalNavigator
})
it('truncates long properties', () => {
diff --git a/src/__tests__/utils.test.ts b/src/__tests__/utils.test.ts
index cdbb5e300..a8ae87d6b 100644
--- a/src/__tests__/utils.test.ts
+++ b/src/__tests__/utils.test.ts
@@ -9,7 +9,8 @@
import { _copyAndTruncateStrings, isCrossDomainCookie, _base64Encode } from '../utils'
import { Info } from '../utils/event-utils'
-import { isBlockedUA, DEFAULT_BLOCKED_UA_STRS } from '../utils/blocked-uas'
+import { isLikelyBot, DEFAULT_BLOCKED_UA_STRS, isBlockedUA, NavigatorUAData } from '../utils/blocked-uas'
+import { expect } from '@jest/globals'
function userAgentFor(botString: string) {
const randOne = (Math.random() + 1).toString(36).substring(7)
@@ -103,13 +104,13 @@ describe('utils', () => {
})
})
- describe('user agent blocking', () => {
+ describe('isLikelyBot', () => {
it.each(DEFAULT_BLOCKED_UA_STRS.concat('testington'))(
'blocks a bot based on the user agent %s',
(botString) => {
const randomisedUserAgent = userAgentFor(botString)
- expect(isBlockedUA(randomisedUserAgent, ['testington'])).toBe(true)
+ expect(isLikelyBot({ userAgent: randomisedUserAgent } as Navigator, ['testington'])).toBe(true)
}
)
@@ -125,10 +126,93 @@ describe('utils', () => {
[
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.6422.175 Safari/537.36 (compatible; Google-HotelAdsVerifier/2.0)',
],
+ [
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/122.0.0.0 Safari/537.36',
+ ],
+ [
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Cypress/13.6.3 Chrome/114.0.5735.289 Electron/25.8.4 Safari/537.36',
+ ],
])('blocks based on user agent', (botString) => {
expect(isBlockedUA(botString, [])).toBe(true)
expect(isBlockedUA(botString.toLowerCase(), [])).toBe(true)
expect(isBlockedUA(botString.toUpperCase(), [])).toBe(true)
+ expect(isLikelyBot({ userAgent: botString } as Navigator, [])).toBe(true)
+ expect(isLikelyBot({ userAgent: botString.toLowerCase() } as Navigator, [])).toBe(true)
+ expect(isLikelyBot({ userAgent: botString.toUpperCase() } as Navigator, [])).toBe(true)
+ })
+
+ it.each([
+     ['Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:129.0) Gecko/20100101 Firefox/129.0'],
+     [
+         'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
+     ],
+     [
+         'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15',
+     ],
+     [
+         'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) elec/1.0.0 Chrome/126.0.6478.127 Electron/31.2.1 Safari/537.36',
+     ],
+ ])('does not block based on non-bot user agent', (userAgent) => {
+     // regular browsers (and a plain Electron app) must pass in every casing: first the raw
+     // string check, then the navigator-based check
+     const casings = [userAgent, userAgent.toLowerCase(), userAgent.toUpperCase()]
+     casings.forEach((candidate) => expect(isBlockedUA(candidate, [])).toBe(false))
+     casings.forEach((candidate) => expect(isLikelyBot({ userAgent: candidate } as Navigator, [])).toBe(false))
+ })
+
+ it('blocks based on the webdriver property being set to true', () => {
+     // no user agent and no userAgentData: the webdriver flag alone has to be enough
+     const automatedNavigator = { webdriver: true } as Navigator
+     expect(isLikelyBot(automatedNavigator, [])).toBe(true)
+ })
+
+ it('blocks based on userAgentData', () => {
+     // no userAgent string here: the HeadlessChrome brand alone has to trigger detection
+     const uaData: NavigatorUAData = {
+         brands: [
+             { brand: 'Not)A;Brand', version: '99' },
+             { brand: 'HeadlessChrome', version: '127' },
+             { brand: 'Chromium', version: '127' },
+         ],
+     }
+     const headlessNavigator = { userAgentData: uaData } as Navigator
+     expect(isLikelyBot(headlessNavigator, [])).toBe(true)
+ })
+
+ it('does not block a normal browser based of userAgentData', () => {
+     // same shape as the headless case, but with the brand list of a regular Chrome
+     const uaData: NavigatorUAData = {
+         brands: [
+             { brand: 'Not)A;Brand', version: '99' },
+             { brand: 'Google Chrome', version: '127' },
+             { brand: 'Chromium', version: '127' },
+         ],
+     }
+     const regularNavigator = { userAgentData: uaData } as Navigator
+     expect(isLikelyBot(regularNavigator, [])).toBe(false)
+ })
+
+ it('does not crash if the type of navigatorUAData changes', () => {
+     // we're not checking the return values of these, only that they don't crash;
+     // each entry is a deliberately wrong shape for navigator.userAgentData
+     const malformedUserAgentData: unknown[] = [
+         { brands: ['HeadlessChrome'] },
+         { brands: [() => 'HeadlessChrome'] },
+         { brands: () => ['HeadlessChrome'] },
+         'HeadlessChrome',
+         {},
+         null,
+         () => ['HeadlessChrome'],
+         true,
+     ]
+     malformedUserAgentData.forEach((userAgentData) => {
+         // assert explicitly, so the test states its intent instead of passing by having no expectations
+         expect(() => isLikelyBot({ userAgentData } as unknown as Navigator, [])).not.toThrow()
+     })
+ })
})
diff --git a/src/posthog-core.ts b/src/posthog-core.ts
index 2d434b0b7..d8160291f 100644
--- a/src/posthog-core.ts
+++ b/src/posthog-core.ts
@@ -10,7 +10,7 @@ import {
isCrossDomainCookie,
isDistinctIdStringLike,
} from './utils'
-import { assignableWindow, document, location, userAgent, window } from './utils/globals'
+import { assignableWindow, document, location, navigator, userAgent, window } from './utils/globals'
import { PostHogFeatureFlags } from './posthog-featureflags'
import { PostHogPersistence } from './posthog-persistence'
import {
@@ -67,7 +67,7 @@ import {
import { Info } from './utils/event-utils'
import { logger } from './utils/logger'
import { SessionPropsManager } from './session-props'
-import { isBlockedUA } from './utils/blocked-uas'
+import { isLikelyBot } from './utils/blocked-uas'
import { extendURLParams, request, SUPPORTS_REQUEST } from './request'
import { Heatmaps } from './heatmaps'
import { ScrollManager } from './scroll-manager'
@@ -774,11 +774,7 @@ export class PostHog {
return
}
- if (
- userAgent &&
- !this.config.opt_out_useragent_filter &&
- isBlockedUA(userAgent, this.config.custom_blocked_useragents)
- ) {
+ if (!this.config.opt_out_useragent_filter && this._is_bot()) {
return
}
@@ -934,9 +930,7 @@ export class PostHog {
// this is only added when this.config.opt_out_useragent_filter is true,
// or it would always add "browser"
if (userAgent && this.config.opt_out_useragent_filter) {
- properties['$browser_type'] = isBlockedUA(userAgent, this.config.custom_blocked_useragents)
- ? 'bot'
- : 'browser'
+ properties['$browser_type'] = this._is_bot() ? 'bot' : 'browser'
}
// note: extend writes to the first object, so lets make sure we
@@ -2048,6 +2042,14 @@ export class PostHog {
this._sync_opt_out_with_persistence()
}
+ /** Runs `isLikelyBot` on the `navigator` imported from `./utils/globals`; yields `undefined` when there is none. */
+ _is_bot(): boolean | undefined {
+     if (!navigator) {
+         return undefined
+     }
+     return isLikelyBot(navigator, this.config.custom_blocked_useragents)
+ }
+
debug(debug?: boolean): void {
if (debug === false) {
window?.console.log("You've disabled debug mode.")
diff --git a/src/utils/blocked-uas.ts b/src/utils/blocked-uas.ts
index d01da0e56..fd0223d73 100644
--- a/src/utils/blocked-uas.ts
+++ b/src/utils/blocked-uas.ts
@@ -34,6 +34,11 @@ export const DEFAULT_BLOCKED_UA_STRS = [
'yahoo! slurp',
'yandexbot',
+ // headless browsers
+ 'headlesschrome',
+ 'cypress',
+ // we don't block electron here, as many customers use posthog-js in electron apps
+
// a whole bunch of goog-specific crawlers
// https://developers.google.com/search/docs/advanced/crawling/overview-google-crawlers
'Google-HotelAdsVerifier',
@@ -66,3 +71,52 @@ export const isBlockedUA = function (ua: string, customBlockedUserAgents: string
return uaLower.indexOf(blockedUaLower) !== -1
})
}
+
+// There's more in the type, but this is all we use. It's currently experimental, see
+// https://developer.mozilla.org/en-US/docs/Web/API/Navigator/userAgentData
+// if you're reading this in the future, when it's no longer experimental, please remove this type and use an official one.
+// Be extremely defensive here to ensure backwards and *forwards* compatibility, and remove this defensiveness in the
+// future when it is safe to do so.
+export interface NavigatorUAData { // minimal local shape of navigator.userAgentData: only what this file reads
+ brands?: { // optional, so readers must handle it being absent
+ brand: string // brand name, e.g. 'HeadlessChrome' or 'Chromium'
+ version: string // brand version as a string, e.g. '127'
+ }[]
+}
+declare global { // augments the global Navigator type so navigator.userAgentData type-checks
+ interface Navigator {
+ userAgentData?: NavigatorUAData // optional: the API is experimental and may be missing
+ }
+}
+
+/**
+ * Best-effort detection of automated traffic from a `Navigator`.
+ *
+ * Returns true when the user agent string is blocked (per `isBlockedUA`), when any `userAgentData` brand is
+ * blocked, or when `navigator.webdriver` is truthy. Returns false when no navigator is supplied.
+ *
+ * @param navigator the navigator to inspect, or undefined when none is available
+ * @param customBlockedUserAgents extra blocked strings, passed through to `isBlockedUA`
+ */
+export const isLikelyBot = function (navigator: Navigator | undefined, customBlockedUserAgents: string[]): boolean {
+    if (!navigator) {
+        return false
+    }
+    const ua = navigator.userAgent
+    if (ua && isBlockedUA(ua, customBlockedUserAgents)) {
+        return true
+    }
+    try {
+        // userAgentData is experimental, so its shape is validated at runtime instead of trusting the declared type
+        // eslint-disable-next-line compat/compat
+        const brands: unknown = navigator.userAgentData?.brands
+        if (Array.isArray(brands)) {
+            // malformed entries are skipped one by one, so a single bad entry cannot abort the scan and hide a
+            // blocked brand that appears later in the list
+            const hasBlockedBrand = brands.some((brandObj) => {
+                const brand: unknown = brandObj?.brand
+                return typeof brand === 'string' && isBlockedUA(brand, customBlockedUserAgents)
+            })
+            if (hasBlockedBrand) {
+                return true
+            }
+        }
+    } catch {
+        // ignore the error, we were using experimental browser features
+    }
+
+    return !!navigator.webdriver
+
+    // There's some more enhancements we could make in this area, e.g. it's possible to check if Chrome dev tools are
+    // open, which will detect some bots that are trying to mask themselves and might get past the checks above.
+    // However, this would give false positives for actual humans who have dev tools open.
+
+    // We could also use the data in navigator.userAgentData.getHighEntropyValues() to detect bots, but we should wait
+    // until this stops being experimental. The MDN docs imply that this might eventually require user permission.
+    // See https://developer.mozilla.org/en-US/docs/Web/API/NavigatorUAData/getHighEntropyValues
+    // It would be very bad if posthog-js caused a permission prompt to appear on every page load.
+}
diff --git a/testcafe/helpers.js b/testcafe/helpers.js
index a4a239698..9dff4711e 100644
--- a/testcafe/helpers.js
+++ b/testcafe/helpers.js
@@ -57,6 +57,7 @@ export const initPosthog = (config) => {
distinctID: 'automated-tester', // We set this to get around the ingestion delay for new distinctIDs
isIdentifiedID: true,
},
+ opt_out_useragent_filter: true,
})
}