From e72c3736e8b3c201b93b1a0b88931911a1cdcaa5 Mon Sep 17 00:00:00 2001
From: Robbie Coomber
Date: Wed, 18 Oct 2023 16:26:47 +0100
Subject: [PATCH] Fix bot user agent detection

---
Review notes (text between "---" and the first "diff --git" line is not
part of the commit):

* _isBlockedUA now lower-cases the user agent and every blocked substring
  before comparing them, so matching is case-insensitive.
* An empty or missing user agent returns false, and a missing
  customBlockedUserAgents argument is treated as an empty list.
* NOTE(review): this patch had been collapsed onto a single line. Line
  breaks and leading indentation were reconstructed (4-space indent
  assumed; the nesting depth of the test hunk is a guess). Verify against
  the target tree, or apply with whitespace differences ignored.
* NOTE(review): the From: header carries no e-mail address; it may need to
  be restored before the patch can be applied as a mailbox.

 src/__tests__/utils.js | 18 ++++++++++++++++++
 src/utils.ts           | 13 +++++++++----
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/src/__tests__/utils.js b/src/__tests__/utils.js
index 007d8deac..57afc1d8b 100644
--- a/src/__tests__/utils.js
+++ b/src/__tests__/utils.js
@@ -231,6 +231,24 @@ describe('loadScript', () => {
                 expect(_isBlockedUA(randomisedUserAgent, ['testington'])).toBe(true)
             }
         )
+
+        it('should block googlebot desktop', () => {
+            expect(
+                _isBlockedUA(
+                    'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36',
+                    []
+                )
+            ).toBe(true)
+        })
+
+        it('should block openai bot', () => {
+            expect(
+                _isBlockedUA(
+                    'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.0; +https://openai.com/gptbot)',
+                    []
+                )
+            ).toBe(true)
+        })
     })
 
     describe('_isUrlMatchingRegex', () => {
diff --git a/src/utils.ts b/src/utils.ts
index 8884e0eee..e9524cf74 100644
--- a/src/utils.ts
+++ b/src/utils.ts
@@ -501,12 +501,17 @@ export const DEFAULT_BLOCKED_UA_STRS = [
 // This is to block various web spiders from executing our JS and
 // sending false capturing data
 export const _isBlockedUA = function (ua: string, customBlockedUserAgents: string[]): boolean {
-    return DEFAULT_BLOCKED_UA_STRS.concat(customBlockedUserAgents).some((blockedUA) => {
-        if (ua.includes) {
-            return ua.includes(blockedUA)
+    if (!ua) {
+        return false
+    }
+    const uaLower = ua.toLowerCase()
+    return DEFAULT_BLOCKED_UA_STRS.concat(customBlockedUserAgents || []).some((blockedUA) => {
+        const blockedUaLower = blockedUA.toLowerCase()
+        if (uaLower.includes) {
+            return uaLower.includes(blockedUaLower)
         } else {
             // IE 11 :/
-            return ua.indexOf(blockedUA) !== -1
+            return uaLower.indexOf(blockedUaLower) !== -1
         }
     })
 }