From 830a613591ed612228b964cfcc72cf2b43cb08e2 Mon Sep 17 00:00:00 2001
From: Alexander Azarov
Date: Sat, 31 Aug 2024 11:50:43 +0100
Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20split=20OpenAI=20crawlers?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Reviewer notes (free text after the three-dash separator; not part of the
commit message):

* lib/legitbot/gptbot.rb is removed and lib/legitbot/openai.rb is added in
  its place; the require_relative entry in lib/legitbot.rb is switched from
  'legitbot/gptbot' to 'legitbot/openai' accordingly.
* Legitbot::GPTBot keeps its two previous ranges (52.230.152.0/24 and
  52.233.106.0/24) and gains 20.171.206.0/24. The URL in its disabled fetch
  annotation changes from gptbot-ranges.txt to gptbot.json.
* Two new BotMatch subclasses are introduced next to it:
  Legitbot::OpenAIChat (ten /28 ranges, registered with 'ChatGPT-User') and
  Legitbot::OpenAISearch (two /28 ranges, registered with 'OAI-SearchBot').
* All three classes carry hard-coded ip_ranges lists. Each '@ fetch:url'
  annotation sits under a NOTE saying fetching is disabled (see #131), so
  nothing in this patch refreshes the lists from the referenced JSON URLs.
* NOTE(review): the strings passed to `rule` are presumably the user-agent
  tokens that select each matcher; `rule`, `ip_ranges` and BotMatch are
  defined outside this patch, so confirm against the library core.

 lib/legitbot.rb        |  2 +-
 lib/legitbot/gptbot.rb | 15 --------------
 lib/legitbot/openai.rb | 46 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 47 insertions(+), 16 deletions(-)
 delete mode 100644 lib/legitbot/gptbot.rb
 create mode 100644 lib/legitbot/openai.rb

diff --git a/lib/legitbot.rb b/lib/legitbot.rb
index 143c012..957fb02 100644
--- a/lib/legitbot.rb
+++ b/lib/legitbot.rb
@@ -11,8 +11,8 @@
 require_relative 'legitbot/duckduckgo'
 require_relative 'legitbot/facebook'
 require_relative 'legitbot/google'
-require_relative 'legitbot/gptbot'
 require_relative 'legitbot/ias'
+require_relative 'legitbot/openai'
 require_relative 'legitbot/oracle'
 require_relative 'legitbot/marginalia'
 require_relative 'legitbot/meta'
diff --git a/lib/legitbot/gptbot.rb b/lib/legitbot/gptbot.rb
deleted file mode 100644
index bc760c1..0000000
--- a/lib/legitbot/gptbot.rb
+++ /dev/null
@@ -1,15 +0,0 @@
-# frozen_string_literal: true
-
-module Legitbot # :nodoc:
-  # https://platform.openai.com/docs/gptbot
-  class GPTBot < BotMatch
-    # NOTE: fetching has been disabled, see #131
-    # @ fetch:url https://openai.com/gptbot-ranges.txt
-    ip_ranges %w[
-      52.230.152.0/24
-      52.233.106.0/24
-    ]
-  end
-
-  rule Legitbot::GPTBot, %w[GPTBot]
-end
diff --git a/lib/legitbot/openai.rb b/lib/legitbot/openai.rb
new file mode 100644
index 0000000..417a8a1
--- /dev/null
+++ b/lib/legitbot/openai.rb
@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+
+module Legitbot # :nodoc:
+  # https://platform.openai.com/docs/gptbot
+  class GPTBot < BotMatch
+    # NOTE: fetching is disabled, see #131
+    # @ fetch:url https://openai.com/gptbot.json
+    ip_ranges %w[
+      20.171.206.0/24
+      52.230.152.0/24
+      52.233.106.0/24
+    ]
+  end
+
+  # https://platform.openai.com/docs/bots
+  class OpenAIChat < BotMatch
+    # NOTE: fetching is disabled, see #131
+    # @ fetch:url https://openai.com/chatgpt-user.json
+    ip_ranges %w[
+      23.98.142.176/28
+      40.84.180.224/28
+      13.65.240.240/28
+      20.97.189.96/28
+      20.161.75.208/28
+      52.225.75.208/28
+      52.156.77.144/28
+      40.84.221.208/28
+      40.84.221.224/28
+      40.84.180.64/28
+    ]
+  end
+
+  # https://platform.openai.com/docs/bots
+  class OpenAISearch < BotMatch
+    # NOTE: fetching is disabled, see #131
+    # @ fetch:url https://openai.com/searchbot.json
+    ip_ranges %w[
+      20.42.10.176/28
+      172.203.190.128/28
+    ]
+  end
+
+  rule Legitbot::GPTBot, %w[GPTBot]
+  rule Legitbot::OpenAIChat, %w[ChatGPT-User]
+  rule Legitbot::OpenAISearch, %w[OAI-SearchBot]
+end