diff --git a/README.md b/README.md index 0f02fc7..7930304 100644 --- a/README.md +++ b/README.md @@ -50,11 +50,13 @@ end ## Supported - [Ahrefs](https://ahrefs.com/robot) +- [Amazonbot](https://developer.amazon.com/amazonbot) - [Amazon AdBot](https://adbot.amazon.com/index.html) - [Applebot](https://support.apple.com/en-us/119829) - [Baidu spider](http://help.baidu.com/question?prod_en=master&class=498&id=1000973) - [Bingbot](https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/) - [BLEXBot (WebMeUp)](http://webmeup-crawler.com/) +- [DataForSEO](https://dataforseo.com/dataforseo-bot) - [DuckDuckGo bot](https://duckduckgo.com/duckduckbot) - [Google crawlers](https://support.google.com/webmasters/answer/1061943) - [IAS](https://integralads.com/ias-privacy-data-management/policies/site-indexing-policy/) diff --git a/lib/legitbot.rb b/lib/legitbot.rb index 79756a9..28ce4da 100644 --- a/lib/legitbot.rb +++ b/lib/legitbot.rb @@ -9,6 +9,7 @@ require_relative 'legitbot/baidu' require_relative 'legitbot/bing' require_relative 'legitbot/blexbot' +require_relative 'legitbot/dataforseo' require_relative 'legitbot/duckduckgo' require_relative 'legitbot/facebook' require_relative 'legitbot/google' diff --git a/lib/legitbot/amazon.rb b/lib/legitbot/amazon.rb index fe20bc2..317415a 100644 --- a/lib/legitbot/amazon.rb +++ b/lib/legitbot/amazon.rb @@ -2,9 +2,10 @@ module Legitbot # :nodoc: # https://adbot.amazon.com/index.html + # https://developer.amazon.com/amazonbot class Amazon < BotMatch - domains 'amazonadbot.com.' + domains 'amazon.', 'amazonadbot.com.' end - rule Legitbot::Amazon, %w[AmazonAdBot] + rule Legitbot::Amazon, %w[Amazonbot AmazonAdBot] end diff --git a/lib/legitbot/dataforseo.rb b/lib/legitbot/dataforseo.rb new file mode 100644 index 0000000..3ce2da2 --- /dev/null +++ b/lib/legitbot/dataforseo.rb @@ -0,0 +1,10 @@ +# frozen_string_literal: true + +module Legitbot # :nodoc: + # https://dataforseo.com/dataforseo-bot + class DataForSEO < BotMatch + domains 'dataforseo.com.' + end + + rule Legitbot::DataForSEO, %w[DataForSeoBot] +end diff --git a/test/amazon_test.rb b/test/amazon_test.rb index 7a33a8c..052b0f2 100644 --- a/test/amazon_test.rb +++ b/test/amazon_test.rb @@ -30,7 +30,7 @@ def test_malicious_ua refute_predicate bot, :valid? end - def test_valid_ua + def test_user_agent1 bot = Legitbot.bot( 'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)', '54.166.7.90' @@ -40,7 +40,19 @@ def test_valid_ua assert_predicate bot, :valid? end - def test_valid_name + # rubocop:disable Layout/LineLength + def test_user_agent2 + bot = Legitbot.bot( + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML\, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)', + '52.70.240.171' + ) + + assert bot + assert_predicate bot, :valid? + end + # rubocop:enable Layout/LineLength + + def test_valid_name1 bot = Legitbot.bot( 'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)', '54.166.7.90' @@ -49,6 +61,17 @@ def test_valid_name assert_equal :amazon, bot.detected_as end + # rubocop:disable Layout/LineLength + def test_valid_name2 + bot = Legitbot.bot( + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML\, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)', + '52.70.240.171' + ) + + assert_equal :amazon, bot.detected_as + end + # rubocop:enable Layout/LineLength + def test_fake_name bot = Legitbot.bot( 'Mozilla/5.0 (compatible; AmazonAdBot/1.0; +https://adbot.amazon.com)', diff --git a/test/dataforseo_test.rb b/test/dataforseo_test.rb new file mode 100644 index 0000000..cd105ca --- /dev/null +++ b/test/dataforseo_test.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +require_relative 'test_helper' + +class DataForSEOTest < Minitest::Test + include Minitest::Hooks + include DnsServerMock + + def test_malicious_ip + ip = '149.210.164.47' + match = Legitbot::DataForSEO.new ip + + refute_predicate match, :valid? + end + + def test_valid_ip + ip = '136.243.228.176' + match = Legitbot::DataForSEO.new ip + + assert_predicate match, :valid? + end + + def test_malicious_ua + bot = Legitbot.bot( + 'Mozilla/5.0 (compatible; DataForSeoBot; +https://dataforseo.com/dataforseo-bot)', + '149.210.164.47' + ) + + assert bot + refute_predicate bot, :valid? + end + + def test_valid_ua + bot = Legitbot.bot( + 'Mozilla/5.0 (compatible; DataForSeoBot; +https://dataforseo.com/dataforseo-bot)', + '136.243.228.176' + ) + + assert bot + assert_predicate bot, :valid? + end + + def test_valid_name + bot = Legitbot.bot( + 'Mozilla/5.0 (compatible; DataForSeoBot; +https://dataforseo.com/dataforseo-bot)', + '136.243.228.176' + ) + + assert_equal :dataforseo, bot.detected_as + end + + def test_fake_name + bot = Legitbot.bot( + 'Mozilla/5.0 (compatible; DataForSeoBot; +https://dataforseo.com/dataforseo-bot)', + '81.1.172.108' + ) + + assert_equal :dataforseo, bot.detected_as + end +end diff --git a/test/lib/dns_server_mock.rb b/test/lib/dns_server_mock.rb index b847c81..3381725 100644 --- a/test/lib/dns_server_mock.rb +++ b/test/lib/dns_server_mock.rb @@ -29,6 +29,12 @@ '54.166.7.90' => { ptr: %w[crawler-54-166-7-90.amazonadbot.com] }, + '52-70-240-171.crawl.amazonbot.amazon' => { + a: %w[52.70.240.171] + }, + '52.70.240.171' => { + ptr: %w[52-70-240-171.crawl.amazonbot.amazon] + }, # Apple '17-58-98-60.applebot.apple.com' => { @@ -45,6 +51,13 @@ '65.21.113.197' => { ptr: %w[pot22.webmeup.com] }, + # DataForSEO + 'crawling-gateway-136-243-228-176.dataforseo.com' => { + a: %w[136.243.228.176] + }, + '136.243.228.176' => { + ptr: %w[crawling-gateway-136-243-228-176.dataforseo.com] + }, # Google 'crawl-66-249-64-141.googlebot.com' => {