From 77592740e43e7c44e3beeebd5de2d47e8c452b09 Mon Sep 17 00:00:00 2001
From: Alexander Azarov
Date: Tue, 20 Dec 2016 14:23:55 +0200
Subject: [PATCH] Facebook bot

---
Reviewer notes (not part of the commit message):
* Line structure was rebuilt from a whitespace-collapsed copy of this patch;
  blank context lines and indentation are a best-effort reconstruction.
* lib/legitbot/facebook.rb and test/facebook_test.rb were revised in review,
  so the blob ids on their "index" lines no longer match the content.

 README.md                |  4 +++-
 Rakefile                 |  7 +++++++
 legitbot.gemspec         |  3 +++
 lib/legitbot.rb          |  8 +++++---
 lib/legitbot/facebook.rb | 40 ++++++++++++++++++++++++++++++++++++++++
 lib/legitbot/version.rb  |  2 +-
 test/facebook_test.rb    | 23 +++++++++++++++++++++++
 7 files changed, 82 insertions(+), 5 deletions(-)
 create mode 100644 lib/legitbot/facebook.rb
 create mode 100644 test/facebook_test.rb

diff --git a/README.md b/README.md
index fc67e6b..317ee85 100644
--- a/README.md
+++ b/README.md
@@ -34,7 +34,9 @@ end
 ## Issues, problems, plans
 
 * Rails middleware
-* Facebook: https://developers.facebook.com/docs/sharing/webmasters/crawler
+* More testing for Facebook
+* Review for thread safety
+* Make it possible to reload Facebook IP ranges
 
 ## License
 
diff --git a/Rakefile b/Rakefile
index 87bdaa8..fc9e3fb 100644
--- a/Rakefile
+++ b/Rakefile
@@ -12,4 +12,11 @@ Rake::TestTask.new do |t|
   t.verbose = true
 end
 
+desc 'Start a console'
+task :console do
+  require 'irb'
+  ARGV.clear
+  IRB.start
+end
+
 task default: %w[test]
diff --git a/legitbot.gemspec b/legitbot.gemspec
index bcd344d..7f13db4 100644
--- a/legitbot.gemspec
+++ b/legitbot.gemspec
@@ -15,6 +15,9 @@ Gem::Specification.new do |spec|
     "made by a real search engine, not a fake"
   spec.required_ruby_version = '>= 2.0.0'
 
+  spec.add_dependency "irrc"
+  spec.add_dependency "segment_tree"
+  spec.add_dependency "concurrent-ruby"
 
   spec.add_development_dependency "rake"
   spec.add_development_dependency "minitest"
diff --git a/lib/legitbot.rb b/lib/legitbot.rb
index 9ba713c..7a3a18a 100644
--- a/lib/legitbot.rb
+++ b/lib/legitbot.rb
@@ -2,8 +2,10 @@
 require_relative 'legitbot/legitbot'
 require_relative 'legitbot/botmatch'
 
-require_relative 'legitbot/google'
-require_relative 'legitbot/yandex'
-require_relative 'legitbot/bing'
+
 require_relative 'legitbot/baidu'
+require_relative 'legitbot/bing'
 require_relative 'legitbot/duckduckgo'
+require_relative 'legitbot/facebook'
+require_relative 'legitbot/google'
+require_relative 'legitbot/yandex'
diff --git a/lib/legitbot/facebook.rb b/lib/legitbot/facebook.rb
new file mode 100644
index 0000000..cb9f92b
--- /dev/null
+++ b/lib/legitbot/facebook.rb
@@ -0,0 +1,40 @@
+require 'ipaddr'
+require 'segment_tree'
+require 'irrc'
+require 'concurrent'
+
+module Legitbot
+  # https://developers.facebook.com/docs/sharing/webmasters/crawler
+
+  # Validates a request IP against the prefixes that the RADB registry
+  # returns for Facebook's autonomous system.
+  class Facebook < BotMatch
+    AS = 'AS32934'
+
+    # Deferred lookup table: the IRR query runs the first time
+    # ValidIPs.value is read and the result is reused afterwards.
+    ValidIPs = Concurrent::Delay.new do
+      client = Irrc::Client.new
+      client.query :radb, AS
+      prefixes = client.perform[AS]
+
+      # One segment tree per address family, keyed by each CIDR's IP range.
+      build_tree = lambda do |family|
+        SegmentTree.new(
+          prefixes[family][AS].map { |cidr| [IPAddr.new(cidr).to_range, true] }
+        )
+      end
+
+      { ipv4: build_tree.call(:ipv4), ipv6: build_tree.call(:ipv6) }
+    end
+
+    # @return [Boolean] true when @ip lies inside one of the fetched prefixes
+    def valid?
+      ip = IPAddr.new(@ip)
+      family = ip.ipv4? ? :ipv4 : :ipv6
+      !ValidIPs.value[family].find(ip).nil?
+    end
+  end
+
+  rule Legitbot::Facebook, %w(facebookhit facebookexternalhit)
+end
diff --git a/lib/legitbot/version.rb b/lib/legitbot/version.rb
index a38b859..a7f7988 100644
--- a/lib/legitbot/version.rb
+++ b/lib/legitbot/version.rb
@@ -1,3 +1,3 @@
 module Legitbot
-  VERSION = '0.0.1'
+  VERSION = '0.1.0'
 end
diff --git a/test/facebook_test.rb b/test/facebook_test.rb
new file mode 100644
index 0000000..c6b76e2
--- /dev/null
+++ b/test/facebook_test.rb
@@ -0,0 +1,23 @@
+require 'minitest/autorun'
+require 'legitbot'
+
+# NOTE(review): Legitbot::Facebook queries RADB on first use, so these presumably need network access.
+class FacebookTest < Minitest::Test
+  def test_valid_ip
+    ip = "69.63.186.89"
+    match = Legitbot::Facebook.new(ip)
+    assert match.valid?, "#{ip} is a valid Facebook IP"
+  end
+
+  def test_invalid_ip
+    ip = "127.0.0.1"
+    match = Legitbot::Facebook.new(ip)
+    assert match.fake?, "#{ip} is a fake Facebook IP"
+  end
+
+  def test_user_agent
+    bot = Legitbot.bot("facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)", "31.13.76.56")
+    assert_equal "Facebook", bot.detected_as
+    assert bot.valid?, "A valid Facebook User-agent and IP"
+  end
+end