diff --git a/raw/Crawlers.json b/raw/Crawlers.json index 1423c07..cdcca0f 100644 --- a/raw/Crawlers.json +++ b/raw/Crawlers.json @@ -1 +1 @@ -[".*Java.*outbrain","008\\\/","192\\.comAgent","2ip\\.ru","404checker","^bluefish ","^Calypso v\\\/","^COMODO DCV","^DangDang","^DavClnt","^FDM ","^Grabber","^git\\\/","^Goose\\\/","^HTTPClient\\\/","^Java\\\/","^Jeode\\\/","^Jetty\\\/","^Mget","^Microsoft URL Control","^NG\\\/[0-9\\.]","^NING\\\/","^PHP\\\/[0-9]","^RMA\\\/","^Ruby|Ruby\\\/[0-9]","^scrutiny\\\/","^VSE\\\/[0-9]","^WordPress\\.com","^XRL\\\/[0-9]","^ZmEu","a3logics\\.in","A6-Indexer","a\\.pr-cy\\.ru","Abonti\\\/","Aboundex","aboutthedomain","Accoona-AI-Agent","acoon","acrylicapps\\.com\\\/pulp","adbeat","AddThis","ADmantX","adressendeutschland","adscanner\\\/","Advanced Email Extractor v","agentslug","AHC","aihit","aiohttp\\\/","Airmail","akka-http\\\/","akula\\\/","alertra","alexa site audit","Alibaba\\.Security\\.Heimdall","allloadin\\.com","alyze\\.info","amagit","AndroidDownloadManager","Anemone","AngleSharp\\\/","Ant\\.com","Anturis Agent","AnyEvent-HTTP\\\/","Apache Droid","ApacheBench\\\/","Apache-HttpAsyncClient\\\/","Apache-HttpClient\\\/","APIs-Google","AportWorm\\\/[0-9]","AppBeat\\\/[0-9]","AppEngine-Google","Arachmo","arachnode","Arachnophilia","aria2","asafaweb.com","AskQuickly","Astute","asynchttp","autocite","Autonomy","axios\\\/","B-l-i-t-z-B-O-T","^b0t$","Backlink-Ceck\\.de","Bad-Neighborhood","baidu\\.com","baypup\\\/[0-9]","baypup\\\/colbert","BazQux","BCKLINKS","BDFetch","BegunAdvertising\\\/","BigBozz","biglotron","BingLocalSearch","BingPreview","binlar","biNu image cacher","biz_Directory","Blackboard Safeassign","Bloglovin","BlogPulseLive","BlogSearch","Blogtrottr","boitho\\.com-dc","BPImageWalker","Braintree-Webhooks","Branch Metrics API","Branch-Passthrough","Brodie\\\/","Browsershots","BUbiNG","Buck\\\/","Burf Search","Butterfly\\\/","BuzzSumo","CAAM\\\/[0-9]","CakePHP","CapsuleChecker","CaretNail","catexplorador","cb crawl","CC Metadata Scaper","Cerberian Drtrs","CERT\\.at-Statistics-Survey","cg-eye","changedetection","ChangesMeter\\\/","Charlotte","CheckHost","checkprivacy","chkme\\.com","Chirp\\\/[0-9]","CirrusExplorer\\\/","CISPA Vulnerability Notification","Citoid","CJNetworkQuality","Clarsentia","clips\\.ua\\.ac\\.be","Cloud mapping experiment","CloudEndure","CloudFlare-AlwaysOnline","Cloudinary\\\/[0-9]","cmcm\\.com","coccoc","colly -","CommaFeed","Commons-HttpClient","Comodo SSL Checker","contactbigdatafr","convera","copyright sheriff","Covario-IDS","CrawlForMe\\\/[0-9]","cron-job\\.org","Crowsnest","curb","Curious George","curl","cuwhois\\\/[0-9]","cybo\\.com","DareBoost","help@dataminr\\.com","DataparkSearch","dataprovider","Daum(oa)?[ \\\/][0-9]","DeuSu","developers\\.google\\.com\\\/\\+\\\/web\\\/snippet\\\/","Digg","Dispatch\\\/","dlvr","DMBrowser","DNS-Tools Header-Analyzer","DNSPod-reporting","docoloc","Dolphin http client\\\/","DomainAppender","Donuts Content Explorer","dotMailer content retrieval","dotSemantic","downforeveryoneorjustme","downnotifier\\.com","DowntimeDetector","Dragonfly File Reader","drupact","Drupal \\(\\+http:\\\/\\\/drupal\\.org\\\/\\)","dubaiindex","EARTHCOM","Easy-Thumb","ec2linkfinder","eCairn-Grabber","ECCP","echocrawl","eContext\\\/","ElectricMonk","elefent","EMail Exractor","Email%20Extractor%20Lite","EmailWolf","Embed PHP Library","Embedly","europarchive\\.org","evc-batch\\\/[0-9]","EventMachine HttpClient","Evidon","Evrinid","ExactSearch","ExaleadCloudview","Excel\\\/","Exif Viewer","Exploratodo","Express WebPictures","ezooms","facebookexternalhit","facebookplatform","fairshare","Faraday v","fasthttp","Faveeo","Favicon downloader","FavOrg","Feed Wrangler","Feedbin","FeedBooster","FeedBucket","FeedBunch\\\/[0-9]","FeedBurner","FeedChecker","Feedly","Feedspot","Feedwind\\\/[0-9]","feeltiptop","Fetch API","Fetch\\\/[0-9]","Fever\\\/[0-9]","findlink","findthatfile","FlipboardBrowserProxy","FlipboardProxy","FlipboardRSS","fluffy","flynxapp","forensiq","FoundSeoTool\\\/[0-9]","free thumbnails","FreeWebMonitoring SiteChecker","Funnelback","G-i-g-a-b-o-t","g00g1e\\.net","GAChecker","ganarvisitas\\\/[0-9]","geek-tools","Genderanalyzer","Genieo","GentleSource","GetLinkInfo","getprismatic\\.com","GetURLInfo\\\/[0-9]","Ghost Inspector","GigablastOpenSource","GIS-LABS","github-camo","github\\.com\\\/","Go [\\d\\.]* package http","Go-http-client","gobyus","gofetch","GomezAgent","gooblog","Goodzer\\\/[0-9]","Google favicon","Google Keyword Suggestion","Google Keyword Tool","Google Page Speed Insights","Google PP Default","Google Search Console","Google Web Preview","Google-Adwords","Google-Apps-Script","Google-Calendar-Importer","Google-HotelAdsVerifier","Google-HTTP-Java-Client","Google-Publisher-Plugin","Google-SearchByImage","Google-Site-Verification","Google-Structured-Data-Testing-Tool","Google-Youtube-Links","google_partner_monitoring","GoogleDocs","GoogleHC\\\/","GoogleProducer","Gookey","GoScraper","GoSpotCheck","GoSquared-Status-Checker","gosquared-thumbnailer","grabify","Grammarly","grouphigh","grokkit","grub-client","gSOAP\\\/","GTmetrix","GuzzleHttp","gvfs\\\/","HAA(A)?RTLAND http client","hackney\\\/","Hatena","hawkReader","HEADMasterSEO","HeartRails_Capture","heritrix","historious\\\/","hledejLevne\\.cz\\\/[0-9]","Holmes","HootSuite Image proxy","Hootsuite-WebFeed\\\/[0-9]","HostTracker","ht:\\\/\\\/check","htdig","HTMLParser\\\/","http-get","HTTP-Header-Abfrage","http-kit","http-request\\\/","HTTP-Tiny","HTTP_Compression_Test","http_request2","http_requester","HttpComponents","httphr","HTTPMon","PEAR HTTPRequest","http\\.rb\\\/","httpscheck","httpssites_power","httpunit","HttpUrlConnection","httrack","hosterstats","huaweisymantec","HubPages.*crawlingpolicy","HubSpot ","HyperZbozi.cz Feeder","i2kconnect\\\/","ichiro","IdeelaborPlagiaat","IDG Twitter Links Resolver","IDwhois\\\/[0-9]","Iframely","igdeSpyder","IlTrovatore","ImageEngine\\\/","Imagga","imgsizer","InAGist","inbound\\.li parser","InDesign%20CC","infegy","infohelfer","InfoWizards Reciprocal Link System PRO","Instapaper","inpwrd\\.com","Integrity","integromedb","internet_archive","InternetSeer","internetVista monitor","intraVnews","IODC","IOI","iplabel","IPS\\\/[0-9]","ips-agent","IPWorks HTTP\\\/S Component","iqdb\\\/","Irokez","isitup\\.org","iskanie","iZSearch","janforman","Jaunt\\\/","Jersey\\\/","Jigsaw","Jobboerse","JobFeed discovery","Jobg8 URL Monitor","jobo","Jobrapido","Jobsearch1\\.5","JoinVision Generic","JS-Kit","Kaspersky Lab CFR link resolver","KeepRight OpenStreetMap Checker","Kelny\\\/","Kerrigan\\\/","KeyCDN","Keyword Extractor","Keywords Research","KickFire","KimonoLabs\\\/","Kml-Google","knows\\.is","KOCMOHABT","kouio","kulturarw3","KumKie","L\\.webis","Larbin","Lavf\\\/","LayeredExtractor","letsencrypt","LibVLC","libwww","Licorne Image Snapshot","Liferea\\\/","link checker","Link Valet","link_thumbnailer","LinkAlarm\\\/","linkCheck","linkdex","LinkExaminer","linkfluence","linkpeek","LinkPreviewGenerator","LinkTiger","LinkWalker","Lipperhey","livedoor ScreenShot","LoadImpactPageAnalyzer","LoadImpactRload","LongURL API","looksystems\\.net","ltx71","lua-resty-http","lwp-trivial","lycos","LYT\\.SR","mabontland","MagpieRSS","Mail.Ru","MailChimp","makecontact\\\/","Mandrill","MapperCmd","marketinggrader","masscan\\\/[0-9]","Mediapartners-Google","MegaIndex\\.ru","Melvil Rawi\\\/","MergeFlow-PageReader","Metaspinner","MetaURI","Microsearch","Microsoft-WebDAV-MiniRedir","Microsoft Data Access Internet Publishing Provider Protocol","Microsoft Office ","Microsoft Windows Network Diagnostics","Miniature.io\\\/","Mindjet","Miniflux","mixdata dot com","mixed-content-scan","mixnode","Mnogosearch","mogimogi","Mojolicious \\(Perl\\)","monitis","Monitority\\\/[0-9]","montastic","MonTools","Moreover","Morning Paper","mowser","MovableType","Mrcgiguy","mShots","MxToolbox\\\/","MuckRack\\\/","MVAClient","nagios","Najdi\\.si\\\/","Needle\\\/","NETCRAFT","NetLyzer FastProbe","Netpursual","netresearch","NetShelter ContentScan","Netsparker","NetTrack","Netvibes","Neustar WPM","NeutrinoAPI","NewRelicPinger\\\/1.0 \\(\\d+\\)","NewsBlur .*Finder","NewsGator","newsme","newspaper\\\/","Nexgate Ruby Client","NG-Search","nineconnections\\.com","NLNZ_IAHarvester","Nmap Scripting Engine","node-superagent","node-urllib\\\/","node\\.io","nominet\\.org\\.uk","Norton-Safeweb","Notifixious","notifyninja","nuhk","nutch","Nuzzel","nWormFeedFinder","Nymesis","Ocelli\\\/[0-9]","oegp","Offline Explorer","okhttp","Omea Reader","omgili","OMSC","Online Domain Tools","OpenCalaisSemanticProxy","Openstat\\\/","OpenVAS","Optimizer","Orbiter","OrgProbe\\\/[0-9]","Owler","ow\\.ly","ownCloud News","OxfordCloudService\\\/[0-9]","Page Analyzer","Page Valet","page2rss","page_verifier","PagePeeker","Pagespeed\\\/[0-9]","Panopta","panscient","parsijoo","PayPal IPN","Pcore-HTTP","Pearltrees","peerindex","Peew","Perlu -","PhantomJS\\\/","PhantomJS Screenshoter","Photon\\\/","phpcrawl","phpservermon","Pi-Monster","ping\\.blo\\.gs\\\/","Pingability","Pingdom","Pingoscope","PingSpot","pinterest\\.com","Pizilla","Ploetz \\+ Zeller","Plukkie","PocketParser","POE-Component-Client-HTTP\\\/","Pompos","Porkbun","Port Monitor","postano","PostmanRuntime\\\/","PostPost","postrank","PowerPoint\\\/","Priceonomics Analysis Engine","PritTorrent\\\/[0-9]","PrintFriendly\\.com","Prlog","probethenet","Project 25499","Promotion_Tools_www.searchenginepromotionhelp.com","prospectb2b","Protopage","proximic","PRTG Network Monitor","pshtt, https scanning","PTST ","PTST\\\/[0-9]+","Pulsepoint XT3 web scraper","Python-httplib2","python-requests","Python-urllib","Qirina Hurdler","QQDownload","Qseero","QrafterPro","Qualidator.com SiteAnalyzer","Quora Link Preview","Qwantify","Radian6","RankSonicSiteAuditor","Readability","RealPlayer%20Downloader","RebelMouse","RecurPost\\\/","redback\\\/","Redirect Checker Tool","ReederForMac","request\\.js","ResponseCodeTest\\\/[0-9]","RestSharp","RetrevoPageAnalyzer","Riddler","Rival IQ","Robosourcer","Robozilla\\\/[0-9]","ROI Hunter","RPT-HTTPClient","RSSOwl","safe-agent-scanner","SalesIntelligent","Saleslift","SauceNAO","SBIder","scalaj-http","Scoop","scooter","ScoutJet","ScoutURLMonitor","Scrapy","ScreenShotService\\\/[0-9]","Scrubby","search\\.thunderstone","Search37\\\/","SearchSight","Seeker","semanticdiscovery","semanticjuice","Semiocast HTTP client","sentry\\\/","SEO Browser","Seo Servis","seo-nastroj.cz","Seobility","SEOCentro","SeoCheck","SeopultContentAnalyzer","Server Density Service Monitoring","servernfo\\.com","SetCronJob\\\/","Seznam screenshot-generator","Shelob","Shoppimon Analyzer","ShoppimonAgent\\\/[0-9]","ShopWiki","ShortLinkTranslate","shrinktheweb","SilverReader","Sideqik","SimplePie","SimplyFast","Sitebulb\\\/","SiteIndexed","Site-Shot\\\/","Site24x7","SiteBar","SiteCondor","siteexplorer\\.info","SiteGuardian","Siteimprove\\.com","Sitemap(s)? Generator","SiteMonitor","Siteshooter B0t","SiteSucker","SiteTruth","sitexy\\.com","SkypeUriPreview","Slack\\\/","slider\\.com","slurp","SMRF URL Expander","SMUrlExpander","Snappy","SniffRSS","sniptracker","Snoopy","SnowHaze Search","sogou web","SortSite","sovereign\\.ai","spaziodati","Specificfeeds","speedy","SPEng","Spinn3r","spray-can","Sprinklr ","sqlmap","spyonweb","Sqworm","SSL Labs","ssl-tools","StackRambler","Statastico\\\/","StatusCake","Stratagems Kumo","Stroke.cz","StudioFACA","suchen","summify","Super Monitoring","Surphace Scout","SwiteScraper","Symfony2 BrowserKit","SynHttpClient-Built","Sysomos","Symfony BrowserKit","T0PHackTeam","Tarantula\\\/","Taringa UGC","Tenon\\.io","teoma","terrainformatica\\.com","Test Certificate Info","Tetrahedron\\\/[0-9]","Thinklab","The Drop Reaper","The Expert HTML Source Viewer","theinternetrules","theoldreader\\.com","Thumbshots","ThumbSniper","TinEye","Tiny Tiny RSS","TLSProbe\\\/","topster","touche.com","Traackr.com","TrapitAgent","trendspottr\\.com","truwoGPS","TulipChain","Twisted PageGetter","tweetedtimes\\.com","Tweetminster","Tweezler\\\/","Twikle","Twingly","ubermetrics-technologies","uclassify","uCrawlr\\\/","UdmSearch","UniversalFeedParser","Unshorten\\.It\\!\\\/[0-9]","Untiny","UnwindFetchor","updated","updown\\.io daemon","Upflow","Uptimia","URL Verifier","URLChecker","URLitor.com","urlresolver","Urlstat","UrlTrends Ranking Updater","Vagabondo","vBSEO","via ggpht\\.com GoogleImageProxy","VidibleScraper\\\/","visionutils","vkShare","voltron","voyager\\\/","VSAgent\\\/[0-9]","VSB-TUO\\\/[0-9]","Vulnbusters Meter","VYU2","w3af\\.org","W3C-checklink","W3C-mobileOK","W3C_I18n-Checker","W3C_Unicorn","wangling","Wappalyzer","WatchMouse","WbSrch\\\/","web-capture\\.net","Web-Monitoring","Web-sniffer","Webauskunft","WebCapture","WebClient\\\/","webcollage","WebCookies","WebCopier","WebCorp","WebDoc","WebFetch","WebImages","WebIndex","webkit2png","webmastercoffee","webmon ","webscreenie","Webshot","Website Analyzer\\\/","websitepulse agent","websitepulse[+ ]checker","Websnapr\\\/","Webthumb\\\/[0-9]","WebThumbnail","WeCrawlForThePeace","WeLikeLinks","WEPA","WeSEE","wf84","wget","WhatsApp","WhatsMyIP","WhatWeb","WhereGoes\\?","Whibse","WhoRunsCoinHive","Whynder Magnet","Windows-RSS-Platform","WinHttpRequest","wkhtmlto","wmtips","Woko","Word\\\/","WordPress\\\/","wotbox","WP Engine Install Performance API","wpif","wprecon\\.com survey","WPScan","wscheck","Wtrace","WWW-Mechanize","www\\.monitor\\.us","XaxisSemanticsClassifier","Xenu Link Sleuth","XING-contenttabreceiver\\\/[0-9]","XmlSitemapGenerator","xpymep([0-9]?)\\.exe","Y!J-(ASR|BSC)","Yaanb","yacy","Yahoo Ad monitoring","Yahoo Link Preview","YahooCacheSystem","YahooYSMcm","YandeG","Yandex(?!Search)","yanga","yeti"," YLT","Yo-yo","Yoleo Consumer","yoogliFetchAgent","YottaaMonitor","yourls\\.org","Your-Website-Sucks\\\/[0-9]","Zao","Zemanta Aggregator","Zend\\\\Http\\\\Client","Zend_Http_Client","zgrab","ZnajdzFoto","ZyBorg","[a-z0-9\\-_]*(bot|crawler|archiver|transcoder|spider|uptime|validator|fetcher)"] \ No newline at end of file +[".*Java.*outbrain","008\\\/","192\\.comAgent","2ip\\.ru","404checker","^bluefish ","^Calypso v\\\/","^COMODO DCV","^DangDang","^DavClnt","^FDM ","^Grabber","^git\\\/","^Goose\\\/","^HTTPClient\\\/","^Java\\\/","^Jeode\\\/","^Jetty\\\/","^Mget","^Microsoft URL Control","^NG\\\/[0-9\\.]","^NING\\\/","^PHP\\\/[0-9]","^RMA\\\/","^Ruby|Ruby\\\/[0-9]","^scrutiny\\\/","^VSE\\\/[0-9]","^WordPress\\.com","^XRL\\\/[0-9]","^ZmEu","a3logics\\.in","A6-Indexer","a\\.pr-cy\\.ru","Abonti\\\/","Aboundex","aboutthedomain","Accoona-AI-Agent","acoon","acrylicapps\\.com\\\/pulp","adbeat","AddThis","ADmantX","adressendeutschland","adscanner\\\/","Advanced Email Extractor v","agentslug","AHC","aihit","aiohttp\\\/","Airmail","akka-http\\\/","akula\\\/","alertra","alexa site audit","Alibaba\\.Security\\.Heimdall","allloadin\\.com","alyze\\.info","amagit","AndroidDownloadManager","Anemone","AngleSharp\\\/","Ant\\.com","Anturis Agent","AnyEvent-HTTP\\\/","Apache Droid","ApacheBench\\\/","Apache-HttpAsyncClient\\\/","Apache-HttpClient\\\/","APIs-Google","AportWorm\\\/[0-9]","AppBeat\\\/[0-9]","AppEngine-Google","Arachmo","arachnode","Arachnophilia","aria2","asafaweb.com","AskQuickly","Astute","asynchttp","autocite","Autonomy","axios\\\/","B-l-i-t-z-B-O-T","^b0t$","Backlink-Ceck\\.de","Bad-Neighborhood","baidu\\.com","baypup\\\/[0-9]","baypup\\\/colbert","BazQux","BCKLINKS","BDFetch","BegunAdvertising\\\/","BigBozz","biglotron","BingLocalSearch","BingPreview","binlar","biNu image cacher","biz_Directory","Blackboard Safeassign","Bloglovin","BlogPulseLive","BlogSearch","Blogtrottr","boitho\\.com-dc","BPImageWalker","Braintree-Webhooks","Branch Metrics API","Branch-Passthrough","Brodie\\\/","Browsershots","BUbiNG","Buck\\\/","Burf Search","Butterfly\\\/","BuzzSumo","CAAM\\\/[0-9]","CakePHP","CapsuleChecker","CaretNail","catexplorador","cb crawl","CC Metadata Scaper","Cerberian Drtrs","CERT\\.at-Statistics-Survey","cg-eye","changedetection","ChangesMeter\\\/","Charlotte","CheckHost","checkprivacy","chkme\\.com","Chirp\\\/[0-9]","CirrusExplorer\\\/","CISPA Vulnerability Notification","Citoid","CJNetworkQuality","Clarsentia","clips\\.ua\\.ac\\.be","Cloud mapping experiment","CloudEndure","CloudFlare-AlwaysOnline","Cloudinary\\\/[0-9]","cmcm\\.com","coccoc","colly -","CommaFeed","Commons-HttpClient","Comodo SSL Checker","contactbigdatafr","convera","copyright sheriff","Covario-IDS","CrawlForMe\\\/[0-9]","cron-job\\.org","Crowsnest","curb","Curious George","curl","cuwhois\\\/[0-9]","cybo\\.com","DareBoost","help@dataminr\\.com","DataparkSearch","dataprovider","Daum(oa)?[ \\\/][0-9]","DeuSu","developers\\.google\\.com\\\/\\+\\\/web\\\/snippet\\\/","Digg","Dispatch\\\/","dlvr","DMBrowser","DNS-Tools Header-Analyzer","DNSPod-reporting","docoloc","Dolphin http client\\\/","DomainAppender","Donuts Content Explorer","dotMailer content retrieval","dotSemantic","downforeveryoneorjustme","downnotifier\\.com","DowntimeDetector","Dragonfly File Reader","drupact","Drupal \\(\\+http:\\\/\\\/drupal\\.org\\\/\\)","dubaiindex","EARTHCOM","Easy-Thumb","ec2linkfinder","eCairn-Grabber","ECCP","echocrawl","eContext\\\/","ElectricMonk","elefent","EMail Exractor","Email%20Extractor%20Lite","EmailWolf","Embed PHP Library","Embedly","europarchive\\.org","evc-batch\\\/[0-9]","EventMachine HttpClient","Evidon","Evrinid","ExactSearch","ExaleadCloudview","Excel\\\/","Exif Viewer","ExperianCrawlUK","Exploratodo","Express WebPictures","ezooms","facebookexternalhit","facebookplatform","fairshare","Faraday v","fasthttp","Faveeo","Favicon downloader","FavOrg","Feed Wrangler","Feedbin","FeedBooster","FeedBucket","FeedBunch\\\/[0-9]","FeedBurner","FeedChecker","Feedly","Feedspot","Feedwind\\\/[0-9]","feeltiptop","Fetch API","Fetch\\\/[0-9]","Fever\\\/[0-9]","findlink","findthatfile","FlipboardBrowserProxy","FlipboardProxy","FlipboardRSS","fluffy","flynxapp","forensiq","FoundSeoTool\\\/[0-9]","free thumbnails","FreeWebMonitoring SiteChecker","Funnelback","G-i-g-a-b-o-t","g00g1e\\.net","GAChecker","ganarvisitas\\\/[0-9]","geek-tools","Genderanalyzer","Genieo","GentleSource","GetLinkInfo","getprismatic\\.com","GetURLInfo\\\/[0-9]","Ghost Inspector","GigablastOpenSource","GIS-LABS","github-camo","github\\.com\\\/","Go [\\d\\.]* package http","Go-http-client","Go http package","gobyus","gofetch","GomezAgent","gooblog","Goodzer\\\/[0-9]","Google favicon","Google Keyword Suggestion","Google Keyword Tool","Google Page Speed Insights","Google PP Default","Google Search Console","Google Web Preview","Google-Adwords","Google-Apps-Script","Google-Calendar-Importer","Google-HotelAdsVerifier","Google-HTTP-Java-Client","Google-Publisher-Plugin","Google-SearchByImage","Google-Site-Verification","Google-Structured-Data-Testing-Tool","Google-Youtube-Links","google_partner_monitoring","GoogleDocs","GoogleHC\\\/","GoogleProducer","Gookey","GoScraper","GoSpotCheck","GoSquared-Status-Checker","gosquared-thumbnailer","grabify","Grammarly","grouphigh","grokkit","grub-client","gSOAP\\\/","GTmetrix","GuzzleHttp","gvfs\\\/","HAA(A)?RTLAND http client","hackney\\\/","Hatena","hawkReader","HEADMasterSEO","HeartRails_Capture","heritrix","historious\\\/","hledejLevne\\.cz\\\/[0-9]","Holmes","HonesoSearchEngine\\\/","HootSuite Image proxy","Hootsuite-WebFeed\\\/[0-9]","HostTracker","ht:\\\/\\\/check","htdig","HTMLParser\\\/","http-get","HTTP-Header-Abfrage","http-kit","http-request\\\/","HTTP-Tiny","HTTP_Compression_Test","http_request2","http_requester","HttpComponents","httphr","HTTPMon","PEAR HTTPRequest","http\\.rb\\\/","httpscheck","httpssites_power","httpunit","HttpUrlConnection","httrack","hosterstats","huaweisymantec","HubPages.*crawlingpolicy","HubSpot ","HyperZbozi.cz Feeder","i2kconnect\\\/","ichiro","IdeelaborPlagiaat","IDG Twitter Links Resolver","IDwhois\\\/[0-9]","Iframely","igdeSpyder","IlTrovatore","ImageEngine\\\/","Imagga","imgsizer","InAGist","inbound\\.li parser","InDesign%20CC","infegy","infohelfer","InfoWizards Reciprocal Link System PRO","Instapaper","inpwrd\\.com","Integrity","integromedb","internet_archive","InternetSeer","internetVista monitor","intraVnews","IODC","IOI","iplabel","IPS\\\/[0-9]","ips-agent","IPWorks HTTP\\\/S Component","iqdb\\\/","Irokez","isitup\\.org","iskanie","iZSearch","janforman","Jaunt\\\/","Jersey\\\/","Jigsaw","Jobboerse","JobFeed discovery","Jobg8 URL Monitor","jobo","Jobrapido","Jobsearch1\\.5","JoinVision Generic","JS-Kit","Kaspersky Lab CFR link resolver","KeepRight OpenStreetMap Checker","Kelny\\\/","Kerrigan\\\/","KeyCDN","Keyword Extractor","Keywords Research","KickFire","KimonoLabs\\\/","Kml-Google","knows\\.is","KOCMOHABT","kouio","kulturarw3","KumKie","L\\.webis","Larbin","Lavf\\\/","LayeredExtractor","letsencrypt","LibVLC","libwww","Licorne Image Snapshot","Liferea\\\/","link checker","Link Valet","link_thumbnailer","LinkAlarm\\\/","linkCheck","linkdex","LinkExaminer","linkfluence","linkpeek","LinkPreviewGenerator","LinkTiger","LinkWalker","Lipperhey","livedoor ScreenShot","LoadImpactPageAnalyzer","LoadImpactRload","LongURL API","looksystems\\.net","ltx71","lua-resty-http","lwp-trivial","lycos","LYT\\.SR","mabontland","MagpieRSS","Mail.Ru","MailChimp","makecontact\\\/","Mandrill","MapperCmd","marketinggrader","masscan\\\/[0-9]","Mediapartners-Google","MegaIndex\\.ru","Melvil Rawi\\\/","MergeFlow-PageReader","Metaspinner","MetaURI","Microsearch","Microsoft-WebDAV-MiniRedir","Microsoft Data Access Internet Publishing Provider Protocol","Microsoft Office ","Microsoft Windows Network Diagnostics","Miniature.io\\\/","Mindjet","Miniflux","mixdata dot com","mixed-content-scan","mixnode","Mnogosearch","mogimogi","Mojolicious \\(Perl\\)","monitis","Monitority\\\/[0-9]","montastic","MonTools","Moreover","Morning Paper","mowser","MovableType","Mrcgiguy","mShots","MxToolbox\\\/","MuckRack\\\/","MVAClient","nagios","Najdi\\.si\\\/","Needle\\\/","NETCRAFT","NetLyzer FastProbe","Netpursual","netresearch","NetShelter ContentScan","Netsparker","NetTrack","Netvibes","Neustar WPM","NeutrinoAPI","NewRelicPinger\\\/1.0 \\(\\d+\\)","NewsBlur .*Finder","NewsGator","newsme","newspaper\\\/","Nexgate Ruby Client","NG-Search","nineconnections\\.com","NLNZ_IAHarvester","Nmap Scripting Engine","node-superagent","node-urllib\\\/","node\\.io","nominet\\.org\\.uk","Norton-Safeweb","Notifixious","notifyninja","nuhk","nutch","Nuzzel","nWormFeedFinder","Nymesis","Ocelli\\\/[0-9]","oegp","Offline Explorer","okhttp","Omea Reader","omgili","OMSC","Online Domain Tools","OpenCalaisSemanticProxy","Openstat\\\/","OpenVAS","Optimizer","Orbiter","OrgProbe\\\/[0-9]","orion-semantics","Owler","ow\\.ly","ownCloud News","OxfordCloudService\\\/[0-9]","Page Analyzer","Page Valet","page2rss","page_verifier","PagePeeker","Pagespeed\\\/[0-9]","Panopta","panscient","parsijoo","PayPal IPN","Pcore-HTTP","Pearltrees","peerindex","Peew","Perlu -","PhantomJS\\\/","PhantomJS Screenshoter","Photon\\\/","phpcrawl","phpservermon","Pi-Monster","ping\\.blo\\.gs\\\/","Pingability","Pingdom","Pingoscope","PingSpot","pinterest\\.com","Pizilla","Ploetz \\+ Zeller","Plukkie","PocketParser","POE-Component-Client-HTTP\\\/","Pompos","Porkbun","Port Monitor","postano","PostmanRuntime\\\/","PostPost","postrank","PowerPoint\\\/","Priceonomics Analysis Engine","PritTorrent\\\/[0-9]","PrintFriendly\\.com","Prlog","probethenet","Project 25499","Promotion_Tools_www.searchenginepromotionhelp.com","prospectb2b","Protopage","proximic","PRTG Network Monitor","pshtt, https scanning","PTST ","PTST\\\/[0-9]+","Pulsepoint XT3 web scraper","Python-httplib2","python-requests","Python-urllib","Qirina Hurdler","QQDownload","Qseero","QrafterPro","Qualidator.com SiteAnalyzer","Quora Link Preview","Qwantify","Radian6","RankSonicSiteAuditor","Readability","RealPlayer%20Downloader","RebelMouse","RecurPost\\\/","redback\\\/","Redirect Checker Tool","ReederForMac","request\\.js","ResponseCodeTest\\\/[0-9]","RestSharp","RetrevoPageAnalyzer","Riddler","Rival IQ","Robosourcer","Robozilla\\\/[0-9]","ROI Hunter","RPT-HTTPClient","RSSOwl","safe-agent-scanner","SalesIntelligent","Saleslift","SauceNAO","SBIder","scalaj-http","Scoop","scooter","ScoutJet","ScoutURLMonitor","Scrapy","ScreenShotService\\\/[0-9]","Scrubby","search\\.thunderstone","Search37\\\/","SearchSight","Seeker","semanticdiscovery","semanticjuice","Semiocast HTTP client","sentry\\\/","SEO Browser","Seo Servis","seo-nastroj.cz","Seobility","SEOCentro","SeoCheck","SeopultContentAnalyzer","Server Density Service Monitoring","servernfo\\.com","SetCronJob\\\/","Seznam screenshot-generator","Shelob","Shoppimon Analyzer","ShoppimonAgent\\\/[0-9]","ShopWiki","ShortLinkTranslate","shrinktheweb","SilverReader","Sideqik","SimplePie","SimplyFast","Sitebulb\\\/","SiteIndexed","Site-Shot\\\/","Site24x7","SiteBar","SiteCondor","siteexplorer\\.info","SiteGuardian","Siteimprove\\.com","Sitemap(s)? Generator","SiteMonitor","Siteshooter B0t","SiteSucker","SiteTruth","sitexy\\.com","SkypeUriPreview","Slack\\\/","slider\\.com","slurp","SMRF URL Expander","SMUrlExpander","Snappy","SniffRSS","sniptracker","Snoopy","SnowHaze Search","sogou web","SortSite","sovereign\\.ai","spaziodati","Specificfeeds","speedy","SPEng","Spinn3r","spray-can","Sprinklr ","sqlmap","spyonweb","Sqworm","SSL Labs","ssl-tools","StackRambler","Statastico\\\/","StatusCake","Stratagems Kumo","Stroke.cz","StudioFACA","suchen","summify","Super Monitoring","Surphace Scout","SwiteScraper","Symfony2 BrowserKit","SynHttpClient-Built","Sysomos","Symfony BrowserKit","T0PHackTeam","Tarantula\\\/","Taringa UGC","Tenon\\.io","teoma","terrainformatica\\.com","Test Certificate Info","Tetrahedron\\\/[0-9]","Thinklab","The Drop Reaper","The Expert HTML Source Viewer","theinternetrules","theoldreader\\.com","Thumbshots","ThumbSniper","TinEye","Tiny Tiny RSS","TLSProbe\\\/","topster","touche.com","Traackr.com","TrapitAgent","trendspottr\\.com","truwoGPS","TulipChain","Twisted PageGetter","tweetedtimes\\.com","Tweetminster","Tweezler\\\/","Twikle","Twingly","ubermetrics-technologies","uclassify","uCrawlr\\\/","UdmSearch","UniversalFeedParser","Unshorten\\.It\\!\\\/[0-9]","Untiny","UnwindFetchor","updated","updown\\.io daemon","Upflow","Uptimia","URL Verifier","URLChecker","URLitor.com","urlresolver","Urlstat","UrlTrends Ranking Updater","Vagabondo","vBSEO","via ggpht\\.com GoogleImageProxy","VidibleScraper\\\/","visionutils","vkShare","voltron","voyager\\\/","VSAgent\\\/[0-9]","VSB-TUO\\\/[0-9]","Vulnbusters Meter","VYU2","w3af\\.org","W3C-checklink","W3C-mobileOK","W3C_I18n-Checker","W3C_Unicorn","wangling","Wappalyzer","WatchMouse","WbSrch\\\/","web-capture\\.net","Web-Monitoring","Web-sniffer","Webauskunft","WebCapture","WebClient\\\/","webcollage","WebCookies","WebCopier","WebCorp","WebDoc","WebFetch","WebImages","WebIndex","webkit2png","webmastercoffee","webmon ","webscreenie","Webshot","Website Analyzer\\\/","websitepulse agent","websitepulse[+ ]checker","Websnapr\\\/","Webthumb\\\/[0-9]","WebThumbnail","WeCrawlForThePeace","WeLikeLinks","WEPA","WeSEE","wf84","wget","WhatsApp","WhatsMyIP","WhatWeb","WhereGoes\\?","Whibse","WhoRunsCoinHive","Whynder Magnet","Windows-RSS-Platform","WinHttpRequest","wkhtmlto","wmtips","Woko","Word\\\/","WordPress\\\/","wotbox","WP Engine Install Performance API","wpif","wprecon\\.com survey","WPScan","wscheck","Wtrace","WWW-Mechanize","www\\.monitor\\.us","XaxisSemanticsClassifier","Xenu Link Sleuth","XING-contenttabreceiver\\\/[0-9]","XmlSitemapGenerator","xpymep([0-9]?)\\.exe","Y!J-(ASR|BSC)","Yaanb","yacy","Yahoo Ad monitoring","Yahoo Link Preview","YahooCacheSystem","YahooYSMcm","YandeG","Yandex(?!Search)","yanga","yeti"," YLT","Yo-yo","Yoleo Consumer","yoogliFetchAgent","YottaaMonitor","yourls\\.org","Your-Website-Sucks\\\/[0-9]","Zao","Zemanta Aggregator","Zend\\\\Http\\\\Client","Zend_Http_Client","zgrab","ZnajdzFoto","ZyBorg","[a-z0-9\\-_]*(bot|crawler|archiver|transcoder|spider|uptime|validator|fetcher)"] \ No newline at end of file diff --git a/raw/Crawlers.txt b/raw/Crawlers.txt index b8f4611..b0d93b9 100644 --- a/raw/Crawlers.txt +++ b/raw/Crawlers.txt @@ -210,6 +210,7 @@ ExactSearch ExaleadCloudview Excel\/ Exif Viewer +ExperianCrawlUK Exploratodo Express WebPictures ezooms @@ -265,6 +266,7 @@ github-camo github\.com\/ Go [\d\.]* package http Go-http-client +Go http package gobyus gofetch GomezAgent @@ -315,6 +317,7 @@ heritrix historious\/ hledejLevne\.cz\/[0-9] Holmes +HonesoSearchEngine\/ HootSuite Image proxy Hootsuite-WebFeed\/[0-9] HostTracker @@ -530,6 +533,7 @@ OpenVAS Optimizer Orbiter OrgProbe\/[0-9] +orion-semantics Owler ow\.ly ownCloud News diff --git a/src/Fixtures/Crawlers.php b/src/Fixtures/Crawlers.php index 026cff9..37560c5 100644 --- a/src/Fixtures/Crawlers.php +++ b/src/Fixtures/Crawlers.php @@ -231,6 +231,7 @@ class Crawlers extends AbstractProvider 'ExaleadCloudview', 'Excel\/', 'Exif Viewer', + 'ExperianCrawlUK', 'Exploratodo', 'Express WebPictures', 'ezooms', @@ -286,6 +287,7 @@ class Crawlers extends AbstractProvider 'github\.com\/', 'Go [\d\.]* package http', 'Go-http-client', + 'Go http package', 'gobyus', 'gofetch', 'GomezAgent', @@ -336,6 +338,7 @@ class Crawlers extends AbstractProvider 'historious\/', 'hledejLevne\.cz\/[0-9]', 'Holmes', + 'HonesoSearchEngine\/', 'HootSuite Image proxy', 'Hootsuite-WebFeed\/[0-9]', 'HostTracker', @@ -551,6 +554,7 @@ class Crawlers extends AbstractProvider 'Optimizer', 'Orbiter', 'OrgProbe\/[0-9]', + 'orion-semantics', 'Owler', 'ow\.ly', 'ownCloud News', diff --git a/tests/crawlers.txt b/tests/crawlers.txt index 6780466..6ea81ee 100644 --- a/tests/crawlers.txt +++ b/tests/crawlers.txt @@ -3242,4 +3242,8 @@ hackney/1.11.0 Lavf/57.25.100 makecontact/1.0 (+https://makecontact.io) QrafterPro/4600 CFNetwork/889.9 Darwin/17.2.0 -Buck/2.2; (+https://app.hypefactors.com/media-monitoring/about.html) \ No newline at end of file +Buck/2.2; (+https://app.hypefactors.com/media-monitoring/about.html) +Go http package +HonesoSearchEngine/1.0 +orion-semantics.com 0.1 +ExperianCrawlUK (andrew dot swanton at phgroup dot com) \ No newline at end of file