diff --git a/src/Fixtures/Crawlers.php b/src/Fixtures/Crawlers.php index cdb8513..e97f71d 100644 --- a/src/Fixtures/Crawlers.php +++ b/src/Fixtures/Crawlers.php @@ -20,19 +20,23 @@ class Crawlers extends AbstractProvider */ protected $data = array( '.*Java.*outbrain', + ' YLT', '008\/', '192\.comAgent', '2ip\.ru', '404checker', + '404enemy', + '80legs', + '^b0t$', '^bluefish ', '^Calypso v\/', '^COMODO DCV', '^DangDang', '^DavClnt', '^FDM ', - '^Grabber', '^git\/', '^Goose\/', + '^Grabber', '^HTTPClient\/', '^Java\/', '^Jeode\/', @@ -52,12 +56,13 @@ class Crawlers extends AbstractProvider 'a3logics\.in', 'A6-Indexer', 'a\.pr-cy\.ru', - 'Abonti\/', + 'Abonti', 'Aboundex', 'aboutthedomain', 'Accoona-AI-Agent', 'acoon', 'acrylicapps\.com\/pulp', + 'Acunetix', 'adbeat', 'AddThis', 'ADmantX', @@ -74,9 +79,12 @@ class Crawlers extends AbstractProvider 'alertra', 'alexa site audit', 'Alibaba\.Security\.Heimdall', + 'Alligator', 'allloadin\.com', + 'AllSubmitter', 'alyze\.info', 'amagit', + 'Anarchie', 'AndroidDownloadManager', 'Anemone', 'AngleSharp\/', @@ -84,9 +92,10 @@ class Crawlers extends AbstractProvider 'Anturis Agent', 'AnyEvent-HTTP\/', 'Apache Droid', - 'ApacheBench\/', 'Apache-HttpAsyncClient\/', 'Apache-HttpClient\/', + 'ApacheBench\/', + 'Apexoo', 'APIs-Google', 'AportWorm\/[0-9]', 'AppBeat\/[0-9]', @@ -97,53 +106,78 @@ class Crawlers extends AbstractProvider 'aria2', 'asafaweb.com', 'AskQuickly', + 'ASPSeek', + 'Asterias', 'Astute', 'asynchttp', + 'Attach', 'autocite', 'Autonomy', 'axios\/', 'B-l-i-t-z-B-O-T', - '^b0t$', - 'Backlink-Ceck\.de', + 'Backlink-Ceck', + 'backlink-check', + 'BackStreet', + 'BackWeb', 'Bad-Neighborhood', + 'Badass', 'baidu\.com', + 'Bandit', + 'BatchFTP', + 'Battleztar\ Bazinga', 'baypup\/[0-9]', 'baypup\/colbert', 'BazQux', + 'BBBike', 'BCKLINKS', 'BDFetch', 'BegunAdvertising\/', 'BigBozz', + 'Bigfoot', 'biglotron', 'BingLocalSearch', 'BingPreview', 'binlar', 'biNu image cacher', + 'Bitacle', 'biz_Directory', + 'Black\ Hole', 'Blackboard Safeassign', + 'BlackWidow', 'Bloglovin', 'BlogPulseLive', 'BlogSearch', 'Blogtrottr', + 'BlowFish', + 'Boardreader', 'boitho\.com-dc', + 'Bolt', 'BPImageWalker', 'Braintree-Webhooks', 'Branch Metrics API', 'Branch-Passthrough', + 'Brandprotect', + 'Brandwatch', 'Brodie\/', 'Browsershots', 'BUbiNG', 'Buck\/', + 'Buddy', + 'BuiltWith', + 'Bullseye', + 'BunnySlippers', 'Burf Search', 'Butterfly\/', 'BuzzSumo', 'CAAM\/[0-9]', 'CakePHP', + 'Calculon', 'CapsuleChecker', 'CaretNail', 'catexplorador', 'cb crawl', 'CC Metadata Scaper', + 'Cegbfeieh', 'Cerberian Drtrs', 'CERT\.at-Statistics-Survey', 'cg-eye', @@ -152,20 +186,24 @@ class Crawlers extends AbstractProvider 'Charlotte', 'CheckHost', 'checkprivacy', - 'chkme\.com', + 'CherryPicker', + 'ChinaClaw', 'Chirp\/[0-9]', + 'chkme\.com', + 'Chlooe', 'CirrusExplorer\/', 'CISPA Vulnerability Notification', 'Citoid', 'CJNetworkQuality', 'Clarsentia', 'clips\.ua\.ac\.be', - 'Cloud mapping experiment', + 'Cloud\ mapping', 'CloudEndure', 'CloudFlare-AlwaysOnline', 'Cloudinary\/[0-9]', 'cmcm\.com', 'coccoc', + 'cognitiveseo', 'colly -', 'CommaFeed', 'Commons-HttpClient', @@ -173,24 +211,37 @@ class Crawlers extends AbstractProvider 'contactbigdatafr', 'convera', 'copyright sheriff', + 'CopyRightCheck', + 'Copyscape', + 'Cosmos4j\.feedback', 'Covario-IDS', 'CrawlForMe\/[0-9]', + 'Crescent', 'cron-job\.org', 'Crowsnest', + 'CSHttp', 'curb', 'Curious George', 'curl', + 'Custo', 'cuwhois\/[0-9]', 'cybo\.com', 'DareBoost', - 'help@dataminr\.com', + 'DatabaseDriverMysqli', + 'DataCha0s', 'DataparkSearch', 'dataprovider', 'Daum(oa)?[ \/][0-9]', + 'Demon', 'DeuSu', 'developers\.google\.com\/\+\/web\/snippet\/', + 'Devil', 'Digg', + 'Digincore', + 'DigitalPebble', + 'Dirbuster', 'Dispatch\/', + 'DittoSpyder', 'dlvr', 'DMBrowser', 'DNS-Tools Header-Analyzer', @@ -202,28 +253,37 @@ class Crawlers extends AbstractProvider 'dotMailer content retrieval', 'dotSemantic', 'downforeveryoneorjustme', + 'Download\ Wonder', 'downnotifier\.com', 'DowntimeDetector', 'Dragonfly File Reader', + 'Drip', 'drupact', 'Drupal \(\+http:\/\/drupal\.org\/\)', + 'DTS\ Agent', 'dubaiindex', 'EARTHCOM', 'Easy-Thumb', + 'EasyDL', + 'Ebingbong', 'ec2linkfinder', 'eCairn-Grabber', + 'eCatch', 'ECCP', 'echocrawl', 'eContext\/', + 'Ecxi', + 'EirGrabber', 'ElectricMonk', 'elefent', 'EMail Exractor', 'Email%20Extractor%20Lite', + 'EMail\ Wolf', 'EmailWolf', 'Embed PHP Library', 'Embedly', 'europarchive\.org', - 'evc-batch\/[0-9]', + 'evc-batch', 'EventMachine HttpClient', 'Evidon', 'Evrinid', @@ -234,6 +294,9 @@ class Crawlers extends AbstractProvider 'ExperianCrawlUK', 'Exploratodo', 'Express WebPictures', + 'ExtractorPro', + 'Extreme\ Picture\ Finder', + 'EyeNetIE', 'ezooms', 'facebookexternalhit', 'facebookplatform', @@ -257,16 +320,21 @@ class Crawlers extends AbstractProvider 'Fetch API', 'Fetch\/[0-9]', 'Fever\/[0-9]', + 'FHscan', + 'Fimap', 'findlink', 'findthatfile', + 'FlashGet', 'FlipboardBrowserProxy', 'FlipboardProxy', 'FlipboardRSS', 'fluffy', + 'Flunky', 'flynxapp', 'forensiq', 'FoundSeoTool\/[0-9]', 'free thumbnails', + 'Freeuploader', 'FreeWebMonitoring SiteChecker', 'Funnelback', 'G-i-g-a-b-o-t', @@ -277,17 +345,22 @@ class Crawlers extends AbstractProvider 'Genderanalyzer', 'Genieo', 'GentleSource', + 'Getintent', 'GetLinkInfo', 'getprismatic\.com', + 'GetRight', 'GetURLInfo\/[0-9]', + 'GetWeb', 'Ghost Inspector', 'GigablastOpenSource', 'GIS-LABS', 'github-camo', 'github\.com\/', 'Go [\d\.]* package http', - 'Go-http-client', 'Go http package', + 'Go!Zilla', + 'Go-Ahead-Got-It', + 'Go-http-client', 'gobyus', 'gofetch', 'GomezAgent', @@ -319,76 +392,98 @@ class Crawlers extends AbstractProvider 'GoSpotCheck', 'GoSquared-Status-Checker', 'gosquared-thumbnailer', + 'Gotit', + 'GoZilla', 'grabify', + 'GrabNet', + 'Grafula', 'Grammarly', - 'grouphigh', + 'GrapeFX', 'grokkit', + 'grouphigh', 'grub-client', 'gSOAP\/', + 'GT::WWW', 'GTmetrix', 'GuzzleHttp', 'gvfs\/', 'HAA(A)?RTLAND http client', + 'Haansoft', 'hackney\/', 'Hatena', + 'Havij', 'hawkReader', 'HEADMasterSEO', 'HeartRails_Capture', + 'help@dataminr\.com', 'heritrix', 'historious\/', 'hledejLevne\.cz\/[0-9]', + 'Hloader', + 'HMView', 'Holmes', 'HonesoSearchEngine\/', 'HootSuite Image proxy', 'Hootsuite-WebFeed\/[0-9]', + 'hosterstats', 'HostTracker', 'ht:\/\/check', 'htdig', - 'HTMLParser\/', + 'HTMLparser', 'http-get', 'HTTP-Header-Abfrage', 'http-kit', 'http-request\/', 'HTTP-Tiny', + 'HTTP::Lite', + 'http\.rb\/', 'HTTP_Compression_Test', 'http_request2', 'http_requester', 'HttpComponents', 'httphr', 'HTTPMon', - 'PEAR HTTPRequest', - 'http\.rb\/', 'httpscheck', 'httpssites_power', 'httpunit', 'HttpUrlConnection', 'httrack', - 'hosterstats', 'huaweisymantec', 'HubPages.*crawlingpolicy', 'HubSpot ', + 'Humanlinks', 'HyperZbozi.cz Feeder', 'i2kconnect\/', + 'Iblog', 'ichiro', + 'Id-search', 'IdeelaborPlagiaat', 'IDG Twitter Links Resolver', 'IDwhois\/[0-9]', 'Iframely', 'igdeSpyder', 'IlTrovatore', + 'Image\ Fetch', + 'Image\ Sucker', 'ImageEngine\/', 'Imagga', 'imgsizer', 'InAGist', 'inbound\.li parser', 'InDesign%20CC', + 'Indy\ Library', 'infegy', 'infohelfer', + 'InfoTekies', 'InfoWizards Reciprocal Link System PRO', - 'Instapaper', 'inpwrd\.com', + 'instabid', + 'Instapaper', 'Integrity', 'integromedb', + 'Intelliseek', + 'InterGET', + 'Internet\ Ninja', 'internet_archive', 'InternetSeer', 'internetVista monitor', @@ -396,17 +491,20 @@ class Crawlers extends AbstractProvider 'IODC', 'IOI', 'iplabel', - 'IPS\/[0-9]', 'ips-agent', + 'IPS\/[0-9]', 'IPWorks HTTP\/S Component', 'iqdb\/', + 'Iria', 'Irokez', 'isitup\.org', 'iskanie', 'iZSearch', 'janforman', 'Jaunt\/', + 'Jbrofuzz', 'Jersey\/', + 'JetCar', 'Jigsaw', 'Jobboerse', 'JobFeed discovery', @@ -415,13 +513,17 @@ class Crawlers extends AbstractProvider 'Jobrapido', 'Jobsearch1\.5', 'JoinVision Generic', + 'Joomla', + 'Jorgee', 'JS-Kit', + 'JustView', 'Kaspersky Lab CFR link resolver', 'KeepRight OpenStreetMap Checker', 'Kelny\/', 'Kerrigan\/', 'KeyCDN', 'Keyword Extractor', + 'Keyword\ Density', 'Keywords Research', 'KickFire', 'KimonoLabs\/', @@ -435,11 +537,18 @@ class Crawlers extends AbstractProvider 'Larbin', 'Lavf\/', 'LayeredExtractor', + 'LeechFTP', + 'LeechGet', 'letsencrypt', + 'Lftp', 'LibVLC', + 'LibWeb', + 'Libwhisker', 'libwww', 'Licorne Image Snapshot', 'Liferea\/', + 'Lightspeedsystems', + 'Likse', 'link checker', 'Link Valet', 'link_thumbnailer', @@ -450,72 +559,101 @@ class Crawlers extends AbstractProvider 'linkfluence', 'linkpeek', 'LinkPreviewGenerator', + 'LinkScan', + 'LinksManager', 'LinkTiger', 'LinkWalker', 'Lipperhey', + 'Litemage_walker', 'livedoor ScreenShot', - 'LoadImpactPageAnalyzer', 'LoadImpactRload', 'LongURL API', 'looksystems\.net', 'ltx71', 'lua-resty-http', + 'lwp-request', 'lwp-trivial', + 'LWP::Simple', 'lycos', 'LYT\.SR', 'mabontland', + 'Mag-Net', 'MagpieRSS', 'Mail.Ru', 'MailChimp', + 'Majestic12', 'makecontact\/', 'Mandrill', 'MapperCmd', 'marketinggrader', + 'MarkMonitor', + 'MarkWatch', + 'Mass\ Downloader', 'masscan\/[0-9]', + 'Mata\ Hari', 'Mediapartners-Google', + 'mediawords', 'MegaIndex\.ru', 'Melvil Rawi\/', 'MergeFlow-PageReader', 'Metaspinner', 'MetaURI', + 'MFC_Tear_Sample', 'Microsearch', - 'Microsoft-WebDAV-MiniRedir', - 'Microsoft Data Access Internet Publishing Provider Protocol', 'Microsoft Office ', 'Microsoft Windows Network Diagnostics', - 'Miniature.io\/', + 'Microsoft-WebDAV-MiniRedir', + 'Microsoft\ Data\ Access', + 'MIDown\ tool', + 'MIIxpc', 'Mindjet', + 'Miniature.io\/', 'Miniflux', + 'Mister\ PiX', 'mixdata dot com', 'mixed-content-scan', 'mixnode', 'Mnogosearch', 'mogimogi', + 'Mojeek', 'Mojolicious \(Perl\)', 'monitis', 'Monitority\/[0-9]', 'montastic', 'MonTools', 'Moreover', + 'Morfeus\ Fucking\ Scanner', 'Morning Paper', - 'mowser', 'MovableType', + 'mowser', 'Mrcgiguy', + 'MS\ Web\ Services\ Client\ Protocol', + 'MSFrontPage', 'mShots', - 'MxToolbox\/', 'MuckRack\/', + 'muhstik-scan', 'MVAClient', + 'MxToolbox\/', 'nagios', 'Najdi\.si\/', - 'Needle\/', + 'Name\ Intelligence', + 'Nameprotect', + 'Navroad', + 'NearSite', + 'Needle', + 'Nessus', + 'Net\ Vampire', + 'NetAnts', 'NETCRAFT', - 'NetLyzer FastProbe', + 'NetLyzer', + 'NetMechanic', 'Netpursual', 'netresearch', 'NetShelter ContentScan', 'Netsparker', 'NetTrack', 'Netvibes', + 'NetZIP', 'Neustar WPM', 'NeutrinoAPI', 'NewRelicPinger\/1.0 \(\d+\)', @@ -525,6 +663,9 @@ class Crawlers extends AbstractProvider 'newspaper\/', 'Nexgate Ruby Client', 'NG-Search', + 'Nibbler', + 'NICErsPRO', + 'Nikto', 'nineconnections\.com', 'NLNZ_IAHarvester', 'Nmap Scripting Engine', @@ -540,57 +681,80 @@ class Crawlers extends AbstractProvider 'Nuzzel', 'nWormFeedFinder', 'Nymesis', + 'NYU', 'Ocelli\/[0-9]', + 'Octopus', 'oegp', 'Offline Explorer', + 'Offline\ Navigator', 'okhttp', 'Omea Reader', 'omgili', 'OMSC', 'Online Domain Tools', 'OpenCalaisSemanticProxy', + 'Openfind', + 'OpenLinkProfiler', 'Openstat\/', 'OpenVAS', 'Optimizer', 'Orbiter', 'OrgProbe\/[0-9]', 'orion-semantics', - 'Owler', 'ow\.ly', + 'Owler', 'ownCloud News', 'OxfordCloudService\/[0-9]', 'Page Analyzer', 'Page Valet', 'page2rss', + 'page\ scorer', 'page_verifier', + 'PageAnalyzer', + 'PageGrabber', 'PagePeeker', + 'PageScorer', 'Pagespeed\/[0-9]', 'Panopta', 'panscient', + 'Papa\ Foto', 'parsijoo', + 'Pavuk', 'PayPal IPN', + 'pcBrowser', 'Pcore-HTTP', + 'PEAR HTTPRequest', 'Pearltrees', + 'PECL::HTTP', 'peerindex', 'Peew', + 'PeoplePal', 'Perlu -', - 'PhantomJS\/', 'PhantomJS Screenshoter', + 'PhantomJS\/', 'Photon\/', 'phpcrawl', 'phpservermon', 'Pi-Monster', + 'Picscout', + 'Picsearch', + 'PictureFinder', + 'Pimonster', 'ping\.blo\.gs\/', 'Pingability', 'Pingdom', 'Pingoscope', 'PingSpot', 'pinterest\.com', + 'Pixray', 'Pizilla', + 'PleaseCrawl', 'Ploetz \+ Zeller', 'Plukkie', + 'plumanalytics', 'PocketParser', - 'POE-Component-Client-HTTP\/', + 'Pockey', + 'POE-Component-Client-HTTP', 'Pompos', 'Porkbun', 'Port Monitor', @@ -600,43 +764,51 @@ class Crawlers extends AbstractProvider 'postrank', 'PowerPoint\/', 'Priceonomics Analysis Engine', - 'PritTorrent\/[0-9]', 'PrintFriendly\.com', + 'PritTorrent\/[0-9]', 'Prlog', 'probethenet', 'Project 25499', 'Promotion_Tools_www.searchenginepromotionhelp.com', 'prospectb2b', 'Protopage', + 'ProWebWalker', 'proximic', 'PRTG Network Monitor', 'pshtt, https scanning', 'PTST ', 'PTST\/[0-9]+', 'Pulsepoint XT3 web scraper', + 'Pump', 'Python-httplib2', 'python-requests', 'Python-urllib', 'Qirina Hurdler', 'QQDownload', - 'Qseero', 'QrafterPro', + 'Qseero', 'Qualidator.com SiteAnalyzer', + 'QueryN\ Metasearch', 'Quora Link Preview', 'Qwantify', 'Radian6', + 'RankActive', + 'RankFlex', 'RankSonicSiteAuditor', 'Readability', + 'RealDownload', 'RealPlayer%20Downloader', 'RebelMouse', + 'Recorder', 'RecurPost\/', 'redback\/', 'Redirect Checker Tool', 'ReederForMac', + 'ReGet', + 'RepoMonkey', 'request\.js', 'ResponseCodeTest\/[0-9]', 'RestSharp', - 'RetrevoPageAnalyzer', 'Riddler', 'Rival IQ', 'Robosourcer', @@ -650,20 +822,25 @@ class Crawlers extends AbstractProvider 'SauceNAO', 'SBIder', 'scalaj-http', + 'scan\.lol', + 'ScanAlert', 'Scoop', 'scooter', 'ScoutJet', 'ScoutURLMonitor', 'Scrapy', + 'Screaming', 'ScreenShotService\/[0-9]', 'Scrubby', - 'search\.thunderstone', 'Search37\/', + 'search\.thunderstone', + 'Searchestate', 'SearchSight', 'Seeker', 'semanticdiscovery', 'semanticjuice', 'Semiocast HTTP client', + 'Semrush', 'sentry\/', 'SEO Browser', 'Seo Servis', @@ -671,42 +848,58 @@ class Crawlers extends AbstractProvider 'Seobility', 'SEOCentro', 'SeoCheck', + 'SEOkicks', + 'Seomoz', + 'SEOprofiler', 'SeopultContentAnalyzer', + 'seoscanners', + 'SEOstats', 'Server Density Service Monitoring', 'servernfo\.com', 'SetCronJob\/', - 'Seznam screenshot-generator', + 'sexsearcher', + 'Seznam', 'Shelob', + 'Shodan', 'Shoppimon Analyzer', 'ShoppimonAgent\/[0-9]', 'ShopWiki', 'ShortLinkTranslate', 'shrinktheweb', - 'SilverReader', 'Sideqik', + 'SilverReader', 'SimplePie', 'SimplyFast', - 'Sitebulb\/', - 'SiteIndexed', + 'Siphon', + 'SISTRIX', 'Site-Shot\/', 'Site24x7', + 'Site\ Sucker', 'SiteBar', + 'Sitebeam', + 'Sitebulb\/', 'SiteCondor', - 'siteexplorer\.info', + 'SiteExplorer', 'SiteGuardian', - 'Siteimprove\.com', + 'Siteimprove', + 'SiteIndexed', 'Sitemap(s)? Generator', 'SiteMonitor', 'Siteshooter B0t', + 'SiteSnagger', 'SiteSucker', 'SiteTruth', + 'Sitevigil', 'sitexy\.com', 'SkypeUriPreview', 'Slack\/', 'slider\.com', 'slurp', + 'SlySearch', + 'SmartDownload', 'SMRF URL Expander', 'SMUrlExpander', + 'Snake', 'Snappy', 'SniffRSS', 'sniptracker', @@ -714,7 +907,11 @@ class Crawlers extends AbstractProvider 'SnowHaze Search', 'sogou web', 'SortSite', + 'Sottopop', 'sovereign\.ai', + 'SpaceBison', + 'Spammen', + 'Spanner', 'spaziodati', 'Specificfeeds', 'speedy', @@ -722,58 +919,78 @@ class Crawlers extends AbstractProvider 'Spinn3r', 'spray-can', 'Sprinklr ', - 'sqlmap', 'spyonweb', + 'sqlmap', + 'Sqlworm', 'Sqworm', 'SSL Labs', 'ssl-tools', 'StackRambler', 'Statastico\/', 'StatusCake', + 'Steeler', 'Stratagems Kumo', 'Stroke.cz', 'StudioFACA', 'suchen', + 'Sucuri', 'summify', 'Super Monitoring', + 'SuperHTTP', 'Surphace Scout', + 'Suzuran', 'SwiteScraper', + 'Symfony BrowserKit', 'Symfony2 BrowserKit', 'SynHttpClient-Built', 'Sysomos', - 'Symfony BrowserKit', + 'sysscan', + 'Szukacz', 'T0PHackTeam', + 'tAkeOut', 'Tarantula\/', 'Taringa UGC', + 'Teleport', + 'Telesoft', + 'Telesphoreo', + 'Telesphorep', 'Tenon\.io', 'teoma', 'terrainformatica\.com', 'Test Certificate Info', 'Tetrahedron\/[0-9]', - 'Thinklab', 'The Drop Reaper', 'The Expert HTML Source Viewer', + 'The\ Intraformant', 'theinternetrules', + 'TheNomad', 'theoldreader\.com', + 'Thinklab', 'Thumbshots', 'ThumbSniper', 'TinEye', 'Tiny Tiny RSS', 'TLSProbe\/', + 'Toata', 'topster', 'touche.com', 'Traackr.com', 'TrapitAgent', + 'Trendiction', 'Trendsmap Resolver', 'trendspottr\.com', 'truwoGPS', 'TulipChain', - 'Twisted PageGetter', + 'Turingos', + 'Turnitin', 'tweetedtimes\.com', 'Tweetminster', 'Tweezler\/', + 'Twice', 'Twikle', 'Twingly', + 'Twisted PageGetter', + 'Typhoeus', 'ubermetrics-technologies', 'uclassify', 'uCrawlr\/', @@ -792,12 +1009,20 @@ class Crawlers extends AbstractProvider 'urlresolver', 'Urlstat', 'UrlTrends Ranking Updater', + 'URLy\ Warning', + 'URLy\.Warning', + 'Vacuum', 'Vagabondo', + 'VB\ Project', 'vBSEO', + 'VCI', 'via ggpht\.com GoogleImageProxy', - 'VidibleScraper\/', + 'VidibleScraper', + 'Virusdie', 'visionutils', 'vkShare', + 'VoidEYE', + 'Voil', 'voltron', 'voyager\/', 'VSAgent\/[0-9]', @@ -809,6 +1034,8 @@ class Crawlers extends AbstractProvider 'W3C-mobileOK', 'W3C_I18n-Checker', 'W3C_Unicorn', + 'Wallpapers\/[0-9]+', + 'WallpapersHD', 'wangling', 'Wappalyzer', 'WatchMouse', @@ -816,7 +1043,17 @@ class Crawlers extends AbstractProvider 'web-capture\.net', 'Web-Monitoring', 'Web-sniffer', + 'Web\ Auto', + 'Web\ Collage', + 'Web\ Enhancer', + 'Web\ Fetch', + 'Web\ Fuck', + 'Web\ Pix', + 'Web\ Sauger', + 'Web\ Sucker', + 'Webalta', 'Webauskunft', + 'WebAuto', 'WebCapture', 'WebClient\/', 'webcollage', @@ -824,20 +1061,37 @@ class Crawlers extends AbstractProvider 'WebCopier', 'WebCorp', 'WebDoc', + 'WebEnhancer', 'WebFetch', + 'WebFuck', + 'WebGo\ IS', + 'WebImageCollector', 'WebImages', 'WebIndex', 'webkit2png', + 'WebLeacher', 'webmastercoffee', 'webmon ', + 'WebPix', + 'WebReaper', + 'WebSauger', 'webscreenie', + 'Webshag', 'Webshot', 'Website Analyzer\/', + 'Website\ Quester', + 'WebsiteExtractor', 'websitepulse agent', 'websitepulse[+ ]checker', + 'WebsiteQuester', 'Websnapr\/', + 'Webster', + 'WebStripper', + 'WebSucker', 'Webthumb\/[0-9]', 'WebThumbnail', + 'WebWhacker', + 'WebZIP', 'WeCrawlForThePeace', 'WeLikeLinks', 'WEPA', @@ -865,8 +1119,13 @@ class Crawlers extends AbstractProvider 'WPScan', 'wscheck', 'Wtrace', + 'WWW-Collector-E', 'WWW-Mechanize', + 'WWW::Mechanize', 'www\.monitor\.us', + 'WWWOFFLE', + 'x09Mozilla', + 'x22Mozilla', 'XaxisSemanticsClassifier', 'Xenu Link Sleuth', 'XING-contenttabreceiver\/[0-9]', @@ -883,17 +1142,20 @@ class Crawlers extends AbstractProvider 'Yandex(?!Search)', 'yanga', 'yeti', - ' YLT', 'Yo-yo', 'Yoleo Consumer', 'yoogliFetchAgent', 'YottaaMonitor', - 'yourls\.org', 'Your-Website-Sucks\/[0-9]', + 'yourls\.org', + 'Zade', 'Zao', + 'Zauba', 'Zemanta Aggregator', 'Zend\\\\Http\\\\Client', 'Zend_Http_Client', + 'Zermelo', + 'Zeus', 'zgrab', 'ZnajdzFoto', 'ZyBorg', diff --git a/tests/crawlers.txt b/tests/crawlers.txt index b79085b..a632781 100644 --- a/tests/crawlers.txt +++ b/tests/crawlers.txt @@ -3247,4 +3247,9 @@ Go http package HonesoSearchEngine/1.0 orion-semantics.com 0.1 ExperianCrawlUK (andrew dot swanton at phgroup dot com) -Trendsmap Resolver/0.0.1 \ No newline at end of file +Trendsmap Resolver/0.0.1 +Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; FunWebProducts; .NET CLR 1.1.4322; PeoplePal 6.2) +Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt; DTS Agent +Mozilla/3.0 (compatible; Indy Library) +VB Project +webster-internet.de pad browser \ No newline at end of file diff --git a/tests/devices.txt b/tests/devices.txt index a8f42ca..c71bef6 100644 --- a/tests/devices.txt +++ b/tests/devices.txt @@ -200,7 +200,6 @@ Mozilla Mozilla/1.1 (compatible; MSPIE 2.0; Windows CE) Mozilla/2.0 (compatible; crw) Mozilla/2.0 (compatible; MSIE 3.0; AK; Windows 95) -Mozilla/3.0 (compatible; Indy Library) mozilla/3.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/5.0.1 Mozilla/3.6 (compatible; MSIE 7.0; Windows NT 6.1; en-US; rv:1.9.2.16) Gecko/20110319 MRA 5.6 (build 03278) Firefox/3.6.16 Mozilla/37.0.2 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322) @@ -211,7 +210,6 @@ Mozilla/4.0 (compatible MSIE 7.0 Windows NT 5.1 MRSPUTNIK 2, 3, 0, 289 .NET Mozilla/4.0 (compatible MSIE 7.0 Windows NT 5.1 Trident/4.0 MyIE2 MRSPUTNIK 2, 3, 0, 293 MRA 5.7 (build 03773) .NET CLR 2.0.50727 .NET CLR 3.0.4506.2152 .NET CLR 3.5.30729) Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0) Mozilla/4.0 (compatible; MSIE 5.0; Windows 98; DigExt) -Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt; DTS Agent Mozilla/4.0 (compatible; MSIE 5.5; Windows 98) Mozilla/4.0 (compatible; MSIE 5.5; Windows 98; Crazy Browser 1.x.x) Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0) @@ -264,7 +262,6 @@ Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727; .NET Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729) Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; BTRS103284; InfoPath.2) Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; chromeframe/15.0.874.121; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET4.0C; InfoPath.1) -Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; FunWebProducts; .NET CLR 1.1.4322; PeoplePal 6.2) Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; FunWebProducts; MRA 4.6 (build 01425); .NET CLR 1.1.4322; .NET CLR 2.0.50727) Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; GTB7.4; .NET CLR 1.0.3705; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729) Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; InfoPath.1; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET4.0C; .NET4.0E) @@ -21613,11 +21610,9 @@ UCWEB/2.0(Symbian; U; S60 V5; en-US; Nokia5233) U2/1.0.0 UCBrowser/8.8.0.245 U2/ University of Warwick redirection service, go.warwick.ac.uk User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31 User-Agent⇥Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0) -VB Project VPNGate/5.0 (Macintosh; Intel Mac OS X 10_9) AppleWebKit/537.71 (KHTML, like Gecko) Version/7.0 Safari/D64567 WannaBe (Macintosh; PPC) WebProcess/8537.75.14 CFNetwork/596.6.2 Darwin/12.5.0 (x86_64) (MacBookPro9%2C1) -webster-internet.de pad browser woot woot www.splashaccess.net Mozilla/5.0 (Linux; U; Android 5.1; zh-cn; 1501_M02 Build/LMY47D) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30 SogouMSE,SogouMobileBrowser/4.1.0