From 3b976fd408bec9e81524c7582d484fc6dfcb1115 Mon Sep 17 00:00:00 2001 From: Kirk Mayo Date: Wed, 23 Jul 2014 09:57:10 +1200 Subject: [PATCH 01/32] NEW: Adding link tracking css class and using built in reporting --- README.md | 6 --- code/jobs/CheckExternalLinksJob.php | 8 +++- code/model/BrokenExternalLinks.php | 44 ----------------- code/tasks/CheckExternalLinks.php | 73 ++++++++++++++++++----------- 4 files changed, 52 insertions(+), 79 deletions(-) delete mode 100644 code/model/BrokenExternalLinks.php diff --git a/README.md b/README.md index f939d47..2196e70 100644 --- a/README.md +++ b/README.md @@ -38,12 +38,6 @@ Add the following code to the mysite config to run the job every 24 hours (86400 `Config::inst()->update('CheckExternalLinks', 'QueuedJob', 86400);` -## Disable the Broken external link menu - -To disable the *Broken Ext. Links* menu add the following code to mysite/_config.php - -`CMSMenu::remove_menu_item('BrokenExternalLinksAdmin');` - ## TODO ## Fix setting the class attribute of broken links to ss-broken diff --git a/code/jobs/CheckExternalLinksJob.php b/code/jobs/CheckExternalLinksJob.php index 20ee9c6..6c9c34b 100644 --- a/code/jobs/CheckExternalLinksJob.php +++ b/code/jobs/CheckExternalLinksJob.php @@ -9,7 +9,7 @@ class CheckExternalLinksJob extends AbstractQueuedJob { public static $regenerate_time = 43200; public function __construct() { - $this->pagesToProcess = DB::query('SELECT "ID" FROM "SiteTree_Live" WHERE "ShowInSearch"=1')->column(); + $this->pagesToProcess = SiteTree::get(); $this->currentStep = 0; $this->totalSteps = count($this->pagesToProcess); } @@ -49,7 +49,7 @@ public function setup() { $restart = $this->currentStep == 0; if ($restart) { - $this->pagesToProcess = DB::query('SELECT "ID" FROM SiteTree_Live WHERE ShowInSearch=1')->column(); + $this->pagesToProcess = SiteTree::get(); } } @@ -63,6 +63,10 @@ public function prepareForRestart() { public function process() { $task = new CheckExternalLinks(); $task->run(); + $data = $this->getJobData(); + $completedPages = $task->getCompletedPages(); + $totalPages = $task->getTotalPages(); + $this->addMessage("$completedPages/$totalPages pages completed"); $this->completeJob(); } diff --git a/code/model/BrokenExternalLinks.php b/code/model/BrokenExternalLinks.php deleted file mode 100644 index 4e73c1d..0000000 --- a/code/model/BrokenExternalLinks.php +++ /dev/null @@ -1,44 +0,0 @@ - 'Varchar(2083)', // 2083 is the maximum length of a URL in Internet Explorer. - 'HTTPCode' =>'Int' - ); - - private static $has_one = array( - 'Page' => 'Page' - ); - - public static $summary_fields = array( - 'Page.Title' => 'Page', - 'HTTPCode' => 'HTTP Code', - 'Created' => 'Created' - ); - - public static $searchable_fields = array( - 'HTTPCode' => array('title' => 'HTTP Code') - ); - - function canEdit($member = false) { - return false; - } - -} - -class BrokenExternalLinksAdmin extends ModelAdmin { - - public static $url_segment = 'broken-external-links-admin'; - - public static $managed_models = array( - 'BrokenExternalLinks' - ); - - public static $menu_title = 'Broken Ext. links'; - - public function init() { - parent::init(); - } - -} diff --git a/code/tasks/CheckExternalLinks.php b/code/tasks/CheckExternalLinks.php index 1232acf..830bb54 100644 --- a/code/tasks/CheckExternalLinks.php +++ b/code/tasks/CheckExternalLinks.php @@ -7,26 +7,46 @@ class CheckExternalLinks extends BuildTask { protected $enabled = true; + private $completedPages; + private $totalPages; + + public function getCompletedPages() { + return $this->completedPages; + } + + public function getTotalPages() { + return $this->totalPages; + } + function run($request) { - // clear broken external link table - $table = 'BrokenExternalLinks'; - if(method_exists(DB::getConn(), 'clearTable')) DB::getConn()->clearTable($table); - else DB::query("TRUNCATE \"$table\""); - $pages = SiteTree::get(); + $pages = Versioned::get_by_stage('SiteTree', 'Live'); foreach ($pages as $page) { + ++$this->totalPages; + $htmlValue = Injector::inst()->create('HTMLValue', $page->Content); + if (!$htmlValue->isValid()) { + continue; + } // Populate link tracking for internal links & links to asset files. if($links = $htmlValue->getElementsByTagName('a')) foreach($links as $link) { + $class = $link->getAttribute('class'); + $pos = stripos($class, 'ss-broken'); + if ($pos !== false && $page->HasBrokenLink == 1) continue; + $href = Director::makeRelative($link->getAttribute('href')); if ($href == 'admin/') continue; - // ignore SiteTree and assets links as they will be caught by SiteTreeLinkTracking - if(preg_match('/\[sitetree_link,id=([0-9]+)\]/i', $href, $matches)) { + // ignore SiteTree, anchor and assets links as they will be caught + // by SiteTreeLinkTracking + if(preg_match('/\[(file_link|sitetree_link),id=([0-9]+)\]/i', $href, $matches)) { + continue; + } else if (isset($href[0]) && $href[0] == '#') { continue; } else if(substr($href, 0, strlen(ASSETS_DIR) + 1) == ASSETS_DIR.'/') { continue; } + if($href && function_exists('curl_init')) { $handle = curl_init($href); curl_setopt($handle, CURLOPT_RETURNTRANSFER, TRUE); @@ -36,31 +56,30 @@ function run($request) { if (($httpCode < 200 || $httpCode > 302) || ($href == '' || $href[0] == '/')) { - $brokenLink = new BrokenExternalLinks(); - $brokenLink->PageID = $page->ID; - $brokenLink->Link = $href; - $brokenLink->HTTPCode = $httpCode; - $brokenLink->write(); - - // TODO set the broken link class - /* - $class = $link->getAttribute('class'); - $class = ($class) ? $class . 'ss-broken' : 'ss-broken'; - $link->setAttribute('class', ($class ? "$class ss-broken" : 'ss-broken')); - */ - - // use raw sql query to set broken link as calling the dataobject write - // method will reset the links if no broken internal links are found - $query = "UPDATE \"SiteTree\" SET \"HasBrokenLink\" = 1 "; - $query .= "WHERE \"ID\" = " . (int)$page->ID; - $result = DB::query($query); - if (!$result) { - // error updating hasBrokenLink + // set the broken link class + $class = ($class && stripos($class, 'ss-broken')) ? + $class . ' ss-broken' : 'ss-broken'; + $link->setAttribute('class', ($class ? $class : 'ss-broken')); + $htmlValue->__call('saveHTML', array()); + + $page->Content = $htmlValue->getContent(); + $page->write(); + + if (!$page->HasBrokenLink) { + // bypass the ORM as syncLinkTracking does not allow you + // to update HasBrokenLink to true + $query = "UPDATE \"SiteTree_Live\" SET \"HasBrokenLink\" = 1 "; + $query .= "WHERE \"ID\" = " . (int)$page->ID; + $result = DB::query($query); + if (!$result) { + $this->debugMessage('Error updating HasBrokenLink'); + } } } } } + ++$this->completedPages; } // run this again if queued jobs exists and is a valid int From 72dc652ecd7486bfdea59bf7785a2951fb9fb846 Mon Sep 17 00:00:00 2001 From: Kirk Mayo Date: Thu, 24 Jul 2014 14:20:48 +1200 Subject: [PATCH 02/32] NEW: Setting up Queued job and task to support being sent a single page --- README.md | 7 +-- code/jobs/CheckExternalLinksJob.php | 89 ++++++++++++----------------- code/tasks/CheckExternalLinks.php | 9 ++- 3 files changed, 46 insertions(+), 59 deletions(-) diff --git a/README.md b/README.md index 2196e70..408a914 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,6 @@ The external links module is a task and ModelAdmin to track and to report on bro ## Features * Add external links to broken links reports -* Add a model admin for external broken links * Add a task to track external broken links ## Installation @@ -25,8 +24,7 @@ The external links module is a task and ModelAdmin to track and to report on bro 3. Make sure the folder after being extracted is named 'externallinks' 4. Place this directory in your sites root directory. This is the one with framework and cms in it. 5. Run in your browser - `/dev/build` to rebuild the database. - 6. You should see a new menu called *Broken Ext. Links* - 7. Run the following task *http://path.to.silverstripe/dev/tasks/CheckExternalLinks* to check for broken external links + 6. Run the following task *http://path.to.silverstripe/dev/tasks/CheckExternalLinks* to check for broken external links ## Dev task ## @@ -38,6 +36,3 @@ Add the following code to the mysite config to run the job every 24 hours (86400 `Config::inst()->update('CheckExternalLinks', 'QueuedJob', 86400);` -## TODO ## - -Fix setting the class attribute of broken links to ss-broken diff --git a/code/jobs/CheckExternalLinksJob.php b/code/jobs/CheckExternalLinksJob.php index 6c9c34b..381df5b 100644 --- a/code/jobs/CheckExternalLinksJob.php +++ b/code/jobs/CheckExternalLinksJob.php @@ -1,82 +1,69 @@ pagesToProcess = SiteTree::get(); + $this->pagesToProcess = Versioned::get_by_stage('SiteTree', 'Live')->column(); $this->currentStep = 0; $this->totalSteps = count($this->pagesToProcess); } - /** - * Sitemap job is going to run for a while... - */ - public function getJobType() { - return QueuedJob::QUEUED; - } - - /** - * @return string - */ public function getTitle() { return 'Checking external links'; } - /** - * Return a signature for this queued job - * - * For the generate sitemap job, we only ever want one instance running, so just use the class name - * - * @return String - */ + public function getJobType() { + return QueuedJob::QUEUED; + } + public function getSignature() { return md5(get_class($this)); } - /** - * Note that this is duplicated for backwards compatibility purposes... - */ - public function setup() { + public function setup() { parent::setup(); - increase_time_limit_to(); - $restart = $this->currentStep == 0; - if ($restart) { - $this->pagesToProcess = SiteTree::get(); + $this->pagesToProcess = Versioned::get_by_stage('SiteTree', 'Live')->column(); } + } /** - * On any restart, make sure to check that our temporary file is being created still. + * Check a individual page */ - public function prepareForRestart() { - parent::prepareForRestart(); - } - public function process() { + $remainingPages = $this->pagesToProcess; + if (!count($remainingPages)) { + $this->isComplete = true; + return; + } + + // lets process our first item - note that we take it off the list of things left to do + $ID = array_shift($remainingPages); + + // get the page + $page = Versioned::get_by_stage('SiteTree', 'Live', 'ID = '.$ID); + + if (!$page || !$page->Count()) { + $this->addMessage("Page ID #$ID could not be found, skipping"); + } + $task = new CheckExternalLinks(); - $task->run(); - $data = $this->getJobData(); - $completedPages = $task->getCompletedPages(); - $totalPages = $task->getTotalPages(); - $this->addMessage("$completedPages/$totalPages pages completed"); - $this->completeJob(); - } + $task->run($page); - /** - * Outputs the completed file to the site's webroot - */ - protected function completeJob() { - $this->isComplete = 1; - $nextgeneration = new CheckExternalLinksJob(); - singleton('QueuedJobService')->queueJob($nextgeneration, - date('Y-m-d H:i:s', time() + self::$regenerate_time)); + // and now we store the new list of remaining children + $this->pagesToProcess = $remainingPages; + $this->currentStep++; + + if (!count($remainingPages)) { + $this->isComplete = true; + return; + } } -} \ No newline at end of file + +} diff --git a/code/tasks/CheckExternalLinks.php b/code/tasks/CheckExternalLinks.php index 830bb54..3a61fb4 100644 --- a/code/tasks/CheckExternalLinks.php +++ b/code/tasks/CheckExternalLinks.php @@ -19,7 +19,11 @@ public function getTotalPages() { } function run($request) { - $pages = Versioned::get_by_stage('SiteTree', 'Live'); + if (isset($request->ID)) { + $pages = $request; + } else { + $pages = Versioned::get_by_stage('SiteTree', 'Live'); + } foreach ($pages as $page) { ++$this->totalPages; @@ -63,9 +67,10 @@ function run($request) { $htmlValue->__call('saveHTML', array()); $page->Content = $htmlValue->getContent(); - $page->write(); + $page->owner->write(); if (!$page->HasBrokenLink) { + // bypass the ORM as syncLinkTracking does not allow you // to update HasBrokenLink to true $query = "UPDATE \"SiteTree_Live\" SET \"HasBrokenLink\" = 1 "; From a3c693189bc396094f9c94ad62548161e05b2c4d Mon Sep 17 00:00:00 2001 From: Kirk Mayo Date: Thu, 24 Jul 2014 14:50:14 +1200 Subject: [PATCH 03/32] NEW: Adding back in the Broken links model no model admin this time though --- code/model/BrokenExternalLinks.php | 28 ++++++++++++++++++++++++++++ code/tasks/CheckExternalLinks.php | 6 ++++++ 2 files changed, 34 insertions(+) create mode 100644 code/model/BrokenExternalLinks.php diff --git a/code/model/BrokenExternalLinks.php b/code/model/BrokenExternalLinks.php new file mode 100644 index 0000000..e002405 --- /dev/null +++ b/code/model/BrokenExternalLinks.php @@ -0,0 +1,28 @@ + 'Varchar(2083)', // 2083 is the maximum length of a URL in Internet Explorer. + 'HTTPCode' =>'Int' + ); + + private static $has_one = array( + 'Page' => 'Page' + ); + + public static $summary_fields = array( + 'Page.Title' => 'Page', + 'HTTPCode' => 'HTTP Code', + 'Created' => 'Created' + ); + + public static $searchable_fields = array( + 'HTTPCode' => array('title' => 'HTTP Code') + ); + + function canEdit($member = false) { + return false; + } + +} diff --git a/code/tasks/CheckExternalLinks.php b/code/tasks/CheckExternalLinks.php index 3a61fb4..ba2f148 100644 --- a/code/tasks/CheckExternalLinks.php +++ b/code/tasks/CheckExternalLinks.php @@ -60,6 +60,12 @@ function run($request) { if (($httpCode < 200 || $httpCode > 302) || ($href == '' || $href[0] == '/')) { + $brokenLink = new BrokenExternalLinks(); + $brokenLink->PageID = $page->ID; + $brokenLink->Link = $href; + $brokenLink->HTTPCode = $httpCode; + $brokenLink->write(); + // set the broken link class $class = ($class && stripos($class, 'ss-broken')) ? $class . ' ss-broken' : 'ss-broken'; From bb302882524d654bd8621d97723ef09930372b14 Mon Sep 17 00:00:00 2001 From: Kirk Mayo Date: Mon, 28 Jul 2014 11:23:33 +1200 Subject: [PATCH 04/32] NEW: Adding report link to setup new queued job --- _config/routes.yml | 7 ++ code/controllers/CMSExternalLinks.php | 20 +++++ code/jobs/CheckExternalLinksJob.php | 4 +- code/reports/BrokenExternalLinksReport.php | 87 ++++++++++++++++++++++ 4 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 _config/routes.yml create mode 100644 code/controllers/CMSExternalLinks.php create mode 100644 code/reports/BrokenExternalLinksReport.php diff --git a/_config/routes.yml b/_config/routes.yml new file mode 100644 index 0000000..2284d3a --- /dev/null +++ b/_config/routes.yml @@ -0,0 +1,7 @@ +--- +Name: externallink +After: framework/routes +--- +Director: + rules: + 'admin/externallinks//$Action': 'CMSExternalLinks_Controller' diff --git a/code/controllers/CMSExternalLinks.php b/code/controllers/CMSExternalLinks.php new file mode 100644 index 0000000..e6769d1 --- /dev/null +++ b/code/controllers/CMSExternalLinks.php @@ -0,0 +1,20 @@ +queueJob($externalLinks); + + // redirect to the jobs page + $admin = QueuedJobsAdmin::create(); + $this->Redirect($admin->Link()); + } +} diff --git a/code/jobs/CheckExternalLinksJob.php b/code/jobs/CheckExternalLinksJob.php index 381df5b..ba569ca 100644 --- a/code/jobs/CheckExternalLinksJob.php +++ b/code/jobs/CheckExternalLinksJob.php @@ -1,5 +1,7 @@ 'Checked', + 'Link' => 'External Link', + 'HTTPCode' => 'HTTP Error Code', + 'PageLink' => array( + 'title' => 'Page link is on', + 'link' => true + ), + ); + + public function init() { + parent::init(); + } + + /** + * Returns the report title + * + * @return string + */ + public function title() { + return _t('ExternalBrokenLinksReport.EXTERNALBROKENLINKS',"External broken links report"); + } + + /** + * Returns the column names of the report + * + * @return array + */ + public function columns() { + return self::$columns; + } + + /** + * Alias of columns(), to support the export to csv action + * in {@link GridFieldExportButton} generateExportFileData method. + * @return array + */ + public function getColumns() { + return $this->columns(); + } + + public function sourceRecords() { + $returnSet = new ArrayList(); + $links = BrokenExternalLinks::get(); + foreach ($links as $link) { + $link->PageLink = $link->Page()->Title; + $returnSet->push($link); + } + return $returnSet; + } + + public function getCMSFields() { + $fields = parent::getCMSFields(); + if (class_exists('AbstractQueuedJob')) { + $button = ''; + $runReportButton = new LiteralField( + 'runReport', + sprintf( + $button, + 'admin/externallinks/createQueuedReport', + _t('ExternalBrokenLinksReport.RUNREPORT', 'Create new report') + ) + ); + $fields->push($runReportButton); + $reportResultSpan = ''; + $reportResult = new LiteralField('ResultTitle', $reportResultSpan); + $fields->push($reportResult); + } + return $fields; + } +} From d25adca17506b326c0aef69ba993201bee092ba2 Mon Sep 17 00:00:00 2001 From: Kirk Mayo Date: Mon, 28 Jul 2014 12:39:19 +1200 Subject: [PATCH 05/32] NEW: Adding tests and code tidyup --- README.md | 10 +++--- ...ternalLinks.php => BrokenExternalLink.php} | 7 +++- code/reports/BrokenExternalLinksReport.php | 2 +- code/tasks/CheckExternalLinks.php | 6 ++-- tests/ExternalLinksTest.php | 34 +++++++++++++++++++ tests/ExternalLinksTest.yml | 7 ++++ 6 files changed, 58 insertions(+), 8 deletions(-) rename code/model/{BrokenExternalLinks.php => BrokenExternalLink.php} (66%) create mode 100644 tests/ExternalLinksTest.php create mode 100644 tests/ExternalLinksTest.yml diff --git a/README.md b/README.md index 408a914..ffb35a3 100644 --- a/README.md +++ b/README.md @@ -31,8 +31,10 @@ The external links module is a task and ModelAdmin to track and to report on bro Run the following task *http://path.to.silverstripe/dev/tasks/CheckExternalLinks* to check your site for external broken links. If you have the queuedjobs module installed you can set the task to be run every so ofter -Add the following code to the mysite config to run the job every 24 hours (86400 seconds) - -`Config::inst()->update('CheckExternalLinks', 'QueuedJob', 86400);` - +Add the following yml config to config.yml in mysite/_config have the the task run once every day (86400 seconds) +`--- +Name: externallinkssettings +--- +CheckExternalLinks: + Delay: 86400` diff --git a/code/model/BrokenExternalLinks.php b/code/model/BrokenExternalLink.php similarity index 66% rename from code/model/BrokenExternalLinks.php rename to code/model/BrokenExternalLink.php index e002405..1de3554 100644 --- a/code/model/BrokenExternalLinks.php +++ b/code/model/BrokenExternalLink.php @@ -1,6 +1,6 @@ 'Varchar(2083)', // 2083 is the maximum length of a URL in Internet Explorer. @@ -25,4 +25,9 @@ function canEdit($member = false) { return false; } + function canView($member = false) { + $member = $member ? $member : Member::currentUser(); + $codes = array('content-authors', 'administrators'); + return Permission::checkMember($member, $codes); + } } diff --git a/code/reports/BrokenExternalLinksReport.php b/code/reports/BrokenExternalLinksReport.php index 5a7e79d..4bfe568 100644 --- a/code/reports/BrokenExternalLinksReport.php +++ b/code/reports/BrokenExternalLinksReport.php @@ -57,7 +57,7 @@ public function getColumns() { public function sourceRecords() { $returnSet = new ArrayList(); - $links = BrokenExternalLinks::get(); + $links = BrokenExternalLink::get(); foreach ($links as $link) { $link->PageLink = $link->Page()->Title; $returnSet->push($link); diff --git a/code/tasks/CheckExternalLinks.php b/code/tasks/CheckExternalLinks.php index ba2f148..a6ef0b0 100644 --- a/code/tasks/CheckExternalLinks.php +++ b/code/tasks/CheckExternalLinks.php @@ -54,13 +54,15 @@ function run($request) { if($href && function_exists('curl_init')) { $handle = curl_init($href); curl_setopt($handle, CURLOPT_RETURNTRANSFER, TRUE); + curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5); + curl_setopt($handle, CURLOPT_TIMEOUT, 10); $response = curl_exec($handle); $httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE); curl_close($handle); if (($httpCode < 200 || $httpCode > 302) || ($href == '' || $href[0] == '/')) { - $brokenLink = new BrokenExternalLinks(); + $brokenLink = new BrokenExternalLink(); $brokenLink->PageID = $page->ID; $brokenLink->Link = $href; $brokenLink->HTTPCode = $httpCode; @@ -94,7 +96,7 @@ function run($request) { } // run this again if queued jobs exists and is a valid int - $queuedJob = Config::inst()->get('CheckExternalLinks', 'QueuedJob'); + $queuedJob = Config::inst()->get('CheckExternalLinks', 'Delay'); if (isset($queuedJob) && is_int($queuedJob) && class_exists('QueuedJobService')) { $checkLinks = new CheckExternalLinksJob(); singleton('QueuedJobService') diff --git a/tests/ExternalLinksTest.php b/tests/ExternalLinksTest.php new file mode 100644 index 0000000..ae9d898 --- /dev/null +++ b/tests/ExternalLinksTest.php @@ -0,0 +1,34 @@ +objFromFixture('Page', 'working'); + $task = new CheckExternalLinks(); + $task->run($page); + $brokenLinks = BrokenExternalLinks::get(); + $this->assertEquals(0, $brokenLinks->count()); + } + + public function testBrokenLink() { + // uses http://192.0.2.1 for a broken link + $page = $this->objFromFixture('Page', 'broken'); + $task = new CheckExternalLinks(); + $task->run($page); + $brokenLinks = BrokenExternalLinks::get(); + $this->assertEquals(1, $brokenLinks->count()); + } + + public function testReportExists() { + $reports = SS_Report::get_reports(); + $reportNames = array(); + foreach($reports as $report) { + $reportNames[] = $report->class; + } + $this->assertContains('BrokenExternalLinksReport',$reportNames, + 'BrokenExternalLinksReport is in reports list'); + } +} diff --git a/tests/ExternalLinksTest.yml b/tests/ExternalLinksTest.yml new file mode 100644 index 0000000..8a25106 --- /dev/null +++ b/tests/ExternalLinksTest.yml @@ -0,0 +1,7 @@ +Page: + working: + Title: Working Link + Content: 'Localhost' + broken: + Title: Broken Link + Content: 'Broken' \ No newline at end of file From 093322fcd35b3506f358f9a0412899bdc65947e4 Mon Sep 17 00:00:00 2001 From: Kirk Mayo Date: Wed, 30 Jul 2014 12:34:39 +1200 Subject: [PATCH 06/32] NEW: Adding a option to run a batch job to the front end --- README.md | 15 +++-- code/controllers/CMSExternalLinks.php | 70 ++++++++++++++++++---- code/jobs/CheckExternalLinksJob.php | 3 +- code/model/BrokenExternalLink.php | 10 ++++ code/reports/BrokenExternalLinksReport.php | 7 ++- code/tasks/CheckExternalLinks.php | 30 ++++++---- javascript/BrokenExternalLinksReport.js | 45 ++++++++++++++ 7 files changed, 151 insertions(+), 29 deletions(-) create mode 100644 javascript/BrokenExternalLinksReport.js diff --git a/README.md b/README.md index ffb35a3..6999e68 100644 --- a/README.md +++ b/README.md @@ -26,15 +26,22 @@ The external links module is a task and ModelAdmin to track and to report on bro 5. Run in your browser - `/dev/build` to rebuild the database. 6. Run the following task *http://path.to.silverstripe/dev/tasks/CheckExternalLinks* to check for broken external links +## Report ## + +A new report is added called 'External Broken links report' from here you can also start a new job which is run +via AJAX and in batches of 10 so it can be run via content editors who do not have access to jobs or tasks. + ## Dev task ## Run the following task *http://path.to.silverstripe/dev/tasks/CheckExternalLinks* to check your site for external broken links. + +## Queued job ## + If you have the queuedjobs module installed you can set the task to be run every so ofter Add the following yml config to config.yml in mysite/_config have the the task run once every day (86400 seconds) -`--- -Name: externallinkssettings ---- +``` CheckExternalLinks: - Delay: 86400` + Delay: 86400 +``` diff --git a/code/controllers/CMSExternalLinks.php b/code/controllers/CMSExternalLinks.php index e6769d1..5c23815 100644 --- a/code/controllers/CMSExternalLinks.php +++ b/code/controllers/CMSExternalLinks.php @@ -2,19 +2,69 @@ class CMSExternalLinks_Controller extends Controller { - private static $allowed_actions = array('createQueuedReport'); + private static $allowed_actions = array('getJobStatus', 'clear', 'start'); + /* + * Respond to Ajax requests for info on a running job + * also calls continueJob and clear depending on the status of the job + * + * @return string JSON string detailing status of the job + */ + public function getJobStatus() { + $trackID = Session::get('ExternalLinksTrackID'); + if (!$trackID) return; + $noPages = Versioned::get_by_stage('SiteTree', 'Live')->count(); + $result = BrokenExternalPageTrack::get() + ->filter('TrackID', $trackID) + ->exclude('PageID', 0); + $completedPages = count($result); - public function createQueuedReport() { - if (!Permission::check('ADMIN')) return; + echo json_encode(array( + 'TrackID' => $trackID, + 'Completed' => $completedPages, + 'Total' => $noPages + )); - // setup external links job - $externalLinks = new CheckExternalLinksJob(); - $job = singleton('QueuedJobService'); - $jobID = $job->queueJob($externalLinks); + if ($completedPages >= $noPages) { + $this->clear(); + } else { + $this->continueJob(); + } + } + + /* + * Clears the tracking id and any surplus entries for the BrokenExternalPageTrack model + */ + public function clear() { + // clear any old entries + $trackID = Session::get('ExternalLinksTrackID'); + $oldEntries = BrokenExternalPageTrack::get() + ->exclude('TrackID', $trackID); + foreach ($oldEntries as $entry) { + $entry->delete(); + } + Session::clear('ExternalLinksTrackID'); + } + + /* + * Starts a broken external link check + */ + public function start() { + $track = BrokenExternalPageTrack::create(); + $track->write(); + $track->TrackID = $track->ID; + $track->write(); + + Session::set('ExternalLinksTrackID', $track->ID); + + $this->continueJob(); + } - // redirect to the jobs page - $admin = QueuedJobsAdmin::create(); - $this->Redirect($admin->Link()); + /* + * Continues a broken external link check + */ + public function continueJob() { + $task = new CheckExternalLinks(); + $task->run(null); } } diff --git a/code/jobs/CheckExternalLinksJob.php b/code/jobs/CheckExternalLinksJob.php index ba569ca..af76fc2 100644 --- a/code/jobs/CheckExternalLinksJob.php +++ b/code/jobs/CheckExternalLinksJob.php @@ -56,7 +56,8 @@ public function process() { } $task = new CheckExternalLinks(); - $task->run($page); + $task->pageToProcess = $page; + $task->run(); // and now we store the new list of remaining children $this->pagesToProcess = $remainingPages; diff --git a/code/model/BrokenExternalLink.php b/code/model/BrokenExternalLink.php index 1de3554..89aeeb8 100644 --- a/code/model/BrokenExternalLink.php +++ b/code/model/BrokenExternalLink.php @@ -31,3 +31,13 @@ function canView($member = false) { return Permission::checkMember($member, $codes); } } + +class BrokenExternalPageTrack extends DataObject { + private static $db = array( + 'TrackID' => 'Int' + ); + + private static $has_one = array( + 'Page' => 'Page' + ); +} diff --git a/code/reports/BrokenExternalLinksReport.php b/code/reports/BrokenExternalLinksReport.php index 4bfe568..9e54a46 100644 --- a/code/reports/BrokenExternalLinksReport.php +++ b/code/reports/BrokenExternalLinksReport.php @@ -66,19 +66,20 @@ public function sourceRecords() { } public function getCMSFields() { + Requirements::javascript('externallinks/javascript/BrokenExternalLinksReport.js'); $fields = parent::getCMSFields(); if (class_exists('AbstractQueuedJob')) { - $button = ''; + $button = ''; $runReportButton = new LiteralField( 'runReport', sprintf( $button, - 'admin/externallinks/createQueuedReport', _t('ExternalBrokenLinksReport.RUNREPORT', 'Create new report') ) ); $fields->push($runReportButton); - $reportResultSpan = ''; + + $reportResultSpan = '

'; $reportResult = new LiteralField('ResultTitle', $reportResultSpan); $fields->push($reportResult); } diff --git a/code/tasks/CheckExternalLinks.php b/code/tasks/CheckExternalLinks.php index a6ef0b0..b0e2d40 100644 --- a/code/tasks/CheckExternalLinks.php +++ b/code/tasks/CheckExternalLinks.php @@ -1,6 +1,7 @@ completedPages; - } - - public function getTotalPages() { - return $this->totalPages; - } - function run($request) { - if (isset($request->ID)) { - $pages = $request; + $trackID = Session::get('ExternalLinksTrackID'); + if (isset($this->pageToProcess)) { + $pages = $this->pageToProcess; } else { - $pages = Versioned::get_by_stage('SiteTree', 'Live'); + if ($trackID) { + $result = BrokenExternalPageTrack::get() + ->filter('TrackID', $trackID); + $pages = Versioned::get_by_stage('SiteTree', 'Live') + ->exclude('ID', $result->column('PageID')) + ->limit(10); + } else { + $pages = Versioned::get_by_stage('SiteTree', 'Live'); + } } foreach ($pages as $page) { ++$this->totalPages; @@ -93,6 +95,12 @@ function run($request) { } } ++$this->completedPages; + if ($trackID) { + $trackPage = new BrokenExternalPageTrack(); + $trackPage->PageID = $page->ID; + $trackPage->TrackID = $trackID; + $trackPage->write(); + } } // run this again if queued jobs exists and is a valid int diff --git a/javascript/BrokenExternalLinksReport.js b/javascript/BrokenExternalLinksReport.js new file mode 100644 index 0000000..9cf4a32 --- /dev/null +++ b/javascript/BrokenExternalLinksReport.js @@ -0,0 +1,45 @@ +(function($) { + $('#externalLinksReport').entwine({ + onclick: function() { + $(this).start(); + $(this).poll(); + }, + start: function() { + // initiate a new job + $('#ReportHolder').empty(); + $('#ReportHolder').text('Running report 0%'); + $('#ReportHolder').append(''); + $.ajax({url: "admin/externallinks/start", async: true, timeout: 1000 }); + }, + poll: function() { + // poll the current job and update the front end status + $.ajax({ + url: "admin/externallinks/getJobStatus", + async: true, + success: function(data) { + var obj = $.parseJSON(data); + if (!obj) return; + var completed = obj.Completed ? obj.Completed : 0; + var total = obj.Total ? obj.Total : 0; + if (total > 0 && completed == total) { + $('#ReportHolder').text('Report Finished ' + completed + '/' + total); + } else { + setTimeout(function() { $('#externalLinksReport').poll(); }, 1); + } + if (total && completed) { + if (completed < total) { + var percent = (completed / total) * 100; + $('#ReportHolder').text('Running report ' + completed + '/' + + total + ' (' + percent.toFixed(2) + '%)'); + $('#ReportHolder'). + append(''); + } + } + }, + error: function(e) { + console.log(e); + } + }); + } + }); +}(jQuery)); From 564514b01518d6b91c8fb9f4ac926e19b869770f Mon Sep 17 00:00:00 2001 From: Kirk Mayo Date: Wed, 30 Jul 2014 15:29:24 +1200 Subject: [PATCH 07/32] NEW: Adding whitelisted codes via yml --- README.md | 18 ++++++++++++++---- code/tasks/CheckExternalLinks.php | 11 ++++++++--- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 6999e68..ead445f 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,17 @@ broken links. If you have the queuedjobs module installed you can set the task to be run every so ofter Add the following yml config to config.yml in mysite/_config have the the task run once every day (86400 seconds) -``` -CheckExternalLinks: - Delay: 86400 -``` + CheckExternalLinks: + Delay: 86400 + +## Whitelisting codes ## + +If you want to ignore or whitelist certain http codes this can be setup via IgnoreCodes in the config.yml +file in mysite/_config + + CheckExternalLinks: + Delay: 60 + IgnoreCodes: + - 401 + - 403 + - 501 diff --git a/code/tasks/CheckExternalLinks.php b/code/tasks/CheckExternalLinks.php index b0e2d40..e63815b 100644 --- a/code/tasks/CheckExternalLinks.php +++ b/code/tasks/CheckExternalLinks.php @@ -61,9 +61,14 @@ function run($request) { $response = curl_exec($handle); $httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE); curl_close($handle); - if (($httpCode < 200 || $httpCode > 302) - || ($href == '' || $href[0] == '/')) - { + // do we have any whitelisted codes + $ignoreCodes = Config::inst()->get('CheckExternalLinks', 'IgnoreCodes'); + // if the code is whitelisted set it to 200 + $httpCode = (is_array($ignoreCodes) && in_array($httpCode, $ignoreCodes)) ? + 200 : $httpCode; + + // ignore empty hrefs and internal links + if (($httpCode < 200 || $httpCode > 302) || ($href == '' || $href[0] == '/')) { $brokenLink = new BrokenExternalLink(); $brokenLink->PageID = $page->ID; $brokenLink->Link = $href; From e9fe1a470720b29ebb1436b93a6f43e05a783c94 Mon Sep 17 00:00:00 2001 From: Kirk Mayo Date: Thu, 31 Jul 2014 16:49:20 +1200 Subject: [PATCH 08/32] NEW: Use DB row for job status and refactor the sql statements --- code/controllers/CMSExternalLinks.php | 71 ++++-------- code/jobs/CheckExternalLinksJob.php | 43 +------- code/model/BrokenExternalLink.php | 12 ++- code/reports/BrokenExternalLinksReport.php | 28 ++--- code/tasks/CheckExternalLinks.php | 120 ++++++++++++++++++--- composer.json | 3 + javascript/BrokenExternalLinksReport.js | 15 ++- tests/ExternalLinksTest.php | 18 ++-- tests/ExternalLinksTest.yml | 2 +- 9 files changed, 180 insertions(+), 132 deletions(-) diff --git a/code/controllers/CMSExternalLinks.php b/code/controllers/CMSExternalLinks.php index 5c23815..8e3ea1a 100644 --- a/code/controllers/CMSExternalLinks.php +++ b/code/controllers/CMSExternalLinks.php @@ -2,69 +2,44 @@ class CMSExternalLinks_Controller extends Controller { - private static $allowed_actions = array('getJobStatus', 'clear', 'start'); + private static $allowed_actions = array('getJobStatus', 'start'); /* * Respond to Ajax requests for info on a running job - * also calls continueJob and clear depending on the status of the job * * @return string JSON string detailing status of the job */ public function getJobStatus() { - $trackID = Session::get('ExternalLinksTrackID'); - if (!$trackID) return; - $noPages = Versioned::get_by_stage('SiteTree', 'Live')->count(); - $result = BrokenExternalPageTrack::get() - ->filter('TrackID', $trackID) - ->exclude('PageID', 0); - $completedPages = count($result); - + $track = CheckExternalLinks::getLatestTrack(); + if (!$track || !$track->exists()) return null; echo json_encode(array( - 'TrackID' => $trackID, - 'Completed' => $completedPages, - 'Total' => $noPages + 'TrackID' => $track->ID, + 'Status' => $track->Status, + 'Completed' => $track->CompletedPages, + 'Total' => $track->TotalPages )); - - if ($completedPages >= $noPages) { - $this->clear(); - } else { - $this->continueJob(); - } } - /* - * Clears the tracking id and any surplus entries for the BrokenExternalPageTrack model - */ - public function clear() { - // clear any old entries - $trackID = Session::get('ExternalLinksTrackID'); - $oldEntries = BrokenExternalPageTrack::get() - ->exclude('TrackID', $trackID); - foreach ($oldEntries as $entry) { - $entry->delete(); - } - Session::clear('ExternalLinksTrackID'); - } /* * Starts a broken external link check */ public function start() { - $track = BrokenExternalPageTrack::create(); - $track->write(); - $track->TrackID = $track->ID; - $track->write(); - - Session::set('ExternalLinksTrackID', $track->ID); - - $this->continueJob(); - } - - /* - * Continues a broken external link check - */ - public function continueJob() { - $task = new CheckExternalLinks(); - $task->run(null); + $status = checkExternalLinks::getLatestTrackStatus(); + // return if the a job is already running + if ($status == 'Running') { + return; + } + if (class_exists('QueuedJobService')) { + $checkLinks = new CheckExternalLinksJob(); + singleton('QueuedJobService') + ->queueJob($checkLinks, date('Y-m-d H:i:s', time() + 1)); + } else { + //TODO this hangs as it waits for the connection to be released + // should return back and continue processing + // http://us3.php.net/manual/en/features.connection-handling.php + $task = new CheckExternalLinks(); + $task->run(); + } } } diff --git a/code/jobs/CheckExternalLinksJob.php b/code/jobs/CheckExternalLinksJob.php index af76fc2..030f1ea 100644 --- a/code/jobs/CheckExternalLinksJob.php +++ b/code/jobs/CheckExternalLinksJob.php @@ -8,12 +8,6 @@ */ class CheckExternalLinksJob extends AbstractQueuedJob implements QueuedJob { - public function __construct() { - $this->pagesToProcess = Versioned::get_by_stage('SiteTree', 'Live')->column(); - $this->currentStep = 0; - $this->totalSteps = count($this->pagesToProcess); - } - public function getTitle() { return _t('CheckExternalLiksJob.TITLE', 'Checking for external broken links'); } @@ -26,47 +20,14 @@ public function getSignature() { return md5(get_class($this)); } - public function setup() { - parent::setup(); - $restart = $this->currentStep == 0; - if ($restart) { - $this->pagesToProcess = Versioned::get_by_stage('SiteTree', 'Live')->column(); - } - - } - /** * Check a individual page */ public function process() { - $remainingPages = $this->pagesToProcess; - if (!count($remainingPages)) { - $this->isComplete = true; - return; - } - - // lets process our first item - note that we take it off the list of things left to do - $ID = array_shift($remainingPages); - - // get the page - $page = Versioned::get_by_stage('SiteTree', 'Live', 'ID = '.$ID); - - if (!$page || !$page->Count()) { - $this->addMessage("Page ID #$ID could not be found, skipping"); - } - $task = new CheckExternalLinks(); - $task->pageToProcess = $page; $task->run(); - - // and now we store the new list of remaining children - $this->pagesToProcess = $remainingPages; - $this->currentStep++; - - if (!count($remainingPages)) { - $this->isComplete = true; - return; - } + $this->isComplete = true; + return; } } diff --git a/code/model/BrokenExternalLink.php b/code/model/BrokenExternalLink.php index 89aeeb8..a716aea 100644 --- a/code/model/BrokenExternalLink.php +++ b/code/model/BrokenExternalLink.php @@ -32,9 +32,19 @@ function canView($member = false) { } } +class BrokenExternalPageTrackStatus extends DataObject { + private static $db = array( + 'Status' => 'Enum("Completed, Running", "Running")', + 'TotalPages' => 'Int', + 'CompletedPages' => 'Int', + 'JobInfo' => 'Varchar(255)' + ); +} + class BrokenExternalPageTrack extends DataObject { private static $db = array( - 'TrackID' => 'Int' + 'TrackID' => 'Int', + 'Processed' => 'Boolean' ); private static $has_one = array( diff --git a/code/reports/BrokenExternalLinksReport.php b/code/reports/BrokenExternalLinksReport.php index 9e54a46..a57e976 100644 --- a/code/reports/BrokenExternalLinksReport.php +++ b/code/reports/BrokenExternalLinksReport.php @@ -68,21 +68,21 @@ public function sourceRecords() { public function getCMSFields() { Requirements::javascript('externallinks/javascript/BrokenExternalLinksReport.js'); $fields = parent::getCMSFields(); - if (class_exists('AbstractQueuedJob')) { - $button = ''; - $runReportButton = new LiteralField( - 'runReport', - sprintf( - $button, - _t('ExternalBrokenLinksReport.RUNREPORT', 'Create new report') - ) - ); - $fields->push($runReportButton); - $reportResultSpan = '

'; - $reportResult = new LiteralField('ResultTitle', $reportResultSpan); - $fields->push($reportResult); - } + $reportResultSpan = '

'; + $reportResult = new LiteralField('ResultTitle', $reportResultSpan); + $fields->push($reportResult); + + $button = ''; + $runReportButton = new LiteralField( + 'runReport', + sprintf( + $button, + _t('ExternalBrokenLinksReport.RUNREPORT', 'Create new report') + ) + ); + $fields->push($runReportButton); + return $fields; } } diff --git a/code/tasks/CheckExternalLinks.php b/code/tasks/CheckExternalLinks.php index e63815b..960987f 100644 --- a/code/tasks/CheckExternalLinks.php +++ b/code/tasks/CheckExternalLinks.php @@ -12,23 +12,63 @@ class CheckExternalLinks extends BuildTask { private $totalPages; function run($request) { - $trackID = Session::get('ExternalLinksTrackID'); - if (isset($this->pageToProcess)) { - $pages = $this->pageToProcess; + $track = CheckExternalLinks::getLatestTrack(); + + // if the script has already been started + if ($track && $track->Status == 'Running') { + $batch = BrokenExternalPageTrack::get() + ->filter(array( + 'TrackID' => $track->ID, + 'Processed' => 0 + ))->limit(10)->column('PageID'); + $pages = Versioned::get_by_stage('SiteTree', 'Live') + ->filter('ID', $batch) + ->limit(10); + $this->updateJobInfo('Fetching pages to check'); + if ($track->CompletedPages == $track->TotalPages) { + $track->Status = 'Completed'; + $track->write(); + $this->updateJobInfo('Setting to completed'); + } + // if the script is to be started } else { - if ($trackID) { - $result = BrokenExternalPageTrack::get() - ->filter('TrackID', $trackID); - $pages = Versioned::get_by_stage('SiteTree', 'Live') - ->exclude('ID', $result->column('PageID')) - ->limit(10); - } else { - $pages = Versioned::get_by_stage('SiteTree', 'Live'); + $pages = Versioned::get_by_stage('SiteTree', 'Live')->column('ID'); + $noPages = count($pages); + + $track = BrokenExternalPageTrackStatus::create(); + $track->TotalPages = $noPages; + $track->write(); + $this->updateJobInfo('Creating new tracking object'); + + foreach ($pages as $page) { + $trackPage = BrokenExternalPageTrack::create(); + $trackPage->PageID = $page; + $trackPage->TrackID = $track->ID; + $trackPage->write(); } + + $batch = BrokenExternalPageTrack::get() + ->filter(array( + 'TrackID' => $track->ID + ))->limit(10)->column('PageID'); + + $pages = Versioned::get_by_stage('SiteTree', 'Live') + ->filter('ID', $batch); } + $trackID = $track->ID; foreach ($pages as $page) { ++$this->totalPages; + if ($track->ID) { + $trackPage = BrokenExternalPageTrack::get() + ->filter(array( + 'PageID' => $page->ID, + 'TrackID' => $track->ID + ))->first(); + $trackPage->Processed = 1; + $trackPage->write(); + } + $htmlValue = Injector::inst()->create('HTMLValue', $page->Content); if (!$htmlValue->isValid()) { continue; @@ -100,12 +140,27 @@ function run($request) { } } ++$this->completedPages; - if ($trackID) { - $trackPage = new BrokenExternalPageTrack(); - $trackPage->PageID = $page->ID; - $trackPage->TrackID = $trackID; - $trackPage->write(); + } + + // run this outside the foreach loop to stop it locking DB rows + $this->updateJobInfo('Updating completed pages'); + $this->updateCompletedPages($trackID); + + // do we need to carry on running the job + $track = $this->getLatestTrack(); + if ($track->CompletedPages >= $track->TotalPages) { + $track->Status = 'Completed'; + $track->write(); + + // clear any old previous data + $rows = BrokenExternalPageTrack::get() + ->exclude('TrackID', $track->ID); + foreach ($rows as $row) { + $row->delete(); } + } else { + $this->updateJobInfo("Running next batch {$track->CompletedPages}/{$track->TotalPages}"); + $this->run($request); } // run this again if queued jobs exists and is a valid int @@ -115,6 +170,39 @@ function run($request) { singleton('QueuedJobService') ->queueJob($checkLinks, date('Y-m-d H:i:s', time() + $queuedJob)); } + } + + public static function getLatestTrack() { + $track = BrokenExternalPageTrackStatus::get()->sort('ID', 'DESC')->first(); + if (!$track || !$track->exists()) return null; + return $track; + } + + public static function getLatestTrackID() { + $track = CheckExternalLinks::getLatestTrack(); + if (!$track || !$track->exists()) return null; + return $track->ID; + } + + public static function getLatestTrackStatus() { + $track = CheckExternalLinks::getLatestTrack(); + if (!$track || !$track->exists()) return null; + return $track->Status; + } + + private function updateCompletedPages($trackID = 0) { + $noPages = BrokenExternalPageTrack::get() + ->filter(array('TrackID' => $trackID, 'Processed' => 1))->count(); + $track = $this->getLatestTrack($trackID); + $track->CompletedPages = $noPages; + $track->write(); + return $noPages; + } + private function updateJobInfo($message) { + $track = CheckExternalLinks::getLatestTrack(); + if (!$track || !$track->exists()) return null; + $track->JobInfo = $message; + $track->write(); } } diff --git a/composer.json b/composer.json index 7f73048..36506f7 100644 --- a/composer.json +++ b/composer.json @@ -13,5 +13,8 @@ { "silverstripe/framework": ">=3.0", "silverstripe/cms": ">=3.0" + }, + "suggest": { + "silverstripe/queuedjobs": "Speeds up running the job for Content Editors fropm the report" } } diff --git a/javascript/BrokenExternalLinksReport.js b/javascript/BrokenExternalLinksReport.js index 9cf4a32..5dd7046 100644 --- a/javascript/BrokenExternalLinksReport.js +++ b/javascript/BrokenExternalLinksReport.js @@ -4,12 +4,16 @@ $(this).start(); $(this).poll(); }, + onmatch: function() { + $(this).poll(); + }, start: function() { // initiate a new job $('#ReportHolder').empty(); $('#ReportHolder').text('Running report 0%'); $('#ReportHolder').append(''); - $.ajax({url: "admin/externallinks/start", async: true, timeout: 1000 }); + $.ajax({url: "admin/externallinks/start", async: true, timeout: 3000 }); + $(this).poll(); }, poll: function() { // poll the current job and update the front end status @@ -18,13 +22,16 @@ async: true, success: function(data) { var obj = $.parseJSON(data); - if (!obj) return; + if (!obj) { + setTimeout(function() { $('#externalLinksReport').poll(); }, 1000); + } var completed = obj.Completed ? obj.Completed : 0; var total = obj.Total ? obj.Total : 0; - if (total > 0 && completed == total) { + var jobStatus = obj.Status ? obj.Status : 'Running'; + if (jobStatus == 'Completed') { $('#ReportHolder').text('Report Finished ' + completed + '/' + total); } else { - setTimeout(function() { $('#externalLinksReport').poll(); }, 1); + setTimeout(function() { $('#externalLinksReport').poll(); }, 1000); } if (total && completed) { if (completed < total) { diff --git a/tests/ExternalLinksTest.php b/tests/ExternalLinksTest.php index ae9d898..a9b813f 100644 --- a/tests/ExternalLinksTest.php +++ b/tests/ExternalLinksTest.php @@ -4,25 +4,28 @@ class ExternalLinks extends FunctionalTest { protected static $fixture_file = 'ExternalLinksTest.yml'; - public function testWorkingLink() { + public function testLinks() { // uses http://127.0.0.1 to test a working link - $page = $this->objFromFixture('Page', 'working'); + $working = $this->objFromFixture('SiteTree', 'working'); + $working->publish('Stage', 'Live'); $task = new CheckExternalLinks(); - $task->run($page); - $brokenLinks = BrokenExternalLinks::get(); + $task->run(null); + $brokenLinks = BrokenExternalLink::get(); $this->assertEquals(0, $brokenLinks->count()); } public function testBrokenLink() { // uses http://192.0.2.1 for a broken link - $page = $this->objFromFixture('Page', 'broken'); + $broken = $this->objFromFixture('SiteTree', 'broken'); + $broken->publish('Stage', 'Live'); $task = new CheckExternalLinks(); - $task->run($page); - $brokenLinks = BrokenExternalLinks::get(); + $task->run(null); + $brokenLinks = BrokenExternalLink::get(); $this->assertEquals(1, $brokenLinks->count()); } public function testReportExists() { + $mock = $this->objFromFixture('SiteTree', 'broken'); $reports = SS_Report::get_reports(); $reportNames = array(); foreach($reports as $report) { @@ -32,3 +35,4 @@ public function testReportExists() { 'BrokenExternalLinksReport is in reports list'); } } + diff --git a/tests/ExternalLinksTest.yml b/tests/ExternalLinksTest.yml index 8a25106..46eb0a8 100644 --- a/tests/ExternalLinksTest.yml +++ b/tests/ExternalLinksTest.yml @@ -1,4 +1,4 @@ -Page: +SiteTree: working: Title: Working Link Content: 'Localhost' From 9e5a41f6c7f2c27062c8afe1788797ac0dc4b9f2 Mon Sep 17 00:00:00 2001 From: Kirk Mayo Date: Mon, 4 Aug 2014 10:10:59 +1200 Subject: [PATCH 09/32] NEW: Fixing queuedjob status and hiding report button when report is running --- code/jobs/CheckExternalLinksJob.php | 9 ++++++++- code/tasks/CheckExternalLinks.php | 18 ++++++++++++------ javascript/BrokenExternalLinksReport.js | 4 +++- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/code/jobs/CheckExternalLinksJob.php b/code/jobs/CheckExternalLinksJob.php index 030f1ea..56dd1e3 100644 --- a/code/jobs/CheckExternalLinksJob.php +++ b/code/jobs/CheckExternalLinksJob.php @@ -25,7 +25,14 @@ public function getSignature() { */ public function process() { $task = new CheckExternalLinks(); - $task->run(); + $pages = Versioned::get_by_stage('SiteTree', 'Live'); + // set the limit so each page is done individually + $task->limit = 1; + $this->totalSteps = $pages->count(); + foreach ($pages as $page) { + $this->currentStep++; + $task->run(); + } $this->isComplete = true; return; } diff --git a/code/tasks/CheckExternalLinks.php b/code/tasks/CheckExternalLinks.php index 960987f..f3df02c 100644 --- a/code/tasks/CheckExternalLinks.php +++ b/code/tasks/CheckExternalLinks.php @@ -1,7 +1,8 @@ filter(array( 'TrackID' => $track->ID, 'Processed' => 0 - ))->limit(10)->column('PageID'); + ))->limit($this->limit)->column('PageID'); $pages = Versioned::get_by_stage('SiteTree', 'Live') ->filter('ID', $batch) - ->limit(10); + ->limit($this->limit); $this->updateJobInfo('Fetching pages to check'); if ($track->CompletedPages == $track->TotalPages) { $track->Status = 'Completed'; @@ -50,7 +51,7 @@ function run($request) { $batch = BrokenExternalPageTrack::get() ->filter(array( 'TrackID' => $track->ID - ))->limit(10)->column('PageID'); + ))->limit($this->limit)->column('PageID'); $pages = Versioned::get_by_stage('SiteTree', 'Live') ->filter('ID', $batch); @@ -159,8 +160,13 @@ function run($request) { $row->delete(); } } else { - $this->updateJobInfo("Running next batch {$track->CompletedPages}/{$track->TotalPages}"); - $this->run($request); + // if running via the queued job module return to the queued job after each iteration + if ($this->limit == 1) { + return; + } else { + $this->updateJobInfo("Running next batch {$track->CompletedPages}/{$track->TotalPages}"); + $this->run($request); + } } // run this again if queued jobs exists and is a valid int diff --git a/javascript/BrokenExternalLinksReport.js b/javascript/BrokenExternalLinksReport.js index 5dd7046..54174f4 100644 --- a/javascript/BrokenExternalLinksReport.js +++ b/javascript/BrokenExternalLinksReport.js @@ -2,7 +2,6 @@ $('#externalLinksReport').entwine({ onclick: function() { $(this).start(); - $(this).poll(); }, onmatch: function() { $(this).poll(); @@ -12,11 +11,13 @@ $('#ReportHolder').empty(); $('#ReportHolder').text('Running report 0%'); $('#ReportHolder').append(''); + $('#externalLinksReport').hide(); $.ajax({url: "admin/externallinks/start", async: true, timeout: 3000 }); $(this).poll(); }, poll: function() { // poll the current job and update the front end status + $('#externalLinksReport').hide(); $.ajax({ url: "admin/externallinks/getJobStatus", async: true, @@ -30,6 +31,7 @@ var jobStatus = obj.Status ? obj.Status : 'Running'; if (jobStatus == 'Completed') { $('#ReportHolder').text('Report Finished ' + completed + '/' + total); + $('#externalLinksReport').show(); } else { setTimeout(function() { $('#externalLinksReport').poll(); }, 1000); } From 363ecd49856c26520085a7442a2478906e46336a Mon Sep 17 00:00:00 2001 From: Damian Mooyman Date: Mon, 4 Aug 2014 11:20:54 +1200 Subject: [PATCH 10/32] Make sure that process yields to queuedjobs after each page Fix javascript errors starting the job --- code/jobs/CheckExternalLinksJob.php | 16 ++++------- code/tasks/CheckExternalLinks.php | 35 ++++++++++++++----------- javascript/BrokenExternalLinksReport.js | 11 +++++--- 3 files changed, 31 insertions(+), 31 deletions(-) diff --git a/code/jobs/CheckExternalLinksJob.php b/code/jobs/CheckExternalLinksJob.php index 56dd1e3..bb79e67 100644 --- a/code/jobs/CheckExternalLinksJob.php +++ b/code/jobs/CheckExternalLinksJob.php @@ -21,20 +21,14 @@ public function getSignature() { } /** - * Check a individual page + * Check an individual page */ public function process() { $task = new CheckExternalLinks(); - $pages = Versioned::get_by_stage('SiteTree', 'Live'); - // set the limit so each page is done individually - $task->limit = 1; - $this->totalSteps = $pages->count(); - foreach ($pages as $page) { - $this->currentStep++; - $task->run(); - } - $this->isComplete = true; - return; + $track = $task->runLinksCheck(1); + $this->currentStep = $track->CompletedPages; + $this->totalSteps = $track->TotalPages; + $this->isComplete = $track->Status === 'Completed'; } } diff --git a/code/tasks/CheckExternalLinks.php b/code/tasks/CheckExternalLinks.php index f3df02c..e9e0cc0 100644 --- a/code/tasks/CheckExternalLinks.php +++ b/code/tasks/CheckExternalLinks.php @@ -13,6 +13,16 @@ class CheckExternalLinks extends BuildTask { private $totalPages; function run($request) { + $this->runLinksCheck($this->limit); + } + + /** + * Runs the links checker and returns the track used + * + * @param int $limit Limit to number of pages to run + * @return BrokenExternalPageTrackStatus + */ + public function runLinksCheck($limit) { $track = CheckExternalLinks::getLatestTrack(); // if the script has already been started @@ -21,10 +31,10 @@ function run($request) { ->filter(array( 'TrackID' => $track->ID, 'Processed' => 0 - ))->limit($this->limit)->column('PageID'); + ))->limit($limit)->column('PageID'); $pages = Versioned::get_by_stage('SiteTree', 'Live') ->filter('ID', $batch) - ->limit($this->limit); + ->limit($limit); $this->updateJobInfo('Fetching pages to check'); if ($track->CompletedPages == $track->TotalPages) { $track->Status = 'Completed'; @@ -51,7 +61,7 @@ function run($request) { $batch = BrokenExternalPageTrack::get() ->filter(array( 'TrackID' => $track->ID - ))->limit($this->limit)->column('PageID'); + ))->limit($limit)->column('PageID'); $pages = Versioned::get_by_stage('SiteTree', 'Live') ->filter('ID', $batch); @@ -159,22 +169,15 @@ function run($request) { foreach ($rows as $row) { $row->delete(); } - } else { + return $track; + } + // if running via the queued job module return to the queued job after each iteration - if ($this->limit == 1) { - return; + if ($limit == 1) { + return $track; } else { $this->updateJobInfo("Running next batch {$track->CompletedPages}/{$track->TotalPages}"); - $this->run($request); - } - } - - // run this again if queued jobs exists and is a valid int - $queuedJob = Config::inst()->get('CheckExternalLinks', 'Delay'); - if (isset($queuedJob) && is_int($queuedJob) && class_exists('QueuedJobService')) { - $checkLinks = new CheckExternalLinksJob(); - singleton('QueuedJobService') - ->queueJob($checkLinks, date('Y-m-d H:i:s', time() + $queuedJob)); + return $this->runLinksCheck($limit); } } diff --git a/javascript/BrokenExternalLinksReport.js b/javascript/BrokenExternalLinksReport.js index 54174f4..d44f7b5 100644 --- a/javascript/BrokenExternalLinksReport.js +++ b/javascript/BrokenExternalLinksReport.js @@ -4,6 +4,8 @@ $(this).start(); }, onmatch: function() { + // poll the current job and update the front end status + $('#externalLinksReport').hide(); $(this).poll(); }, start: function() { @@ -16,15 +18,16 @@ $(this).poll(); }, poll: function() { - // poll the current job and update the front end status - $('#externalLinksReport').hide(); $.ajax({ url: "admin/externallinks/getJobStatus", async: true, success: function(data) { var obj = $.parseJSON(data); + + // No report, so let user create one if (!obj) { - setTimeout(function() { $('#externalLinksReport').poll(); }, 1000); + $('#externalLinksReport').show(); + return; } var completed = obj.Completed ? obj.Completed : 0; var total = obj.Total ? obj.Total : 0; @@ -46,7 +49,7 @@ } }, error: function(e) { - console.log(e); + if(typeof console !== 'undefined') console.log(e); } }); } From da7e45ddd41161d90d92948573293a3f0fc03fe2 Mon Sep 17 00:00:00 2001 From: Kirk Mayo Date: Mon, 4 Aug 2014 12:30:08 +1200 Subject: [PATCH 11/32] BUG: Fixing report page link and set to check stage not live --- code/reports/BrokenExternalLinksReport.php | 1 + code/tasks/CheckExternalLinks.php | 8 ++++---- tests/ExternalLinksTest.php | 9 +++++---- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/code/reports/BrokenExternalLinksReport.php b/code/reports/BrokenExternalLinksReport.php index a57e976..34bfbec 100644 --- a/code/reports/BrokenExternalLinksReport.php +++ b/code/reports/BrokenExternalLinksReport.php @@ -60,6 +60,7 @@ public function sourceRecords() { $links = BrokenExternalLink::get(); foreach ($links as $link) { $link->PageLink = $link->Page()->Title; + $link->ID = $link->Page()->ID; $returnSet->push($link); } return $returnSet; diff --git a/code/tasks/CheckExternalLinks.php b/code/tasks/CheckExternalLinks.php index e9e0cc0..7d4a605 100644 --- a/code/tasks/CheckExternalLinks.php +++ b/code/tasks/CheckExternalLinks.php @@ -32,7 +32,7 @@ public function runLinksCheck($limit) { 'TrackID' => $track->ID, 'Processed' => 0 ))->limit($limit)->column('PageID'); - $pages = Versioned::get_by_stage('SiteTree', 'Live') + $pages = Versioned::get_by_stage('SiteTree', 'Stage') ->filter('ID', $batch) ->limit($limit); $this->updateJobInfo('Fetching pages to check'); @@ -43,7 +43,7 @@ public function runLinksCheck($limit) { } // if the script is to be started } else { - $pages = Versioned::get_by_stage('SiteTree', 'Live')->column('ID'); + $pages = Versioned::get_by_stage('SiteTree', 'Stage')->column('ID'); $noPages = count($pages); $track = BrokenExternalPageTrackStatus::create(); @@ -63,7 +63,7 @@ public function runLinksCheck($limit) { 'TrackID' => $track->ID ))->limit($limit)->column('PageID'); - $pages = Versioned::get_by_stage('SiteTree', 'Live') + $pages = Versioned::get_by_stage('SiteTree', 'Stage') ->filter('ID', $batch); } $trackID = $track->ID; @@ -139,7 +139,7 @@ public function runLinksCheck($limit) { // bypass the ORM as syncLinkTracking does not allow you // to update HasBrokenLink to true - $query = "UPDATE \"SiteTree_Live\" SET \"HasBrokenLink\" = 1 "; + $query = "UPDATE \"SiteTree\" SET \"HasBrokenLink\" = 1 "; $query .= "WHERE \"ID\" = " . (int)$page->ID; $result = DB::query($query); if (!$result) { diff --git a/tests/ExternalLinksTest.php b/tests/ExternalLinksTest.php index a9b813f..181365a 100644 --- a/tests/ExternalLinksTest.php +++ b/tests/ExternalLinksTest.php @@ -7,17 +7,18 @@ class ExternalLinks extends FunctionalTest { public function testLinks() { // uses http://127.0.0.1 to test a working link $working = $this->objFromFixture('SiteTree', 'working'); - $working->publish('Stage', 'Live'); + $working->publish('Stage', 'Stage'); $task = new CheckExternalLinks(); $task->run(null); - $brokenLinks = BrokenExternalLink::get(); - $this->assertEquals(0, $brokenLinks->count()); + $brokenLinks = BrokenExternalLink::get()->column('Link');; + // confirm the working link has not been added as a broken link + $this->assertNotEquals($working->Link, $brokenLinks[0]); } public function testBrokenLink() { // uses http://192.0.2.1 for a broken link $broken = $this->objFromFixture('SiteTree', 'broken'); - $broken->publish('Stage', 'Live'); + $broken->publish('Stage', 'Stage'); $task = new CheckExternalLinks(); $task->run(null); $brokenLinks = BrokenExternalLink::get(); From 6bda4dcc82fbb6f4df56aa430dcec93c1435a609 Mon Sep 17 00:00:00 2001 From: Kirk Mayo Date: Mon, 4 Aug 2014 17:38:28 +1200 Subject: [PATCH 12/32] BUG: Correcting test code and fixing bug with create report button --- code/controllers/CMSExternalLinks.php | 15 +++++++++++++++ javascript/BrokenExternalLinksReport.js | 10 +++++----- tests/ExternalLinksTest.php | 4 ++-- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/code/controllers/CMSExternalLinks.php b/code/controllers/CMSExternalLinks.php index 8e3ea1a..8579bdd 100644 --- a/code/controllers/CMSExternalLinks.php +++ b/code/controllers/CMSExternalLinks.php @@ -31,6 +31,21 @@ public function start() { return; } if (class_exists('QueuedJobService')) { + $pages = Versioned::get_by_stage('SiteTree', 'Stage'); + $noPages = count($pages); + + $track = BrokenExternalPageTrackStatus::create(); + $track->TotalPages = $noPages; + $track->Status = 'Running'; + $track->write(); + + foreach ($pages as $page) { + $trackPage = BrokenExternalPageTrack::create(); + $trackPage->PageID = $page->ID; + $trackPage->TrackID = $track->ID; + $trackPage->write(); + } + $checkLinks = new CheckExternalLinksJob(); singleton('QueuedJobService') ->queueJob($checkLinks, date('Y-m-d H:i:s', time() + 1)); diff --git a/javascript/BrokenExternalLinksReport.js b/javascript/BrokenExternalLinksReport.js index d44f7b5..ff9acfb 100644 --- a/javascript/BrokenExternalLinksReport.js +++ b/javascript/BrokenExternalLinksReport.js @@ -6,7 +6,7 @@ onmatch: function() { // poll the current job and update the front end status $('#externalLinksReport').hide(); - $(this).poll(); + $(this).poll(0); }, start: function() { // initiate a new job @@ -15,9 +15,9 @@ $('#ReportHolder').append(''); $('#externalLinksReport').hide(); $.ajax({url: "admin/externallinks/start", async: true, timeout: 3000 }); - $(this).poll(); + $(this).poll(1); }, - poll: function() { + poll: function(start) { $.ajax({ url: "admin/externallinks/getJobStatus", async: true, @@ -32,11 +32,11 @@ var completed = obj.Completed ? obj.Completed : 0; var total = obj.Total ? obj.Total : 0; var jobStatus = obj.Status ? obj.Status : 'Running'; - if (jobStatus == 'Completed') { + if (jobStatus == 'Completed' && start == 0) { $('#ReportHolder').text('Report Finished ' + completed + '/' + total); $('#externalLinksReport').show(); } else { - setTimeout(function() { $('#externalLinksReport').poll(); }, 1000); + setTimeout(function() { $('#externalLinksReport').poll(0); }, 1000); } if (total && completed) { if (completed < total) { diff --git a/tests/ExternalLinksTest.php b/tests/ExternalLinksTest.php index 181365a..6c7bf61 100644 --- a/tests/ExternalLinksTest.php +++ b/tests/ExternalLinksTest.php @@ -7,7 +7,7 @@ class ExternalLinks extends FunctionalTest { public function testLinks() { // uses http://127.0.0.1 to test a working link $working = $this->objFromFixture('SiteTree', 'working'); - $working->publish('Stage', 'Stage'); + $working->write(); $task = new CheckExternalLinks(); $task->run(null); $brokenLinks = BrokenExternalLink::get()->column('Link');; @@ -18,7 +18,7 @@ public function testLinks() { public function testBrokenLink() { // uses http://192.0.2.1 for a broken link $broken = $this->objFromFixture('SiteTree', 'broken'); - $broken->publish('Stage', 'Stage'); + $broken->write(); $task = new CheckExternalLinks(); $task->run(null); $brokenLinks = BrokenExternalLink::get(); From f55a650d244f27f8f9caf733ec82bc1d18395a89 Mon Sep 17 00:00:00 2001 From: Kirk Mayo Date: Wed, 6 Aug 2014 15:11:23 +1200 Subject: [PATCH 13/32] NEW: Plat-59 Do not display broken previous report info for the latest report --- code/model/BrokenExternalLink.php | 3 ++- code/reports/BrokenExternalLinksReport.php | 11 +++++++++-- code/tasks/CheckExternalLinks.php | 1 + 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/code/model/BrokenExternalLink.php b/code/model/BrokenExternalLink.php index a716aea..24ff85f 100644 --- a/code/model/BrokenExternalLink.php +++ b/code/model/BrokenExternalLink.php @@ -8,7 +8,8 @@ class BrokenExternalLink extends DataObject { ); private static $has_one = array( - 'Page' => 'Page' + 'Page' => 'Page', + 'Track' => 'BrokenExternalLink' ); public static $summary_fields = array( diff --git a/code/reports/BrokenExternalLinksReport.php b/code/reports/BrokenExternalLinksReport.php index 34bfbec..c908820 100644 --- a/code/reports/BrokenExternalLinksReport.php +++ b/code/reports/BrokenExternalLinksReport.php @@ -34,7 +34,8 @@ public function init() { * @return string */ public function title() { - return _t('ExternalBrokenLinksReport.EXTERNALBROKENLINKS',"External broken links report"); + return _t('ExternalBrokenLinksReport.EXTERNALBROKENLINKS', + "External broken links report"); } /** @@ -56,8 +57,14 @@ public function getColumns() { } public function sourceRecords() { + $track = CheckExternalLinks::getLatestTrack(); $returnSet = new ArrayList(); - $links = BrokenExternalLink::get(); + if ($track && $track->exists()) { + $links = BrokenExternalLink::get() + ->filter('TrackID', $track->ID); + } else { + $links = BrokenExternalLink::get(); + } foreach ($links as $link) { $link->PageLink = $link->Page()->Title; $link->ID = $link->Page()->ID; diff --git a/code/tasks/CheckExternalLinks.php b/code/tasks/CheckExternalLinks.php index 7d4a605..4fa8dce 100644 --- a/code/tasks/CheckExternalLinks.php +++ b/code/tasks/CheckExternalLinks.php @@ -124,6 +124,7 @@ public function runLinksCheck($limit) { $brokenLink->PageID = $page->ID; $brokenLink->Link = $href; $brokenLink->HTTPCode = $httpCode; + $brokenLink->TrackID = $track->ID; $brokenLink->write(); // set the broken link class From a4ede246ab6c21e59f83ce8506c72b91a55a0325 Mon Sep 17 00:00:00 2001 From: Damian Mooyman Date: Thu, 7 Aug 2014 13:56:15 +1200 Subject: [PATCH 14/32] Tests, refactor, cleanup, fix issues --- _config.php | 1 - _config/injector.yml | 5 + code/controllers/CMSExternalLinks.php | 52 ++--- code/jobs/CheckExternalLinksJob.php | 2 +- code/model/BrokenExternalLink.php | 35 ++- code/model/BrokenExternalPageTrack.php | 28 +++ code/model/BrokenExternalPageTrackStatus.php | 128 +++++++++++ code/reports/BrokenExternalLinksReport.php | 73 +++---- code/tasks/CheckExternalLinks.php | 218 ------------------- code/tasks/CheckExternalLinksTask.php | 204 +++++++++++++++++ code/tasks/CurlLinkChecker.php | 49 +++++ code/tasks/LinkChecker.php | 15 ++ composer.json | 7 +- javascript/BrokenExternalLinksReport.js | 108 ++++----- tests/ExternalLinksTest.php | 140 ++++++++++-- tests/ExternalLinksTest.yml | 63 +++++- 16 files changed, 737 insertions(+), 391 deletions(-) delete mode 100644 _config.php create mode 100644 _config/injector.yml create mode 100644 code/model/BrokenExternalPageTrack.php create mode 100644 code/model/BrokenExternalPageTrackStatus.php delete mode 100644 code/tasks/CheckExternalLinks.php create mode 100644 code/tasks/CheckExternalLinksTask.php create mode 100644 code/tasks/CurlLinkChecker.php create mode 100644 code/tasks/LinkChecker.php diff --git a/_config.php b/_config.php deleted file mode 100644 index b3d9bbc..0000000 --- a/_config.php +++ /dev/null @@ -1 +0,0 @@ -exists()) return null; - echo json_encode(array( + // Set headers + HTTP::set_cache_age(0); + HTTP::add_cache_headers($this->response); + $this->response + ->addHeader('Content-Type', 'application/json') + ->addHeader('Content-Encoding', 'UTF-8') + ->addHeader('X-Content-Type-Options', 'nosniff'); + + // Format status + $track = BrokenExternalPageTrackStatus::get_latest(); + if($track) return json_encode(array( 'TrackID' => $track->ID, 'Status' => $track->Status, - 'Completed' => $track->CompletedPages, - 'Total' => $track->TotalPages + 'Completed' => $track->getCompletedPages(), + 'Total' => $track->getTotalPages() )); } @@ -25,36 +33,22 @@ public function getJobStatus() { * Starts a broken external link check */ public function start() { - $status = checkExternalLinks::getLatestTrackStatus(); // return if the a job is already running - if ($status == 'Running') { - return; - } - if (class_exists('QueuedJobService')) { - $pages = Versioned::get_by_stage('SiteTree', 'Stage'); - $noPages = count($pages); - - $track = BrokenExternalPageTrackStatus::create(); - $track->TotalPages = $noPages; - $track->Status = 'Running'; - $track->write(); - - foreach ($pages as $page) { - $trackPage = BrokenExternalPageTrack::create(); - $trackPage->PageID = $page->ID; - $trackPage->TrackID = $track->ID; - $trackPage->write(); - } + $status = BrokenExternalPageTrackStatus::get_latest(); + if ($status && $status->Status == 'Running') return; + // Create a new job + if (class_exists('QueuedJobService')) { + // Force the creation of a new run + BrokenExternalPageTrackStatus::create_status(); $checkLinks = new CheckExternalLinksJob(); - singleton('QueuedJobService') - ->queueJob($checkLinks, date('Y-m-d H:i:s', time() + 1)); + singleton('QueuedJobService')->queueJob($checkLinks); } else { //TODO this hangs as it waits for the connection to be released - // should return back and continue processing + // should return back and continue processing // http://us3.php.net/manual/en/features.connection-handling.php - $task = new CheckExternalLinks(); - $task->run(); + $task = CheckExternalLinksTask::create(); + $task->runLinksCheck(); } } } diff --git a/code/jobs/CheckExternalLinksJob.php b/code/jobs/CheckExternalLinksJob.php index bb79e67..3f4311a 100644 --- a/code/jobs/CheckExternalLinksJob.php +++ b/code/jobs/CheckExternalLinksJob.php @@ -24,7 +24,7 @@ public function getSignature() { * Check an individual page */ public function process() { - $task = new CheckExternalLinks(); + $task = CheckExternalLinksTask::create(); $track = $task->runLinksCheck(1); $this->currentStep = $track->CompletedPages; $this->totalSteps = $track->TotalPages; diff --git a/code/model/BrokenExternalLink.php b/code/model/BrokenExternalLink.php index 24ff85f..7d21e07 100644 --- a/code/model/BrokenExternalLink.php +++ b/code/model/BrokenExternalLink.php @@ -1,5 +1,11 @@ 'Page', - 'Track' => 'BrokenExternalLink' + 'Track' => 'BrokenExternalPageTrack', + 'Status' => 'BrokenExternalPageTrackStatus' ); + /** + * @return SiteTree + */ + public function Page() { + return $this->Track()->Page(); + } + public static $summary_fields = array( 'Page.Title' => 'Page', 'HTTPCode' => 'HTTP Code', @@ -33,22 +46,4 @@ function canView($member = false) { } } -class BrokenExternalPageTrackStatus extends DataObject { - private static $db = array( - 'Status' => 'Enum("Completed, Running", "Running")', - 'TotalPages' => 'Int', - 'CompletedPages' => 'Int', - 'JobInfo' => 'Varchar(255)' - ); -} -class BrokenExternalPageTrack extends DataObject { - private static $db = array( - 'TrackID' => 'Int', - 'Processed' => 'Boolean' - ); - - private static $has_one = array( - 'Page' => 'Page' - ); -} diff --git a/code/model/BrokenExternalPageTrack.php b/code/model/BrokenExternalPageTrack.php new file mode 100644 index 0000000..828b541 --- /dev/null +++ b/code/model/BrokenExternalPageTrack.php @@ -0,0 +1,28 @@ + 'Boolean' + ); + + private static $has_one = array( + 'Page' => 'SiteTree', + 'Status' => 'BrokenExternalPageTrackStatus' + ); + + private static $has_many = array( + 'BrokenLinks' => 'BrokenExternalLink' + ); + + /** + * @return SiteTree + */ + public function Page() { + return Versioned::get_by_stage('SiteTree', 'Stage') + ->byID($this->PageID); + } +} diff --git a/code/model/BrokenExternalPageTrackStatus.php b/code/model/BrokenExternalPageTrackStatus.php new file mode 100644 index 0000000..141b996 --- /dev/null +++ b/code/model/BrokenExternalPageTrackStatus.php @@ -0,0 +1,128 @@ + 'Enum("Completed, Running", "Running")', + 'JobInfo' => 'Varchar(255)' + ); + + private static $has_many = array( + 'TrackedPages' => 'BrokenExternalPageTrack', + 'BrokenLinks' => 'BrokenExternalLink' + ); + + /** + * Get the latest track status + * + * @return self + */ + public static function get_latest() { + return self::get() + ->sort('ID', 'DESC') + ->first(); + } + + /** + * Gets the list of Pages yet to be checked + * + * @return DataList + */ + public function getIncompletePageList() { + $pageIDs = $this + ->getIncompleteTracks() + ->column('PageID'); + if($pageIDs) return Versioned::get_by_stage('SiteTree', 'Stage') + ->byIDs($pageIDs); + } + + /** + * Get the list of incomplete BrokenExternalPageTrack + * + * @return DataList + */ + public function getIncompleteTracks() { + return $this + ->TrackedPages() + ->filter('Processed', 0); + } + + /** + * Get total pages count + */ + public function getTotalPages() { + return $this->TrackedPages()->count(); + } + + /** + * Get completed pages count + */ + public function getCompletedPages() { + return $this + ->TrackedPages() + ->filter('Processed', 1) + ->count(); + } + + /** + * Returns the latest run, or otherwise creates a new one + * + * @return self + */ + public static function get_or_create() { + // Check the current status + $status = self::get_latest(); + if ($status && $status->Status == 'Running') { + $status->updateStatus(); + return $status; + } + + return self::create_status(); + } + + /* + * Create and prepare a new status + * + * @return self + */ + public static function create_status() { + // If the script is to be started create a new status + $status = self::create(); + $status->updateJobInfo('Creating new tracking object'); + + // Setup all pages to test + $pageIDs = Versioned::get_by_stage('SiteTree', 'Stage') + ->column('ID'); + foreach ($pageIDs as $pageID) { + $trackPage = BrokenExternalPageTrack::create(); + $trackPage->PageID = $pageID; + $trackPage->StatusID = $status->ID; + $trackPage->write(); + } + + return $status; + } + + public function updateJobInfo($message) { + $this->JobInfo = $message; + $this->write(); + } + + /** + * Self check status + */ + public function updateStatus() { + if ($this->CompletedPages == $this->TotalPages) { + $this->Status = 'Completed'; + $this->updateJobInfo('Setting to completed'); + } + } +} \ No newline at end of file diff --git a/code/reports/BrokenExternalLinksReport.php b/code/reports/BrokenExternalLinksReport.php index c908820..5809cc6 100644 --- a/code/reports/BrokenExternalLinksReport.php +++ b/code/reports/BrokenExternalLinksReport.php @@ -8,43 +8,41 @@ class BrokenExternalLinksReport extends SS_Report { - /** - * Columns in the report - * - * @var array - * @config - */ - private static $columns = array( - 'Created' => 'Checked', - 'Link' => 'External Link', - 'HTTPCode' => 'HTTP Error Code', - 'PageLink' => array( - 'title' => 'Page link is on', - 'link' => true - ), - ); - - public function init() { - parent::init(); - } - /** * Returns the report title - * + * * @return string */ public function title() { - return _t('ExternalBrokenLinksReport.EXTERNALBROKENLINKS', - "External broken links report"); + return _t('ExternalBrokenLinksReport.EXTERNALBROKENLINKS', "External broken links report"); } - /** - * Returns the column names of the report - * - * @return array - */ public function columns() { - return self::$columns; + return array( + "Created" => "Checked", + 'Link' => array( + 'title' => 'External Link', + 'formatting' => function($value, $item) { + return sprintf( + '%s', + Convert::raw2att($item->Link), + Convert::raw2xml($item->Link) + ); + } + ), + 'HTTPCode' => 'HTTP Error Code', + "Title" => array( + "title" => 'Page link is on', + 'formatting' => function($value, $item) { + $page = $item->Page(); + return sprintf( + '%s', + Convert::raw2att($page->CMSEditLink()), + Convert::raw2xml($page->Title) + ); + } + ) + ); } /** @@ -57,20 +55,9 @@ public function getColumns() { } public function sourceRecords() { - $track = CheckExternalLinks::getLatestTrack(); - $returnSet = new ArrayList(); - if ($track && $track->exists()) { - $links = BrokenExternalLink::get() - ->filter('TrackID', $track->ID); - } else { - $links = BrokenExternalLink::get(); - } - foreach ($links as $link) { - $link->PageLink = $link->Page()->Title; - $link->ID = $link->Page()->ID; - $returnSet->push($link); - } - return $returnSet; + $track = BrokenExternalPageTrackStatus::get_latest(); + if ($track) return $track->BrokenLinks(); + return new ArrayList(); } public function getCMSFields() { diff --git a/code/tasks/CheckExternalLinks.php b/code/tasks/CheckExternalLinks.php deleted file mode 100644 index 4fa8dce..0000000 --- a/code/tasks/CheckExternalLinks.php +++ /dev/null @@ -1,218 +0,0 @@ -runLinksCheck($this->limit); - } - - /** - * Runs the links checker and returns the track used - * - * @param int $limit Limit to number of pages to run - * @return BrokenExternalPageTrackStatus - */ - public function runLinksCheck($limit) { - $track = CheckExternalLinks::getLatestTrack(); - - // if the script has already been started - if ($track && $track->Status == 'Running') { - $batch = BrokenExternalPageTrack::get() - ->filter(array( - 'TrackID' => $track->ID, - 'Processed' => 0 - ))->limit($limit)->column('PageID'); - $pages = Versioned::get_by_stage('SiteTree', 'Stage') - ->filter('ID', $batch) - ->limit($limit); - $this->updateJobInfo('Fetching pages to check'); - if ($track->CompletedPages == $track->TotalPages) { - $track->Status = 'Completed'; - $track->write(); - $this->updateJobInfo('Setting to completed'); - } - // if the script is to be started - } else { - $pages = Versioned::get_by_stage('SiteTree', 'Stage')->column('ID'); - $noPages = count($pages); - - $track = BrokenExternalPageTrackStatus::create(); - $track->TotalPages = $noPages; - $track->write(); - $this->updateJobInfo('Creating new tracking object'); - - foreach ($pages as $page) { - $trackPage = BrokenExternalPageTrack::create(); - $trackPage->PageID = $page; - $trackPage->TrackID = $track->ID; - $trackPage->write(); - } - - $batch = BrokenExternalPageTrack::get() - ->filter(array( - 'TrackID' => $track->ID - ))->limit($limit)->column('PageID'); - - $pages = Versioned::get_by_stage('SiteTree', 'Stage') - ->filter('ID', $batch); - } - $trackID = $track->ID; - foreach ($pages as $page) { - ++$this->totalPages; - - if ($track->ID) { - $trackPage = BrokenExternalPageTrack::get() - ->filter(array( - 'PageID' => $page->ID, - 'TrackID' => $track->ID - ))->first(); - $trackPage->Processed = 1; - $trackPage->write(); - } - - $htmlValue = Injector::inst()->create('HTMLValue', $page->Content); - if (!$htmlValue->isValid()) { - continue; - } - - // Populate link tracking for internal links & links to asset files. - if($links = $htmlValue->getElementsByTagName('a')) foreach($links as $link) { - $class = $link->getAttribute('class'); - $pos = stripos($class, 'ss-broken'); - if ($pos !== false && $page->HasBrokenLink == 1) continue; - - $href = Director::makeRelative($link->getAttribute('href')); - if ($href == 'admin/') continue; - - // ignore SiteTree, anchor and assets links as they will be caught - // by SiteTreeLinkTracking - if(preg_match('/\[(file_link|sitetree_link),id=([0-9]+)\]/i', $href, $matches)) { - continue; - } else if (isset($href[0]) && $href[0] == '#') { - continue; - } else if(substr($href, 0, strlen(ASSETS_DIR) + 1) == ASSETS_DIR.'/') { - continue; - } - - if($href && function_exists('curl_init')) { - $handle = curl_init($href); - curl_setopt($handle, CURLOPT_RETURNTRANSFER, TRUE); - curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5); - curl_setopt($handle, CURLOPT_TIMEOUT, 10); - $response = curl_exec($handle); - $httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE); - curl_close($handle); - // do we have any whitelisted codes - $ignoreCodes = Config::inst()->get('CheckExternalLinks', 'IgnoreCodes'); - // if the code is whitelisted set it to 200 - $httpCode = (is_array($ignoreCodes) && in_array($httpCode, $ignoreCodes)) ? - 200 : $httpCode; - - // ignore empty hrefs and internal links - if (($httpCode < 200 || $httpCode > 302) || ($href == '' || $href[0] == '/')) { - $brokenLink = new BrokenExternalLink(); - $brokenLink->PageID = $page->ID; - $brokenLink->Link = $href; - $brokenLink->HTTPCode = $httpCode; - $brokenLink->TrackID = $track->ID; - $brokenLink->write(); - - // set the broken link class - $class = ($class && stripos($class, 'ss-broken')) ? - $class . ' ss-broken' : 'ss-broken'; - $link->setAttribute('class', ($class ? $class : 'ss-broken')); - $htmlValue->__call('saveHTML', array()); - - $page->Content = $htmlValue->getContent(); - $page->owner->write(); - - if (!$page->HasBrokenLink) { - - // bypass the ORM as syncLinkTracking does not allow you - // to update HasBrokenLink to true - $query = "UPDATE \"SiteTree\" SET \"HasBrokenLink\" = 1 "; - $query .= "WHERE \"ID\" = " . (int)$page->ID; - $result = DB::query($query); - if (!$result) { - $this->debugMessage('Error updating HasBrokenLink'); - } - } - - } - } - } - ++$this->completedPages; - } - - // run this outside the foreach loop to stop it locking DB rows - $this->updateJobInfo('Updating completed pages'); - $this->updateCompletedPages($trackID); - - // do we need to carry on running the job - $track = $this->getLatestTrack(); - if ($track->CompletedPages >= $track->TotalPages) { - $track->Status = 'Completed'; - $track->write(); - - // clear any old previous data - $rows = BrokenExternalPageTrack::get() - ->exclude('TrackID', $track->ID); - foreach ($rows as $row) { - $row->delete(); - } - return $track; - } - - // if running via the queued job module return to the queued job after each iteration - if ($limit == 1) { - return $track; - } else { - $this->updateJobInfo("Running next batch {$track->CompletedPages}/{$track->TotalPages}"); - return $this->runLinksCheck($limit); - } - } - - public static function getLatestTrack() { - $track = BrokenExternalPageTrackStatus::get()->sort('ID', 'DESC')->first(); - if (!$track || !$track->exists()) return null; - return $track; - } - - public static function getLatestTrackID() { - $track = CheckExternalLinks::getLatestTrack(); - if (!$track || !$track->exists()) return null; - return $track->ID; - } - - public static function getLatestTrackStatus() { - $track = CheckExternalLinks::getLatestTrack(); - if (!$track || !$track->exists()) return null; - return $track->Status; - } - - private function updateCompletedPages($trackID = 0) { - $noPages = BrokenExternalPageTrack::get() - ->filter(array('TrackID' => $trackID, 'Processed' => 1))->count(); - $track = $this->getLatestTrack($trackID); - $track->CompletedPages = $noPages; - $track->write(); - return $noPages; - } - - private function updateJobInfo($message) { - $track = CheckExternalLinks::getLatestTrack(); - if (!$track || !$track->exists()) return null; - $track->JobInfo = $message; - $track->write(); - } -} diff --git a/code/tasks/CheckExternalLinksTask.php b/code/tasks/CheckExternalLinksTask.php new file mode 100644 index 0000000..97e61c6 --- /dev/null +++ b/code/tasks/CheckExternalLinksTask.php @@ -0,0 +1,204 @@ +silent) Debug::message($message); + } + + /** + * @var bool + */ + protected $silent = false; + + /** + * Turn on or off message output + * + * @param bool $silent + */ + public function setSilent($silent) { + $this->silent = $silent; + } + + protected $title = 'Checking broken External links in the SiteTree'; + + protected $description = 'A task that records external broken links in the SiteTree'; + + protected $enabled = true; + + private static $dependencies = array( + 'LinkChecker' => '%$LinkChecker' + ); + + public function run($request) { + $this->runLinksCheck(); + } + + /** + * @var LinkChecker + */ + protected $linkChecker; + + /** + * @param LinkChecker $linkChecker + */ + public function setLinkChecker(LinkChecker $linkChecker) { + $this->linkChecker = $linkChecker; + } + + /** + * @return LinkChecker + */ + public function getLinkChecker() { + return $this->linkChecker; + } + + + /** + * Check the status of a single link on a page + * + * @param BrokenExternalPageTrack $pageTrack + * @param DOMNode $link + */ + protected function checkPageLink(BrokenExternalPageTrack $pageTrack, DOMNode $link) { + $class = $link->getAttribute('class'); + $href = $link->getAttribute('href'); + $markedBroken = preg_match('/\b(ss-broken)\b/', $class); + + // Check link + $httpCode = $this->linkChecker->checkLink($href); + if($httpCode === null) return; // Null link means uncheckable, such as an internal link + + // If this code is broken then mark as such + if($foundBroken = $this->isCodeBroken($httpCode)) { + // Create broken record + $brokenLink = new BrokenExternalLink(); + $brokenLink->Link = $href; + $brokenLink->HTTPCode = $httpCode; + $brokenLink->TrackID = $pageTrack->ID; + $brokenLink->StatusID = $pageTrack->StatusID; // Slight denormalisation here for performance reasons + $brokenLink->write(); + } + + // Check if we need to update CSS class, otherwise return + if($markedBroken == $foundBroken) return; + if($foundBroken) { + $class .= ' ss-broken'; + } else { + $class = preg_replace('/\s*\b(ss-broken)\b\s*/', ' ', $class); + } + $link->setAttribute('class', trim($class)); + } + + /** + * Determine if the given HTTP code is "broken" + * + * @param int $httpCode + * @return bool True if this is a broken code + */ + protected function isCodeBroken($httpCode) { + // Null represents no request attempted + if($httpCode === null) return false; + + // do we have any whitelisted codes + $ignoreCodes = Config::inst()->get('CheckExternalLinks', 'IgnoreCodes'); + if(is_array($ignoreCodes) && in_array($httpCode, $ignoreCodes)) return false; + + // Check if code is outside valid range + return $httpCode < 200 || $httpCode > 302; + } + + /** + * Runs the links checker and returns the track used + * + * @param int $limit Limit to number of pages to run, or null to run all + * @return BrokenExternalPageTrackStatus + */ + public function runLinksCheck($limit = null) { + // Check the current status + $status = BrokenExternalPageTrackStatus::get_or_create(); + + // Calculate pages to run + $pageTracks = $status->getIncompleteTracks(); + if($limit) $pageTracks = $pageTracks->limit($limit); + + // Check each page + foreach ($pageTracks as $pageTrack) { + // Flag as complete + $pageTrack->Processed = 1; + $pageTrack->write(); + + // Check value of html area + $page = $pageTrack->Page(); + $this->log("Checking {$page->Title}"); + $htmlValue = Injector::inst()->create('HTMLValue', $page->Content); + if (!$htmlValue->isValid()) continue; + + // Check each link + $links = $htmlValue->getElementsByTagName('a'); + foreach($links as $link) { + $this->checkPageLink($pageTrack, $link); + } + + // Update content of page based on link fixes / breakages + $htmlValue->saveHTML(); + $page->Content = $htmlValue->getContent(); + $page->write(); + + // Once all links have been created for this page update HasBrokenLinks + $count = $pageTrack->BrokenLinks()->count(); + $this->log("Found {$count} broken links"); + if($count) { + // Bypass the ORM as syncLinkTracking does not allow you to update HasBrokenLink to true + DB::query(sprintf( + 'UPDATE "SiteTree" SET "HasBrokenLink" = 1 WHERE "ID" = \'%d\'', + intval($pageTrack->ID) + )); + } + } + + $status->updateJobInfo('Updating completed pages'); + $status->updateStatus(); + return $status; + } + + public static function getLatestTrack() { + return BrokenExternalPageTrackStatus::get_latest(); + } + + public static function getLatestTrackID() { + $track = BrokenExternalPageTrackStatus::get_latest(); + return $track ? $track->ID : null; + } + + public static function getLatestTrackStatus() { + $track = BrokenExternalPageTrackStatus::get_latest(); + return $track ? $track->Status : null; + } + + private function updateCompletedPages($trackID = 0) { + $noPages = BrokenExternalPageTrack::get() + ->filter(array( + 'TrackID' => $trackID, + 'Processed' => 1 + )) + ->count(); + $track = $this->getLatestTrack($trackID); + $track->CompletedPages = $noPages; + $track->write(); + return $noPages; + } + + private function updateJobInfo($message) { + $track = BrokenExternalPageTrackStatus::get_latest(); + if($track) { + $track->JobInfo = $message; + $track->write(); + } + } +} diff --git a/code/tasks/CurlLinkChecker.php b/code/tasks/CurlLinkChecker.php new file mode 100644 index 0000000..4e307ff --- /dev/null +++ b/code/tasks/CurlLinkChecker.php @@ -0,0 +1,49 @@ + true) + ); + } + + /** + * Determine the http status code for a given link + * + * @param string $href URL to check + * @return int HTTP status code, or null if not checkable (not a link) + */ + protected function checkLink($href) { + // Skip non-external links + if(!preg_match('/^https?[^:]*:\/\//', $href)) return null; + + // Check if we have a cached result + $cacheKey = md5($href); + $result = $this->getCache()->load($cacheKey); + if($result !== false) return $result; + + // No cached result so just request + $handle = curl_init($href); + curl_setopt($handle, CURLOPT_RETURNTRANSFER, TRUE); + curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5); + curl_setopt($handle, CURLOPT_TIMEOUT, 10); + curl_exec($handle); + $httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE); + curl_close($handle); + + // Cache result + $this->getCache()->save($httpCode, $cacheKey); + return $httpCode; + } +} diff --git a/code/tasks/LinkChecker.php b/code/tasks/LinkChecker.php new file mode 100644 index 0000000..629f177 --- /dev/null +++ b/code/tasks/LinkChecker.php @@ -0,0 +1,15 @@ +=3.0", "silverstripe/cms": ">=3.0" }, + "require-dev": { + "hafriedlander/silverstripe-phockito": "*", + "phpunit/PHPUnit": "~3.7@stable" + }, "suggest": { "silverstripe/queuedjobs": "Speeds up running the job for Content Editors fropm the report" } diff --git a/javascript/BrokenExternalLinksReport.js b/javascript/BrokenExternalLinksReport.js index ff9acfb..5b3c853 100644 --- a/javascript/BrokenExternalLinksReport.js +++ b/javascript/BrokenExternalLinksReport.js @@ -1,57 +1,63 @@ (function($) { - $('#externalLinksReport').entwine({ - onclick: function() { - $(this).start(); - }, - onmatch: function() { - // poll the current job and update the front end status - $('#externalLinksReport').hide(); - $(this).poll(0); - }, - start: function() { - // initiate a new job - $('#ReportHolder').empty(); - $('#ReportHolder').text('Running report 0%'); - $('#ReportHolder').append(''); - $('#externalLinksReport').hide(); - $.ajax({url: "admin/externallinks/start", async: true, timeout: 3000 }); - $(this).poll(1); - }, - poll: function(start) { - $.ajax({ - url: "admin/externallinks/getJobStatus", - async: true, - success: function(data) { - var obj = $.parseJSON(data); - - // No report, so let user create one - if (!obj) { - $('#externalLinksReport').show(); - return; - } - var completed = obj.Completed ? obj.Completed : 0; - var total = obj.Total ? obj.Total : 0; - var jobStatus = obj.Status ? obj.Status : 'Running'; - if (jobStatus == 'Completed' && start == 0) { - $('#ReportHolder').text('Report Finished ' + completed + '/' + total); - $('#externalLinksReport').show(); - } else { - setTimeout(function() { $('#externalLinksReport').poll(0); }, 1000); - } - if (total && completed) { + $.entwine('ss', function($) { + $('#externalLinksReport').entwine({ + onclick: function() { + $(this).start(); + }, + onmatch: function() { + // poll the current job and update the front end status + $('#externalLinksReport').hide(); + $(this).poll(0); + }, + start: function() { + // initiate a new job + $('#ReportHolder').empty(); + $('#ReportHolder').text('Running report 0%'); + $('#ReportHolder').append(''); + $('#externalLinksReport').hide(); + $.ajax({url: "admin/externallinks/start", async: true, timeout: 3000 }); + $(this).poll(1); + }, + poll: function(start) { + $.ajax({ + url: "admin/externallinks/getJobStatus", + async: true, + success: function(data) { + // No report, so let user create one + if (!data) { + $('#externalLinksReport').show(); + return; + } + + // Parse data + var completed = data.Completed ? data.Completed : 0; + var total = data.Total ? data.Total : 0; + + // If complete status + if (data.Status === 'Completed') { + $('#ReportHolder').text('Report Finished ' + completed + '/' + total); + $('#externalLinksReport').show(); + return; + } + + // If incomplete update status if (completed < total) { var percent = (completed / total) * 100; - $('#ReportHolder').text('Running report ' + completed + '/' + - total + ' (' + percent.toFixed(2) + '%)'); - $('#ReportHolder'). - append(''); - } + $('#ReportHolder') + .text('Running report ' + completed + '/' + total + ' (' + percent.toFixed(2) + '%)') + .append(''); + } + + // Ensure the regular poll method is run + if(!start) { + setTimeout(function() { $('#externalLinksReport').poll(0); }, 1000); + } + }, + error: function(e) { + if(typeof console !== 'undefined') console.log(e); } - }, - error: function(e) { - if(typeof console !== 'undefined') console.log(e); - } - }); - } + }); + } + }); }); }(jQuery)); diff --git a/tests/ExternalLinksTest.php b/tests/ExternalLinksTest.php index 6c7bf61..2a4f640 100644 --- a/tests/ExternalLinksTest.php +++ b/tests/ExternalLinksTest.php @@ -1,32 +1,129 @@ objFromFixture('SiteTree', 'working'); - $working->write(); - $task = new CheckExternalLinks(); - $task->run(null); - $brokenLinks = BrokenExternalLink::get()->column('Link');; - // confirm the working link has not been added as a broken link - $this->assertNotEquals($working->Link, $brokenLinks[0]); + protected $extraDataObjects = array( + 'ExternalLinksTest_Page' + ); + + public function setUp() { + parent::setUp(); + + Injector::nest(); + + // Check dependencies + if (!class_exists('Phockito')) { + $this->skipTest = true; + return $this->markTestSkipped("These tests need the Phockito module installed to run"); + } + + // Mock link checker + $checker = Phockito::mock('LinkChecker'); + Phockito::when($checker) + ->checkLink('http://www.working.com') + ->return(200); + + Phockito::when($checker) + ->checkLink('http://www.broken.com/url/thing') // 404 on working site + ->return(404); + + Phockito::when($checker) + ->checkLink('http://www.broken.com') // 403 on working site + ->return(403); + + Phockito::when($checker) + ->checkLink('http://www.nodomain.com') // no ping + ->return(0); + + Phockito::when($checker) + ->checkLink('/internal/link') + ->return(null); + + Phockito::when($checker) + ->checkLink('[sitetree_link,id=9999]') + ->return(null); + + Phockito::when($checker) + ->checkLink('home') + ->return(null); + + Phockito::when($checker) + ->checkLink('broken-internal') + ->return(null); + + Phockito::when($checker) + ->checkLink('[sitetree_link,id=1]') + ->return(null); + + Phockito::when($checker) + ->checkLink(anything()) // anything else is 404 + ->return(404); + + Injector::inst()->registerService($checker, 'LinkChecker'); } - public function testBrokenLink() { - // uses http://192.0.2.1 for a broken link - $broken = $this->objFromFixture('SiteTree', 'broken'); - $broken->write(); - $task = new CheckExternalLinks(); - $task->run(null); - $brokenLinks = BrokenExternalLink::get(); - $this->assertEquals(1, $brokenLinks->count()); + public function tearDown() { + Injector::unnest(); + parent::tearDown(); } + public function testLinks() { + // Run link checker + $task = CheckExternalLinksTask::create(); + $task->setSilent(true); // Be quiet during the test! + $task->runLinksCheck(); + + // Get all links checked + $status = BrokenExternalPageTrackStatus::get_latest(); + $this->assertEquals('Completed', $status->Status); + $this->assertEquals(5, $status->TotalPages); + $this->assertEquals(5, $status->CompletedPages); + + // Check all pages have had the correct HTML adjusted + for($i = 1; $i <= 5; $i++) { + $page = $this->objFromFixture('ExternalLinksTest_Page', 'page'.$i); + $this->assertNotEmpty($page->Content); + $this->assertEquals( + $page->ExpectedContent, + $page->Content, + "Assert that the content of page{$i} has been updated" + ); + } + + // Check that the correct report of broken links is generated + $links = $status + ->BrokenLinks() + ->sort('Link'); + + $this->assertEquals(4, $links->count()); + $this->assertEquals( + array( + 'http://www.broken.com', + 'http://www.broken.com/url/thing', + 'http://www.broken.com/url/thing', + 'http://www.nodomain.com' + ), + array_values($links->map('ID', 'Link')->toArray()) + ); + + // Check response codes are correct + $expected = array( + 'http://www.broken.com' => 403, + 'http://www.broken.com/url/thing' => 404, + 'http://www.nodomain.com' => 0 + ); + $actual = $links->map('Link', 'HTTPCode')->toArray(); + $this->assertEquals($expected, $actual); + } + + /** + * Test that broken links appears in the reports list + */ public function testReportExists() { - $mock = $this->objFromFixture('SiteTree', 'broken'); $reports = SS_Report::get_reports(); $reportNames = array(); foreach($reports as $report) { @@ -37,3 +134,8 @@ public function testReportExists() { } } +class ExternalLinksTest_Page extends Page implements TestOnly { + private static $db = array( + 'ExpectedContent' => 'HTMLText' + ); +} diff --git a/tests/ExternalLinksTest.yml b/tests/ExternalLinksTest.yml index 46eb0a8..9e92c62 100644 --- a/tests/ExternalLinksTest.yml +++ b/tests/ExternalLinksTest.yml @@ -1,7 +1,56 @@ -SiteTree: - working: - Title: Working Link - Content: 'Localhost' - broken: - Title: Broken Link - Content: 'Broken' \ No newline at end of file +ExternalLinksTest_Page: + # Tests mix of broken and working external links + page1: + Title: 'Page 1' + Content: > +

Links

+ This is a working site +

Other Links

+ but this isn't + ExpectedContent: > +

Links

+ This is a working site +

Other Links

+ but this isn't + # Tests broken external link staying broken + page2: + Title: 'Page 2' + Content: > +

Still Broken

+ ExpectedContent: > +

Still Broken

+ # Tests internal broken links not marking a page as broken + page3: + Title: 'Page 3' + Content: > +

Links

+ Home page + Broken internal page + This is a working site + ExpectedContent: > +

Links

+ Home page + Broken internal page + This is a working site + # Tests httpcode = 0 + page4: + Title: 'Page 4' + Content: > + This shouldn't even have a HTTP response + Another Link +

Copied from another page

+ ExpectedContent: > + This shouldn't even have a HTTP response + Another Link +

Copied from another page

+ # Test page with no broken links + page5: + Title: 'Page 5' + Content: > + Internal Link + Another Link + This is a working site + ExpectedContent: > + Internal Link + Another Link + This is a working site From ec4c47915a2f35fb046a4778571a537cbbfa29db Mon Sep 17 00:00:00 2001 From: Damian Mooyman Date: Thu, 7 Aug 2014 16:49:20 +1200 Subject: [PATCH 15/32] Update travis, composer, license --- .travis.yml | 23 +++++++++++++++++++++++ LICENSE | 24 ++++++++++++++++++++++++ README.md | 15 ++++++++------- composer.json | 9 +++++---- 4 files changed, 60 insertions(+), 11 deletions(-) create mode 100644 .travis.yml create mode 100644 LICENSE diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..8b034a2 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,23 @@ +language: php + +php: + - 5.3 + - 5.4 + +env: + - DB=MYSQL CORE_RELEASE=3.1 + +matrix: + include: + - php: 5.4 + env: DB=PGSQL CORE_RELEASE=3.1 + +before_script: + - composer self-update + - phpenv rehash + - git clone git://github.com/silverstripe-labs/silverstripe-travis-support.git ~/travis-support + - php ~/travis-support/travis_setup.php --source `pwd` --target ~/builds/ss + - cd ~/builds/ss + +script: + - vendor/bin/phpunit externallinks/tests diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..2caafb6 --- /dev/null +++ b/LICENSE @@ -0,0 +1,24 @@ +* Copyright (c) 2014, Silverstripe Ltd. +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are met: +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution. +* * Neither the name of the nor the +* names of its contributors may be used to endorse or promote products +* derived from this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY Silverstripe Ltd. ``AS IS'' AND ANY +* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL Silverstripe Ltd. BE LIABLE FOR ANY +* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/README.md b/README.md index ead445f..affbf19 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # External links +[![Build Status](https://travis-ci.org/silverstripe-labs/silverstripe-externallinks.svg?branch=master)](https://travis-ci.org/silverstripe-labs/silverstripe-externallinks) + ## Introduction The external links module is a task and ModelAdmin to track and to report on broken external links. @@ -10,7 +12,7 @@ The external links module is a task and ModelAdmin to track and to report on bro ## Requirements - * SilverStripe 3.0 + + * SilverStripe 3.1 + ## Features @@ -19,12 +21,11 @@ The external links module is a task and ModelAdmin to track and to report on bro ## Installation - 1. Download the module form GitHub (Composer support to be added) - 2. Extract the file (if you are on windows try 7-zip for extracting tar.gz files - 3. Make sure the folder after being extracted is named 'externallinks' - 4. Place this directory in your sites root directory. This is the one with framework and cms in it. - 5. Run in your browser - `/dev/build` to rebuild the database. - 6. Run the following task *http://path.to.silverstripe/dev/tasks/CheckExternalLinks* to check for broken external links + 1. If you have composer you can use `composer require silverstripe/externallinks:*`. Otherwise, + download the module from GitHub and extract to the 'externallinks' folder. Place this directory + in your sites root directory. This is the one with framework and cms in it. + 2. Run in your browser - `/dev/build` to rebuild the database. + 3. Run the following task *http://path.to.silverstripe/dev/tasks/CheckExternalLinks* to check for broken external links ## Report ## diff --git a/composer.json b/composer.json index 9726a12..4f01240 100644 --- a/composer.json +++ b/composer.json @@ -1,8 +1,9 @@ { - "name": "kmayo-ss/externallinks", + "name": "silverstripe/externallinks", "description": "Adds tracking of external broken links to the SilverStripe CMS", "type": "silverstripe-module", - "keywords": ["silverstripe", "broken links", "href"], + "keywords": ["silverstripe", "broken", "links", "href"], + "license": "BSD-3-Clause", "authors": [ { "name": "Kirk Mayo", @@ -10,8 +11,8 @@ } ], "require": { - "silverstripe/framework": ">=3.0", - "silverstripe/cms": ">=3.0" + "silverstripe/framework": "~3.1", + "silverstripe/cms": "~3.1" }, "require-dev": { "hafriedlander/silverstripe-phockito": "*", From 1fb59f92470c4b76abb655ea3365ef723af96a0f Mon Sep 17 00:00:00 2001 From: Damian Mooyman Date: Wed, 13 Aug 2014 09:17:02 +1200 Subject: [PATCH 16/32] Fix incorrect function visibility --- code/tasks/CurlLinkChecker.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/tasks/CurlLinkChecker.php b/code/tasks/CurlLinkChecker.php index 4e307ff..a84439d 100644 --- a/code/tasks/CurlLinkChecker.php +++ b/code/tasks/CurlLinkChecker.php @@ -24,7 +24,7 @@ protected function getCache() { * @param string $href URL to check * @return int HTTP status code, or null if not checkable (not a link) */ - protected function checkLink($href) { + public function checkLink($href) { // Skip non-external links if(!preg_match('/^https?[^:]*:\/\//', $href)) return null; From 732834440839bf003a4decc2516756db81a7f0d4 Mon Sep 17 00:00:00 2001 From: Damian Mooyman Date: Wed, 13 Aug 2014 10:20:39 +1200 Subject: [PATCH 17/32] API Add description for response code to report --- code/model/BrokenExternalLink.php | 27 +++++++++++++++++++--- code/reports/BrokenExternalLinksReport.php | 2 +- tests/ExternalLinksTest.php | 10 ++++++++ 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/code/model/BrokenExternalLink.php b/code/model/BrokenExternalLink.php index 7d21e07..53ff03f 100644 --- a/code/model/BrokenExternalLink.php +++ b/code/model/BrokenExternalLink.php @@ -27,7 +27,7 @@ public function Page() { public static $summary_fields = array( 'Page.Title' => 'Page', - 'HTTPCode' => 'HTTP Code', + 'HTTPCodeDescription' => 'HTTP Code', 'Created' => 'Created' ); @@ -35,15 +35,36 @@ public function Page() { 'HTTPCode' => array('title' => 'HTTP Code') ); - function canEdit($member = false) { + public function canEdit($member = false) { return false; } - function canView($member = false) { + public function canView($member = false) { $member = $member ? $member : Member::currentUser(); $codes = array('content-authors', 'administrators'); return Permission::checkMember($member, $codes); } + + /** + * Retrieve a human readable description of a response code + * + * @return string + */ + public function getHTTPCodeDescription() { + $code = $this->HTTPCode; + if(empty($code)) { + // Assume that $code = 0 means there was no response + $description = _t(__CLASS__.'.NOTAVAILABLE', 'Server Not Available'); + } elseif( + ($descriptions = Config::inst()->get('SS_HTTPResponse', 'status_codes')) + && isset($descriptions[$code]) + ) { + $description = $descriptions[$code]; + } else { + $description = _t(__CLASS__.'.UNKNOWNRESPONSE', 'Unknown Response Code'); + } + return sprintf("%d (%s)", $code, $description); + } } diff --git a/code/reports/BrokenExternalLinksReport.php b/code/reports/BrokenExternalLinksReport.php index 5809cc6..233d898 100644 --- a/code/reports/BrokenExternalLinksReport.php +++ b/code/reports/BrokenExternalLinksReport.php @@ -30,7 +30,7 @@ public function columns() { ); } ), - 'HTTPCode' => 'HTTP Error Code', + 'HTTPCodeDescription' => 'HTTP Error Code', "Title" => array( "title" => 'Page link is on', 'formatting' => function($value, $item) { diff --git a/tests/ExternalLinksTest.php b/tests/ExternalLinksTest.php index 2a4f640..b494e92 100644 --- a/tests/ExternalLinksTest.php +++ b/tests/ExternalLinksTest.php @@ -118,6 +118,16 @@ public function testLinks() { ); $actual = $links->map('Link', 'HTTPCode')->toArray(); $this->assertEquals($expected, $actual); + + // Check response descriptions are correct + i18n::set_locale('en_NZ'); + $expected = array( + 'http://www.broken.com' => '403 (Forbidden)', + 'http://www.broken.com/url/thing' => '404 (Not Found)', + 'http://www.nodomain.com' => '0 (Server Not Available)' + ); + $actual = $links->map('Link', 'HTTPCodeDescription')->toArray(); + $this->assertEquals($expected, $actual); } /** From 1aa5491d6a3a21a1cec5796e23085f9bd8bb5f7d Mon Sep 17 00:00:00 2001 From: Damian Mooyman Date: Wed, 13 Aug 2014 10:57:41 +1200 Subject: [PATCH 18/32] Update docs --- README.md | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index affbf19..55ce224 100644 --- a/README.md +++ b/README.md @@ -25,12 +25,34 @@ The external links module is a task and ModelAdmin to track and to report on bro download the module from GitHub and extract to the 'externallinks' folder. Place this directory in your sites root directory. This is the one with framework and cms in it. 2. Run in your browser - `/dev/build` to rebuild the database. - 3. Run the following task *http://path.to.silverstripe/dev/tasks/CheckExternalLinks* to check for broken external links + 3. Run the following task *http://path.to.silverstripe/dev/tasks/CheckExternalLinks* to check for + broken external links ## Report ## -A new report is added called 'External Broken links report' from here you can also start a new job which is run -via AJAX and in batches of 10 so it can be run via content editors who do not have access to jobs or tasks. +A new report is added called 'External Broken links report'. When viewing this report, a user may press +the "Create new report" button which will trigger an ajax request to initiate a report run. + +In this initial ajax request this module will do one of two things, depending on which modules are included: + +* If the queuedjobs module is installed, a new queued job will be initiated. The queuedjobs module will then + manage the progress of the task. +* If the queuedjobs module is absent, then the controller will fallback to running a buildtask in the background. + This is less robust, as a failure or error during this process will abort the run. + +In either case, the background task will loop over every page in the system, inspecting all external urls and +checking the status code returned by requesting each one. If a URL returns a response code that is considered +"broken" (defined as < 200 or > 302) then the `ss-broken` css class will be assigned to that url, and +a line item will be added to the report. If a previously broken link has been corrected or fixed, then +this class is removed. + +In the actual report generated the user can click on any broken link item to either view the link in their browser, +or edit the containing page in the CMS. + +While a report is running the current status of this report will be displayed on the report details page, along +with the status. The user may leave this page and return to it later to view the ongoing status of this report. + +Any subsequent report may not be generated until a prior report has completed. ## Dev task ## From 486ccec95c1baea9df5cce5b8b03662dcd368ad6 Mon Sep 17 00:00:00 2001 From: Sean Harvey Date: Wed, 13 Aug 2014 13:48:33 +1200 Subject: [PATCH 19/32] Fixing CurlLinkChecker::checkLink() using incorrect visibility --- code/tasks/CurlLinkChecker.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/tasks/CurlLinkChecker.php b/code/tasks/CurlLinkChecker.php index 4e307ff..a84439d 100644 --- a/code/tasks/CurlLinkChecker.php +++ b/code/tasks/CurlLinkChecker.php @@ -24,7 +24,7 @@ protected function getCache() { * @param string $href URL to check * @return int HTTP status code, or null if not checkable (not a link) */ - protected function checkLink($href) { + public function checkLink($href) { // Skip non-external links if(!preg_match('/^https?[^:]*:\/\//', $href)) return null; From aef8f6f5214f840368776091bbe86c44d91217ad Mon Sep 17 00:00:00 2001 From: Sean Harvey Date: Wed, 13 Aug 2014 14:23:03 +1200 Subject: [PATCH 20/32] Fixing CSV export columns to match the ones show in broken links report --- code/model/BrokenExternalLink.php | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/code/model/BrokenExternalLink.php b/code/model/BrokenExternalLink.php index 53ff03f..ef1f52d 100644 --- a/code/model/BrokenExternalLink.php +++ b/code/model/BrokenExternalLink.php @@ -26,9 +26,10 @@ public function Page() { } public static $summary_fields = array( - 'Page.Title' => 'Page', - 'HTTPCodeDescription' => 'HTTP Code', - 'Created' => 'Created' + 'Created' => 'Checked', + 'Link' => 'External Link', + 'HTTPCodeDescription' => 'HTTP Error Code', + 'Page.Title' => 'Page link is on' ); public static $searchable_fields = array( From 343144865718a4a943749d064e9a33b81ab827b5 Mon Sep 17 00:00:00 2001 From: Sean Harvey Date: Wed, 13 Aug 2014 14:27:40 +1200 Subject: [PATCH 21/32] Fixing static visibility and moving method to underneath statics --- code/model/BrokenExternalLink.php | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/code/model/BrokenExternalLink.php b/code/model/BrokenExternalLink.php index ef1f52d..78ed49c 100644 --- a/code/model/BrokenExternalLink.php +++ b/code/model/BrokenExternalLink.php @@ -18,24 +18,24 @@ class BrokenExternalLink extends DataObject { 'Status' => 'BrokenExternalPageTrackStatus' ); - /** - * @return SiteTree - */ - public function Page() { - return $this->Track()->Page(); - } - - public static $summary_fields = array( + private static $summary_fields = array( 'Created' => 'Checked', 'Link' => 'External Link', 'HTTPCodeDescription' => 'HTTP Error Code', 'Page.Title' => 'Page link is on' ); - public static $searchable_fields = array( + private static $searchable_fields = array( 'HTTPCode' => array('title' => 'HTTP Code') ); + /** + * @return SiteTree + */ + public function Page() { + return $this->Track()->Page(); + } + public function canEdit($member = false) { return false; } From 2a3f99b2cd4191c379af476474f2365f14436aec Mon Sep 17 00:00:00 2001 From: Sean Harvey Date: Wed, 13 Aug 2014 14:46:04 +1200 Subject: [PATCH 22/32] Removing unused static funcs and moving existing statics to top of class. --- code/tasks/CheckExternalLinksTask.php | 56 ++++++++++----------------- 1 file changed, 20 insertions(+), 36 deletions(-) diff --git a/code/tasks/CheckExternalLinksTask.php b/code/tasks/CheckExternalLinksTask.php index 97e61c6..6c3fadf 100644 --- a/code/tasks/CheckExternalLinksTask.php +++ b/code/tasks/CheckExternalLinksTask.php @@ -2,14 +2,9 @@ class CheckExternalLinksTask extends BuildTask { - /** - * Log a message - * - * @param string $message - */ - protected function log($message) { - if(!$this->silent) Debug::message($message); - } + private static $dependencies = array( + 'LinkChecker' => '%$LinkChecker' + ); /** * @var bool @@ -17,13 +12,9 @@ protected function log($message) { protected $silent = false; /** - * Turn on or off message output - * - * @param bool $silent + * @var LinkChecker */ - public function setSilent($silent) { - $this->silent = $silent; - } + protected $linkChecker; protected $title = 'Checking broken External links in the SiteTree'; @@ -31,18 +22,26 @@ public function setSilent($silent) { protected $enabled = true; - private static $dependencies = array( - 'LinkChecker' => '%$LinkChecker' - ); + /** + * Log a message + * + * @param string $message + */ + protected function log($message) { + if(!$this->silent) Debug::message($message); + } public function run($request) { $this->runLinksCheck(); } - /** - * @var LinkChecker + * Turn on or off message output + * + * @param bool $silent */ - protected $linkChecker; + public function setSilent($silent) { + $this->silent = $silent; + } /** * @param LinkChecker $linkChecker @@ -58,7 +57,6 @@ public function getLinkChecker() { return $this->linkChecker; } - /** * Check the status of a single link on a page * @@ -167,20 +165,6 @@ public function runLinksCheck($limit = null) { return $status; } - public static function getLatestTrack() { - return BrokenExternalPageTrackStatus::get_latest(); - } - - public static function getLatestTrackID() { - $track = BrokenExternalPageTrackStatus::get_latest(); - return $track ? $track->ID : null; - } - - public static function getLatestTrackStatus() { - $track = BrokenExternalPageTrackStatus::get_latest(); - return $track ? $track->Status : null; - } - private function updateCompletedPages($trackID = 0) { $noPages = BrokenExternalPageTrack::get() ->filter(array( @@ -188,7 +172,7 @@ private function updateCompletedPages($trackID = 0) { 'Processed' => 1 )) ->count(); - $track = $this->getLatestTrack($trackID); + $track = BrokenExternalPageTrackStatus::get_latest(); $track->CompletedPages = $noPages; $track->write(); return $noPages; From 65117b2599cb3e111fc17f2640788014917f290b Mon Sep 17 00:00:00 2001 From: Sean Harvey Date: Wed, 13 Aug 2014 14:55:47 +1200 Subject: [PATCH 23/32] BUG Indicator stuck on 0% until you refresh the page. admin/externallinks/start is called, but asynchronously, so the poll() call is never called immediately on start, you have to refresh the page to get it to show. --- javascript/BrokenExternalLinksReport.js | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/javascript/BrokenExternalLinksReport.js b/javascript/BrokenExternalLinksReport.js index 5b3c853..c6d84c9 100644 --- a/javascript/BrokenExternalLinksReport.js +++ b/javascript/BrokenExternalLinksReport.js @@ -1,13 +1,14 @@ (function($) { $.entwine('ss', function($) { $('#externalLinksReport').entwine({ + PollTimeout: null, onclick: function() { - $(this).start(); + this.start(); }, onmatch: function() { // poll the current job and update the front end status $('#externalLinksReport').hide(); - $(this).poll(0); + this.poll(); }, start: function() { // initiate a new job @@ -15,10 +16,12 @@ $('#ReportHolder').text('Running report 0%'); $('#ReportHolder').append(''); $('#externalLinksReport').hide(); - $.ajax({url: "admin/externallinks/start", async: true, timeout: 3000 }); - $(this).poll(1); + $.ajax({url: "admin/externallinks/start", async: false, timeout: 3000 }); + this.poll(); }, - poll: function(start) { + poll: function() { + var self = this; + $.ajax({ url: "admin/externallinks/getJobStatus", async: true, @@ -47,11 +50,14 @@ .text('Running report ' + completed + '/' + total + ' (' + percent.toFixed(2) + '%)') .append(''); } - + // Ensure the regular poll method is run - if(!start) { - setTimeout(function() { $('#externalLinksReport').poll(0); }, 1000); + // kill any existing timeout + if(self.getPollTimeout() !== null) { + clearTimeout(self.getPollTimeout()); } + + self.setPollTimeout(setTimeout(function() { $('#externalLinksReport').poll(); }, 1000)); }, error: function(e) { if(typeof console !== 'undefined') console.log(e); From a35241cd105661f2ad52d079528082d632948633 Mon Sep 17 00:00:00 2001 From: Damian Mooyman Date: Wed, 19 Nov 2014 09:54:06 +1300 Subject: [PATCH 24/32] Update translations --- code/model/BrokenExternalLink.php | 4 ++-- lang/_manifest_exclude | 0 lang/en.yml | 17 +++++++++++++++++ 3 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 lang/_manifest_exclude create mode 100644 lang/en.yml diff --git a/code/model/BrokenExternalLink.php b/code/model/BrokenExternalLink.php index 78ed49c..18c484f 100644 --- a/code/model/BrokenExternalLink.php +++ b/code/model/BrokenExternalLink.php @@ -55,14 +55,14 @@ public function getHTTPCodeDescription() { $code = $this->HTTPCode; if(empty($code)) { // Assume that $code = 0 means there was no response - $description = _t(__CLASS__.'.NOTAVAILABLE', 'Server Not Available'); + $description = _t('BrokenExternalLink.NOTAVAILABLE', 'Server Not Available'); } elseif( ($descriptions = Config::inst()->get('SS_HTTPResponse', 'status_codes')) && isset($descriptions[$code]) ) { $description = $descriptions[$code]; } else { - $description = _t(__CLASS__.'.UNKNOWNRESPONSE', 'Unknown Response Code'); + $description = _t('BrokenExternalLink.UNKNOWNRESPONSE', 'Unknown Response Code'); } return sprintf("%d (%s)", $code, $description); } diff --git a/lang/_manifest_exclude b/lang/_manifest_exclude new file mode 100644 index 0000000..e69de29 diff --git a/lang/en.yml b/lang/en.yml new file mode 100644 index 0000000..27c4b0c --- /dev/null +++ b/lang/en.yml @@ -0,0 +1,17 @@ +en: + BrokenExternalLink: + NOTAVAILABLE: 'Server Not Available' + PLURALNAME: 'Broken External Links' + SINGULARNAME: 'Broken External Link' + UNKNOWNRESPONSE: 'Unknown Response Code' + BrokenExternalPageTrack: + PLURALNAME: 'Broken External Page Tracks' + SINGULARNAME: 'Broken External Page Track' + BrokenExternalPageTrackStatus: + PLURALNAME: 'Broken External Page Track Statuss' + SINGULARNAME: 'Broken External Page Track Status' + CheckExternalLiksJob: + TITLE: 'Checking for external broken links' + ExternalBrokenLinksReport: + EXTERNALBROKENLINKS: 'External broken links report' + RUNREPORT: 'Create new report' From 38659bac9839e12ceb028ab74736748a89f55bf3 Mon Sep 17 00:00:00 2001 From: Ingo Schommer Date: Wed, 29 Apr 2015 14:28:08 +1200 Subject: [PATCH 25/32] Include Hamcrest without clashing with PHPUnit globals PHPUnit had a Functions.php with global methods like any() for a while (3.7 at least), which clashes with similar globals from Hamcrest (used in Phockito). Both PHPUnit and Phockito use 'classmap' composer autoloading, but that's not directly requiring/evaluating the files. The problem is caused by SilverStripe's ClassLoader which does require ALL subclasses of SapphireTest. This in turn causes PHP outside of the class context to execute, which includes Hamcrest. Changing the include_hamcrest() is not strictly necessary here, but makes the code a bit more resilient against any preceding test including Functions.php from PHPUnit. See https://github.com/hafriedlander/phockito/issues/32 for context. --- tests/ExternalLinksTest.php | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/ExternalLinksTest.php b/tests/ExternalLinksTest.php index b494e92..754aeaf 100644 --- a/tests/ExternalLinksTest.php +++ b/tests/ExternalLinksTest.php @@ -1,7 +1,5 @@ return(null); Phockito::when($checker) - ->checkLink(anything()) // anything else is 404 + ->checkLink(Hamcrest_Matchers::anything()) // anything else is 404 ->return(404); Injector::inst()->registerService($checker, 'LinkChecker'); From 4411d2e85dd116a47b8f68bd134a87532a086e2f Mon Sep 17 00:00:00 2001 From: Daniel Hensby Date: Mon, 20 Jul 2015 16:20:54 +0100 Subject: [PATCH 26/32] Move to new travis containerised infrastructure --- .travis.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8b034a2..482eb66 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,6 @@ -language: php +language: php + +sudo: false php: - 5.3 @@ -13,7 +15,7 @@ matrix: env: DB=PGSQL CORE_RELEASE=3.1 before_script: - - composer self-update + - composer self-update || true - phpenv rehash - git clone git://github.com/silverstripe-labs/silverstripe-travis-support.git ~/travis-support - php ~/travis-support/travis_setup.php --source `pwd` --target ~/builds/ss From 928f6049a01b696f7f7d7a14b3f5ac78195e9ff3 Mon Sep 17 00:00:00 2001 From: Damian Mooyman Date: Fri, 30 Oct 2015 14:53:10 +1300 Subject: [PATCH 27/32] Include tests for 3.2 and php 5.6 --- .travis.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 482eb66..87badff 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,6 @@ sudo: false php: - 5.3 - - 5.4 env: - DB=MYSQL CORE_RELEASE=3.1 @@ -13,6 +12,10 @@ matrix: include: - php: 5.4 env: DB=PGSQL CORE_RELEASE=3.1 + - php: 5.5 + env: DB=MYSQL CORE_RELEASE=3 + - php: 5.6 + env: DB=MYSQL CORE_RELEASE=3.2 before_script: - composer self-update || true From c37a13b3e22d6811d2c12e9bc352c86bd2e8e570 Mon Sep 17 00:00:00 2001 From: Mikron Date: Fri, 6 Nov 2015 11:19:37 +0100 Subject: [PATCH 28/32] PL translation added --- lang/pl.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 lang/pl.yml diff --git a/lang/pl.yml b/lang/pl.yml new file mode 100644 index 0000000..9a63f7d --- /dev/null +++ b/lang/pl.yml @@ -0,0 +1,17 @@ +pl: + BrokenExternalLink: + NOTAVAILABLE: 'Serwer niedostępny' + PLURALNAME: 'Uszkodzone linki zewnętrzne' + SINGULARNAME: 'Uszkodzony link zewnętrzny' + UNKNOWNRESPONSE: 'Nieznany kod odpowiedzi' + BrokenExternalPageTrack: + PLURALNAME: 'Wykrywania wadliwych stron zewnętrznych' + SINGULARNAME: 'Wykrywanie wadliwych stron zewnętrznych' + BrokenExternalPageTrackStatus: + PLURALNAME: 'Statusy wykrywania wadliwych stron zewnętrznych' + SINGULARNAME: 'Status wykrywania wadliwych stron zewnętrznych' + CheckExternalLiksJob: + TITLE: 'Wyszukiwanie uszkodzonych linków zewnętrznych' + ExternalBrokenLinksReport: + EXTERNALBROKENLINKS: 'Raport uszkodzonych linków zewnętrznych' + RUNREPORT: 'Stwórz nowy raport' From 0531d699e8580c197ffa4718229998b290dc1ef1 Mon Sep 17 00:00:00 2001 From: Damian Mooyman Date: Mon, 16 Nov 2015 10:22:03 +1300 Subject: [PATCH 29/32] Add module to transifex --- .tx/config | 8 ++++++++ lang/pl.yml | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 .tx/config diff --git a/.tx/config b/.tx/config new file mode 100644 index 0000000..360a807 --- /dev/null +++ b/.tx/config @@ -0,0 +1,8 @@ +[main] +host = https://www.transifex.com + +[silverstripe-externallinks.master] +file_filter = lang/.yml +source_file = lang/en.yml +source_lang = en +type = YML diff --git a/lang/pl.yml b/lang/pl.yml index 9a63f7d..795f089 100644 --- a/lang/pl.yml +++ b/lang/pl.yml @@ -2,7 +2,7 @@ pl: BrokenExternalLink: NOTAVAILABLE: 'Serwer niedostępny' PLURALNAME: 'Uszkodzone linki zewnętrzne' - SINGULARNAME: 'Uszkodzony link zewnętrzny' + SINGULARNAME: 'Uszkodzony link zewnętrzny' UNKNOWNRESPONSE: 'Nieznany kod odpowiedzi' BrokenExternalPageTrack: PLURALNAME: 'Wykrywania wadliwych stron zewnętrznych' From 929bf30ea5c4103730e457813b9265a0c43887f1 Mon Sep 17 00:00:00 2001 From: Damian Mooyman Date: Thu, 19 Nov 2015 13:28:08 +1300 Subject: [PATCH 30/32] Release 1.0.3 --- CHANGELOG.md | 10 ++++++++++ README.md | 4 +++- composer.json | 5 +++++ 3 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..64c19ac --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,10 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +This project adheres to [Semantic Versioning](http://semver.org/). + +## [1.0.3] + +* Changelog added. +* Update translations. diff --git a/README.md b/README.md index 55ce224..9bae495 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ The external links module is a task and ModelAdmin to track and to report on bro ## Maintainer Contact - * Kirk Mayo kirk (at) silverstripe (dot) com + * Damian Mooyman (@tractorcow) ## Requirements @@ -19,6 +19,8 @@ The external links module is a task and ModelAdmin to track and to report on bro * Add external links to broken links reports * Add a task to track external broken links +See the [changelog](CHANGELOG.md) for version history. + ## Installation 1. If you have composer you can use `composer require silverstripe/externallinks:*`. Otherwise, diff --git a/composer.json b/composer.json index 4f01240..e358e3c 100644 --- a/composer.json +++ b/composer.json @@ -20,5 +20,10 @@ }, "suggest": { "silverstripe/queuedjobs": "Speeds up running the job for Content Editors fropm the report" + }, + "extra": { + "branch-alias": { + "dev-master": "1.0.x-dev" + } } } From 638844b34a8756009667831dae408db8e853e832 Mon Sep 17 00:00:00 2001 From: helpfulrobot Date: Sat, 21 Nov 2015 20:13:30 +1300 Subject: [PATCH 31/32] Added standard code of conduct --- code-of-conduct.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 code-of-conduct.md diff --git a/code-of-conduct.md b/code-of-conduct.md new file mode 100644 index 0000000..53bf39c --- /dev/null +++ b/code-of-conduct.md @@ -0,0 +1 @@ +When having discussions about this module in issues or pull request please adhere to the [SilverStripe Community Code of Conduct](https://docs.silverstripe.org/en/contributing/code_of_conduct). From 14ccb2aa8d33ecb120ca6e5ac294c52de5e1d973 Mon Sep 17 00:00:00 2001 From: helpfulrobot Date: Thu, 17 Dec 2015 10:34:43 +1300 Subject: [PATCH 32/32] Added standard .editorconfig file --- .editorconfig | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 .editorconfig diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..47ae637 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,17 @@ +# For more information about the properties used in this file, +# please see the EditorConfig documentation: +# http://editorconfig.org + +[*] +charset = utf-8 +end_of_line = lf +indent_size = 4 +indent_style = space +insert_final_newline = true +trim_trailing_whitespace = true + +[{*.yml,package.json}] +indent_size = 2 + +# The indent size used in the package.json file cannot be changed: +# https://github.com/npm/npm/pull/3180#issuecomment-16336516