Skip to content

Commit

Permalink
Ignore up to 30 consecutive malformed lines.
Browse files Browse the repository at this point in the history
Previously, when the parser encountered a bad line, it would throw an exception and stop execution. This was a problem when trying to parse logs that contain malformed requests (e.g. due to someone trying to hack the server; yes, that has happened). Most of the file was still good, but the malformed lines caused the parser to stop half-way.

Instead, let's ignore a small number of parser errors and keep going. However, if there are lots of consecutive errors, that probably means the file is not actually a valid wp-update-server log, so let's stop then.
  • Loading branch information
YahnisElsts committed Jun 9, 2017
1 parent be61a44 commit e5942ad
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 3 deletions.
22 changes: 20 additions & 2 deletions src/BasicLogAnalyser.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ class BasicLogAnalyser {
*/
const INVALID_VER_REPLACEMENT = 'obfuscated';

const MAX_CONSECUTIVE_BAD_LINES = 30;

private $logFiles;

/**
Expand Down Expand Up @@ -114,8 +116,9 @@ private function sortByFirstTimestamp($fileNames) {
*
* @param string|int $fromTimestamp
* @param string|int $toTimestamp
* @param bool $ignoreConsecutiveErrors
*/
public function parse($fromTimestamp = null, $toTimestamp = null) {
public function parse($fromTimestamp = null, $toTimestamp = null, $ignoreConsecutiveErrors = false) {
if (isset($fromTimestamp) && !is_int($fromTimestamp)) {
$fromTimestamp = strtotime($fromTimestamp);
}
Expand Down Expand Up @@ -164,6 +167,7 @@ public function parse($fromTimestamp = null, $toTimestamp = null) {

$this->database->beginTransaction();
$lastHour = -1;
$consecutiveMalformedLines = 0;

while (!$input->feof()) {
$this->currentLineNumber++;
Expand All @@ -174,7 +178,21 @@ public function parse($fromTimestamp = null, $toTimestamp = null) {
continue;
}

$entry = $this->parseLogEntry($line);
try {
$entry = $this->parseLogEntry($line);
$consecutiveMalformedLines = 0;
} catch (LogParserException $ex) {
$this->output($ex->getMessage());
$consecutiveMalformedLines++;

if (!$ignoreConsecutiveErrors && ($consecutiveMalformedLines > self::MAX_CONSECUTIVE_BAD_LINES)) {
$this->output('Error: Too many consecutive bad lines. Is this really a valid log file?');
$this->output('Parsing was stopped. Use --ignore-bad-lines to disable this safeguard.');
break;
}
continue;
}

$timestamp = $entry['timestamp'];
$date = gmdate('Y-m-d', $timestamp);
$slug = $entry['slug'];
Expand Down
6 changes: 5 additions & 1 deletion src/LogParserCli.php
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public function run() {
}
}

$analyser->parse($options['dateRange']['from'], $options['dateRange']['to']);
$analyser->parse($options['dateRange']['from'], $options['dateRange']['to'], $options['ignore-bad-lines']);

printf(
"Peak memory usage: %.2f MiB\n",
Expand All @@ -48,6 +48,7 @@ private function parseOptions() {
'from:',
'to:',
'from-last-date',
'ignore-bad-lines',
'help'
)
);
Expand Down Expand Up @@ -109,6 +110,8 @@ private function parseOptions() {
echo "Ignoring --from-last-date because --from is specified.\n";
}

$options['ignore-bad-lines'] = isset($options['ignore-bad-lines']);

return $options;
}

Expand All @@ -128,6 +131,7 @@ private function showUsage() {
--to <YYYY-MM-DD> Parse up to this date (UTC).
--from-last-date Automatically restart analysis from the last processed date.
If the database is empty this flag has no effect.
--ignore-bad-lines Continue parsing even if there are lots of consecutive malformed lines.
--help Display this message.
You must specify either "--log" or "--dir". All other arguments are optional.
Expand Down

0 comments on commit e5942ad

Please sign in to comment.