Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pkp/pkp-lib#10321 Fix ConvertApacheAccessLogFile CLI tool #10322

Merged
merged 1 commit into from
Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 52 additions & 8 deletions classes/cliTool/traits/ConvertLogFile.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
use APP\statistics\StatisticsHelper;
use DateTime;
use Exception;
use PKP\config\Config;
use PKP\core\Core;
use PKP\core\Registry;
use PKP\db\DAORegistry;
Expand Down Expand Up @@ -169,7 +170,6 @@ public function convert(string $fileName): void
}

$newEntry['userAgent'] = $entryData['userAgent'];
$newEntry['canonicalUrl'] = $entryData['url'];

[
'workingAssocType' => $assocType,
Expand All @@ -188,13 +188,23 @@ public function convert(string $fileName): void
$context = $this->contextsByPath[$foundContextPath];
$newEntry['contextId'] = $context->getId();

$this->setAssoc($assocType, $op, $args, $newEntry);
// temporarily set the canonicalUrlPage that is needed in the child class
$newEntry['canonicalUrlPage'] = $page;
$this->setAssoc($assocType, $args, $newEntry);
if (!array_key_exists('assocType', $newEntry)) {
if (!$this->isApacheAccessLogFile()) {
fwrite(STDERR, "The URL {$entryData['url']} in the line number {$lineNumber} was not considered." . PHP_EOL);
}
continue;
}

$canonicalUrl = $entryData['url']; // if this is not the apache log file i.e. it is the internal log file, the URLs are already canonical
if ($this->isApacheAccessLogFile()) {
$canonicalUrl = $this->getCanonicalUrl($foundContextPath, $newEntry['canonicalUrlPage'], $newEntry['canonicalUrlOp'], $newEntry['canonicalUrlArgs'] ?? null);
}
$newEntry['canonicalUrl'] = $canonicalUrl;
// unset elements that are temporarily used and should not be logged
unset($newEntry['canonicalUrlPage'], $newEntry['canonicalUrlOp'], $newEntry['canonicalUrlArgs']);
} else {
continue;
}
Expand Down Expand Up @@ -408,12 +418,14 @@ protected function getExpectedPageAndOp(): array
break;
case 'omp':
// Before 3.4 OMP did not have chapter assoc type i.e. chapter landing page
// so no need to consider it here
// consider it here however, in order to allow current apache access log file conversion
$pageAndOp = $pageAndOp + [
Application::ASSOC_TYPE_SUBMISSION_FILE => [
'catalog/download'],
Application::ASSOC_TYPE_MONOGRAPH => [
'catalog/book'],
Application::ASSOC_TYPE_CHAPTER => [
'catalog/book'],
Application::ASSOC_TYPE_SERIES => [
'catalog/series']
];
Expand Down Expand Up @@ -479,7 +491,7 @@ protected static function getContextPaths(string $urlInfo, bool $isPathInfo): ar
protected static function getPage(string $urlInfo, bool $isPathInfo): string
{
    // The component at offset 0 is requested under the variable name 'page'.
    $page = self::getUrlComponents($urlInfo, $isPathInfo, 0, 'page');

    // A missing (null) component becomes an empty string before it is cleaned.
    return Core::cleanFileVar($page ?? '');
}

/**
Expand All @@ -490,7 +502,7 @@ protected static function getPage(string $urlInfo, bool $isPathInfo): string
protected static function getOp(string $urlInfo, bool $isPathInfo): string
{
    // The component at offset 1 is requested under the variable name 'op'.
    $operation = self::getUrlComponents($urlInfo, $isPathInfo, 1, 'op');

    // Any empty value (null, '', '0', ...) falls back to the default operation 'index'.
    return Core::cleanFileVar($operation ?: 'index');
}

/**
Expand All @@ -508,7 +520,7 @@ protected static function getArgs(string $urlInfo, bool $isPathInfo): array
* Get url components (page, operation and args)
* based on the passed offset.
*/
protected static function getUrlComponents(string $urlInfo, bool $isPathInfo, int $offset, string $varName = ''): mixed
protected static function getUrlComponents(string $urlInfo, bool $isPathInfo, int $offset, string $varName = ''): array|string|null
{
$component = null;

Expand All @@ -517,7 +529,6 @@ protected static function getUrlComponents(string $urlInfo, bool $isPathInfo, in
$isArrayComponent = true;
}
if ($isPathInfo) {
$application = Application::get();
$contextDepth = 1; // Was $application->getContextDepth();

$vars = explode('/', trim($urlInfo, '/'));
Expand All @@ -544,10 +555,43 @@ protected static function getUrlComponents(string $urlInfo, bool $isPathInfo, in
return $component;
}

/**
 * Construct the canonical URL from context path, page, op, and args.
 *
 * The URL is built by the application dispatcher as a page route. If the base URL of the
 * current request differs from the configured general base_url and is not one of the
 * configured base URL overrides, it is treated as an alias and replaced in the result by
 * the configured value (the override for the given context if one is set, otherwise the
 * general base_url).
 *
 * @param string $contextPath Path of the context the URL belongs to
 * @param string $canonicalUrlPage Page component of the URL
 * @param string $canonicalUrlOp Operation component of the URL
 * @param array|null $canonicalUrlArgs Arguments handed to the dispatcher as the URL path, if any
 *
 * @return string The constructed URL, with a base URL alias replaced by the configured base URL
 */
protected function getCanonicalUrl(string $contextPath, string $canonicalUrlPage, string $canonicalUrlOp, ?array $canonicalUrlArgs = null): string
{
    $application = Application::get();
    $request = $application->getRequest();

    $canonicalUrl = $application->getDispatcher()->url(
        $request,
        Application::ROUTE_PAGE,
        $contextPath,
        $canonicalUrlPage,
        $canonicalUrlOp,
        $canonicalUrlArgs,
    );

    // Make sure we log the server name and not aliases.
    $configBaseUrl = Config::getVar('general', 'base_url');
    $requestBaseUrl = $request->getBaseUrl();
    if ($requestBaseUrl === $configBaseUrl) {
        return $canonicalUrl;
    }

    // Make sure it's not a URL override (there is no alias in that case).
    $isBaseUrlOverride = in_array($requestBaseUrl, Config::getContextBaseUrls(), true)
        || $requestBaseUrl === Config::getVar('general', 'base_url[index]');
    if ($isBaseUrlOverride) {
        return $canonicalUrl;
    }

    // Alias found, replace it by base_url from the config file.
    // Use the base URL override value for the context, if any.
    $baseUrlReplacement = Config::getVar('general', 'base_url[' . $contextPath . ']') ?: $configBaseUrl;

    return str_replace($requestBaseUrl, $baseUrlReplacement, $canonicalUrl);
}

/**
* Set assoc type and IDs from the passed page, operation and arguments.
*/
protected function setAssoc(int $assocType, string $op, array $args, array &$newEntry): void
protected function setAssoc(int $assocType, array $args, array &$newEntry): void
{
$application = Application::get();
$applicationName = $application->getName();
Expand Down
Loading