From 91cfd5fd9de4714e23f55499714c841802503964 Mon Sep 17 00:00:00 2001
From: Marco Mambelli
Date: Mon, 16 Dec 2024 00:44:00 -0600
Subject: [PATCH] Added logserver and fixed Glidein log token and URL
propagation
Added PHP and Python examples for a server receiving Glidein logs
Fixed JWT generation and the GLIDEIN_LOG_RECIPIENTS_FACTORY parameter for the Factory log server; also improved code and docstrings, and added documentation for logging and the log server
Added an example custom script using Glidein logging
Fixed the mod_ssl dependency, fixed ReleaseManagerLib bugs, and replaced the deprecated optparse with argparse
Added the glideinwms-logserver RPM and adjusted ReleaseManagerLib for it
---
.reuse/dep5 | 4 +
CHANGELOG.md | 2 +
build/ReleaseManager/ReleaseManagerLib.py | 54 +++++-
build/ReleaseManager/release.py | 84 ++++-----
build/packaging/rpm/glideinwms.spec | 53 +++++-
build/packaging/rpm/gwms-logserver.conf.httpd | 63 +++++++
creation/lib/cgWCreate.py | 4 +-
creation/web_base/glidein_startup.sh | 21 ++-
creation/web_base/logging_test.sh | 59 ++++++
creation/web_base/logging_utils.source | 67 +++----
doc/factory/custom_scripts.html | 63 +++++--
doc/factory/custom_vars.html | 16 ++
factory/glideFactory.py | 145 +++++++--------
logserver/README.md | 73 ++++++++
logserver/getjwt.py | 94 ++++++++++
logserver/jwt.php | 74 ++++++++
logserver/logging_config.json | 10 +
logserver/web-area/put.php | 171 ++++++++++++++++++
18 files changed, 859 insertions(+), 198 deletions(-)
create mode 100644 build/packaging/rpm/gwms-logserver.conf.httpd
create mode 100644 creation/web_base/logging_test.sh
create mode 100644 logserver/README.md
create mode 100644 logserver/getjwt.py
create mode 100644 logserver/jwt.php
create mode 100644 logserver/logging_config.json
create mode 100644 logserver/web-area/put.php
diff --git a/.reuse/dep5 b/.reuse/dep5
index 727b2c5d48..20eae377e2 100644
--- a/.reuse/dep5
+++ b/.reuse/dep5
@@ -13,6 +13,10 @@ Files: .codecov.yml .coveragerc .editorconfig .gitattributes .gitignore .gitmodu
Copyright: 2009 Fermi Research Alliance, LLC
License: Apache-2.0
+Files: logserver/logging_config.json
+Copyright: 2009 Fermi Research Alliance, LLC
+License: Apache-2.0
+
Files: .github/ISSUE_TEMPLATE/* bigfiles/* etc/* config/* creation/templates/*.service creation/templates/*.cron creation/templates/*.timer doc/tags*txt lib/logging.conf test/bats/fixtures/* unittests/*.fixture unittests/fixtures/*
Copyright: 2009 Fermi Research Alliance, LLC
License: Apache-2.0
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 946bf12ae1..b5bc632756 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ Changes since the last release
- item one of the list
- item N
+- Added custom log server example (glideinwms-logging) (Issue #398, PR #467)
### Changed defaults / behaviours
@@ -21,6 +22,7 @@ Changes since the last release
### Bug Fixes
- Fixed early truncation in log files configuration and inconsistent documentation (Issue #464, PR #462, PR #463)
+- Fixed JWT logging credentials not transferred to the Glidein. This includes removal of DictFile.append() and use of add_environment() for JWT tokens (Issue #398, PR #467)
### Testing / Development
diff --git a/build/ReleaseManager/ReleaseManagerLib.py b/build/ReleaseManager/ReleaseManagerLib.py
index daaf31d270..d440cd3a88 100644
--- a/build/ReleaseManager/ReleaseManagerLib.py
+++ b/build/ReleaseManager/ReleaseManagerLib.py
@@ -38,11 +38,11 @@ def __init__(self, ver, srcDir, relDir, rc, rpmRel):
self.rpmVersion = self.versionToRPMVersion(ver)
self.rpmbuildDir = os.path.join(self.releaseDir, "rpmbuild")
self.rpmOSVersion = self.getElVersion()
+ # SRPM name has "el" also for AlmaLinux
self.srpmFile = os.path.join(
self.rpmbuildDir,
"SRPMS",
- "glideinwms-%s-%s.%s%s.src.rpm"
- % (self.rpmVersion, self.rpmRelease, self.rpmOSVersion[0], self.rpmOSVersion[1]),
+ f"glideinwms-{self.rpmVersion}-{self.rpmRelease}.{self.rpmOSVersion[0]}{self.rpmOSVersion[1]}.src.rpm",
)
self.buildRPMs = bool(which("rpmbuild"))
if not self.buildRPMs:
@@ -81,22 +81,34 @@ def getElVersion(self):
# Deprecated - distname, version, id = distro.linux_distribution()
distname = distro.name() # If full_distribution_name is false, the result of distro.id()
version = distro.version()
- id = distro.codename()
+ dist_id = distro.codename()
else:
# TODO: remove the else branch once Py3.6 is no more supported
- distname, version, id = platform.linux_distribution() # pylint: disable=no-member
- distmap = {"Fedora": "fc", "Scientific Linux": "el", "Red Hat": "el", "CentOS Stream": "el"}
+ distname, version, dist_id = platform.linux_distribution() # pylint: disable=no-member
+        # NOTE: re-check these mock profile names if they change in future releases
+        # As of Dec 2024 on AlmaLinux 9: alma+epel-..., rhel+epel-..., centos-stream+epel-...
+        # No profile is named plain epel-... (except possibly rhel-7 for sl7)
+ distmap = {
+ "Fedora": ("fc", "fedora"),
+ "Scientific Linux": ("el", "epel"),
+ "Red Hat": ("el", "rhel+epel"),
+ "CentOS Stream": ("el", "centos-stream+epel"),
+ "AlmaLinux": ("el", "alma+epel"),
+ "RockyLinux": ("el", "rocky+epel"),
+ }
dist = None
+ el_profile = None
for d in distmap:
if distname.startswith(d):
- dist = distmap[d]
+ dist = distmap[d][0]
+ el_profile = distmap[d][1]
break
if dist is None:
raise Exception("Unsupported distribution: %s" % distname)
else:
el_string = dist
major_version = version.split(".")[0]
- return (el_string, major_version)
+ return el_string, major_version, el_profile
def addTask(self, task):
self.tasks.append(task)
@@ -277,9 +289,10 @@ def checksumRelease(self, chksumFile, exclude):
class TaskRPM(TaskTar):
- def __init__(self, rel, python_version, use_mock=True):
+ def __init__(self, rel, python_version, use_mock=True, verbose=False):
TaskTar.__init__(self, rel)
self.name = "GlideinwmsRPM"
+ self.verbose = verbose
self.use_mock = use_mock
self.python_version = python_version
self.releaseFile = os.path.join(self.release.releaseDir, self.releaseFilename)
@@ -288,6 +301,7 @@ def __init__(self, rel, python_version, use_mock=True):
self.specFile = os.path.join(self.release.rpmbuildDir, "SPECS", "glideinwms.spec")
# self.rpmmacrosFile = os.path.join(os.path.expanduser('~'),
self.rpmmacrosFile = os.path.join(os.path.dirname(self.release.rpmbuildDir), ".rpmmacros")
+        # Files in build/packaging/rpm to copy into the SOURCES directory
self.sourceFilenames = [
"chksum.sh",
"factory_startup",
@@ -300,6 +314,7 @@ def __init__(self, rel, python_version, use_mock=True):
"gwms-factory.sysconfig",
"gwms-frontend.conf.httpd",
"gwms-frontend.sysconfig",
+ "gwms-logserver.conf.httpd",
]
self.rpmMacros = {
"_topdir": self.release.rpmbuildDir,
@@ -318,6 +333,8 @@ def createRPMBuildDirs(self):
rpm_dirs = ["BUILD", "RPMS", "SOURCES", "SPECS", "SRPMS"]
for dirname in rpm_dirs:
create_dir(os.path.join(self.release.rpmbuildDir, dirname))
+ if self.verbose:
+ print(f"RPM build directories created in {self.release.rpmbuildDir}")
def createSpecFile(self):
# No error checking because we want to fail in case of errors
@@ -351,33 +368,50 @@ def buildSRPM(self):
cmd = "rpmbuild -bs %s" % self.specFile
for m in self.rpmMacros:
cmd = f'{cmd} --define "{m} {self.rpmMacros[m]}"'
+ if self.verbose:
+ print(f"Building source RPM: {cmd}")
execute_cmd(cmd)
def buildRPM(self):
- cmd = "mock -r epel-{}-x86_64 --macro-file={} -i {}".format(
+        # NOTE: re-check these mock profile names if they change in future releases
+        # As of Dec 2024 on AlmaLinux 9: alma+epel-..., rhel+epel-..., centos-stream+epel-...
+        # No profile is named plain epel-... (except possibly rhel-7 for sl7)
+ cmd = "mock -r {}-{}-x86_64 --macro-file={} -i {}".format(
+ self.release.rpmOSVersion[2],
self.release.rpmOSVersion[1],
self.rpmmacrosFile,
self.python_version,
)
+ if self.verbose:
+ print(f"Build mock environment: {cmd}")
execute_cmd(cmd)
- cmd = "mock --no-clean -r epel-{}-x86_64 --macro-file={} --resultdir={}/RPMS rebuild {}".format(
+ cmd = "mock --no-clean -r {}-{}-x86_64 --macro-file={} --resultdir={}/RPMS rebuild {}".format(
+ self.release.rpmOSVersion[2],
self.release.rpmOSVersion[1],
self.rpmmacrosFile,
self.release.rpmbuildDir,
self.release.srpmFile,
)
+ if self.verbose:
+ print(f"Build RPM with mock: {cmd}")
execute_cmd(cmd)
def buildRPMWithRPMBuild(self):
cmd = "rpmbuild -bb %s" % self.specFile
for m in self.rpmMacros:
cmd = f'{cmd} --define "{m} {self.rpmMacros[m]}"'
+ if self.verbose:
+ print(f"Build RPM without mock: {cmd}")
execute_cmd(cmd)
def execute(self):
if not self.release.buildRPMs:
self.status = "SKIPPED"
else:
+ if self.verbose:
+ print(
+ f"Building RPM (version:{self.release.rpmVersion}, release:{self.release.rpmRelease}, use_mock:{self.use_mock})"
+ )
# First build the source tarball
# TaskTar.execute(self)
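For illustration only (not part of the patch): on an AlmaLinux 9 build host the new `getElVersion()` above returns `("el", "9", "alma+epel")`, so `TaskRPM.buildRPM()` now selects the `alma+epel-9-x86_64` mock profile instead of `epel-9-x86_64`. The sketch below only reproduces that string formatting; the macro file, SRPM name, and directories are hypothetical.

```python
# Sketch mirroring the string formatting in TaskRPM.buildRPM() above (all values are assumptions).
rpm_os_version = ("el", "9", "alma+epel")  # what getElVersion() returns on AlmaLinux 9: (el_string, major_version, el_profile)
rpmmacros_file = "/var/tmp/release/.rpmmacros"  # hypothetical path
python_version = "python3"
srpm_file = "glideinwms-3.10.x-1.el9.src.rpm"  # hypothetical SRPM name
rpmbuild_dir = "/var/tmp/release/rpmbuild"

init_cmd = "mock -r {}-{}-x86_64 --macro-file={} -i {}".format(
    rpm_os_version[2], rpm_os_version[1], rpmmacros_file, python_version
)
rebuild_cmd = "mock --no-clean -r {}-{}-x86_64 --macro-file={} --resultdir={}/RPMS rebuild {}".format(
    rpm_os_version[2], rpm_os_version[1], rpmmacros_file, rpmbuild_dir, srpm_file
)
print(init_cmd)     # mock -r alma+epel-9-x86_64 --macro-file=/var/tmp/release/.rpmmacros -i python3
print(rebuild_cmd)  # mock --no-clean -r alma+epel-9-x86_64 ... rebuild glideinwms-3.10.x-1.el9.src.rpm
```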
diff --git a/build/ReleaseManager/release.py b/build/ReleaseManager/release.py
index c865b000c5..6cf82e7425 100755
--- a/build/ReleaseManager/release.py
+++ b/build/ReleaseManager/release.py
@@ -6,7 +6,8 @@
import sys
import os
-import optparse
+import argparse
+from argparse import RawDescriptionHelpFormatter
# Necessary to allow relative import when started as executable
if __name__ == "__main__" and __package__ is None:
@@ -42,7 +43,7 @@ def manager_version():
def usage():
- help = [
+ help_str = [
"%s " % os.path.basename(sys.argv[0]),
"NOTE that this script works on the files in your current directory tree",
"- no git operations like clone/checkout are performed",
@@ -64,113 +65,92 @@ def usage():
"release.py --release-version=3_2_11 --source-dir=/home/parag/glideinwms --release-dir=/var/tmp/release --rpm-release=3",
"",
]
- return "\n".join(help)
+ return "\n".join(help_str)
def parse_opts(argv):
- parser = optparse.OptionParser(usage=usage(), version=manager_version(), conflict_handler="resolve")
- parser.add_option(
+ parser = argparse.ArgumentParser(
+ prog=os.path.basename(sys.argv[0]),
+ description=usage(),
+ conflict_handler="resolve",
+ formatter_class=RawDescriptionHelpFormatter,
+ )
+ parser.add_argument(
"--release-version",
- dest="relVersion",
action="store",
+ required=True,
metavar="",
help="GlideinWMS version to release (format w/ underscores, for tarball, RPM version derived from it)",
)
- parser.add_option(
+ parser.add_argument(
"--source-dir",
- dest="srcDir",
action="store",
+ required=True,
metavar="
-
+
+
diff --git a/doc/factory/custom_scripts.html b/doc/factory/custom_scripts.html
+      Logging
+
+      Standard output and standard error of all custom scripts (except the
+      periodic ones) are captured in the Glidein stdout and stderr and are
+      transferred back to the Factory by HTCondor at the end of the Glidein.
+      However, this may not be enough: if the Glidein is killed the
+      transfer may not happen, if there are multi-Glideins all their
+      stdout/err are intermixed in the same files, and a user may want
+      this output earlier or in a different place. For all these
+      needs there is also a logging utility. It is defined in
+      logging_utils.source, can be used in any custom script,
+      requires a Web server to receive the logging messages, and needs the
+      GLIDEIN_LOG_RECIPIENTS_FACTORY attribute to be set in the
+      Factory configuration. The Web servers at the URLs in
+      GLIDEIN_LOG_RECIPIENTS_FACTORY must be able to receive
+      JWT-authenticated PUT requests, with the JWT HS256-encoded using the secret
+      stored in the Factory secret file
+      (/var/lib/gwms-factory/server-credentials/jwt_secret.key).
+      This secret must be HMAC-SHA256 compatible, e.g. a 32-byte string.
+      The Factory will create the file at startup if it is missing or
+      empty.
+
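For illustration only (not part of the patch): a log recipient honoring the contract described above could validate incoming tokens with PyJWT roughly as in the sketch below. The secret path is the Factory default mentioned above; the issuer and recipient URL are parameters because they depend on the deployment (the Factory URL-quotes the recipient URL when filling the "aud" claim, so the same quoting is applied before checking the audience).

```python
import urllib.parse

import jwt  # PyJWT, the same library the Factory uses to mint the tokens


def verify_glidein_log_token(token, recipient_url, issuer,
                             secret_file="/var/lib/gwms-factory/server-credentials/jwt_secret.key"):
    """Validate an HS256 Glidein log token as described in the text above.

    Raises jwt.InvalidTokenError (bad signature, expired, wrong aud/iss, ...) on failure.
    """
    with open(secret_file, "rb") as key_file:
        secret = key_file.read()
    audience = urllib.parse.quote(recipient_url, "")  # matches the "aud" claim set by the Factory
    return jwt.decode(token, secret, algorithms=["HS256"], audience=audience, issuer=issuer)
```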
Change an existing value based on conditions found
fi
# write it back
- add_config_line VO_SCALABILITY $vo_scalability
+ gconfig_add VO_SCALABILITY $vo_scalability
"$error_gen" -ok "memset.sh" "vo_scalability" "$vo_scalability"
exit 0
fi
diff --git a/doc/factory/custom_vars.html b/doc/factory/custom_vars.html
index 0c1bf70d6e..70d18e49c3 100644
--- a/doc/factory/custom_vars.html
+++ b/doc/factory/custom_vars.html
@@ -1139,6 +1139,22 @@
+      GLIDEIN_LOG_RECIPIENTS_FACTORY
+      Type: String - Default: ""
+
+      Space-separated list of URLs where the custom Glidein logs are published.
+
+      A token-authenticated Web server must be running at each given URL
+      to receive the PUT requests from the Glideins.
+
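For illustration only (not part of the patch): a hypothetical GLIDEIN_LOG_RECIPIENTS_FACTORY value and how the Factory interprets it — the value is split on whitespace and each recipient URL is URL-quoted to build the token audience (see generate_log_tokens() in the glideFactory.py changes below).

```python
import urllib.parse

# Hypothetical attribute value: a space-separated list of log recipient URLs
glidein_log_recipients_factory = (
    "https://logserver1.example.com/logging/put.php https://logserver2.example.com/logging/put.php"
)

log_recipients = glidein_log_recipients_factory.split()  # one JWT is issued per entry-recipient pair
for recipient in log_recipients:
    # The URL-quoted form is filename-safe and is what ends up in the token's "aud" claim
    print(urllib.parse.quote(recipient, ""))
```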
diff --git a/factory/glideFactory.py b/factory/glideFactory.py
index dfad6d267b..35eacf9a2b 100755
--- a/factory/glideFactory.py
+++ b/factory/glideFactory.py
@@ -11,12 +11,15 @@
import copy
import fcntl
-import glob
+
+# import glob
import json
import math
import os
import resource
+import secrets
import signal
+import stat
import subprocess
import sys
import tarfile
@@ -42,7 +45,6 @@
)
from glideinwms.lib import cleanupSupport, condorMonitor, glideinWMSVersion, logSupport, util
from glideinwms.lib.condorMonitor import CondorQEdit, QueryError
-from glideinwms.lib.pubCrypto import RSAKey
FACTORY_DIR = os.path.dirname(glideFactoryLib.__file__)
@@ -76,10 +78,8 @@ def aggregate_stats(in_downtime):
def update_classads():
- """Loads the aggregate job summary pickle files, and then
+ """Load the aggregate job summary pickle files, and then
quedit the finished jobs adding a new classad called MONITOR_INFO with the monitor information.
-
- :return:
"""
jobinfo = glideFactoryMonitorAggregator.aggregateJobsSummary()
for cnames, joblist in jobinfo.items():
@@ -98,14 +98,12 @@ def update_classads():
def save_stats(stats, fname):
"""Serialize and save aggregated statistics so that each component (Factory and Entries)
- can retrieve and use it to log and advertise
-
- stats is a dictionary pickled in binary format
- stats['LogSummary'] - log summary aggregated info
+ can retrieve and use them for logging and advertising.
- :param stats: aggregated Factory statistics
- :param fname: name of the file with the serialized data
- :return:
+ Args:
+        stats (dict): Aggregated Factory statistics dictionary, pickled to the file in binary format.
+            stats['LogSummary'] holds the aggregated log summary info.
+ fname (str): Name of the file to store the serialized data.
"""
util.file_pickle_dump(
fname, stats, mask_exceptions=(logSupport.log.exception, "Saving of aggregated statistics failed: ")
@@ -114,15 +112,12 @@ def save_stats(stats, fname):
# Added by C.W. Murphy to make descript.xml
def write_descript(glideinDescript, frontendDescript, monitor_dir):
- """
- Write the descript.xml to the monitoring directory
-
- @type glideinDescript: glideFactoryConfig.GlideinDescript
- @param glideinDescript: Factory config's glidein description object
- @type frontendDescript: glideFactoryConfig.FrontendDescript
- @param frontendDescript: Factory config's frontend description object
- @type monitor_dir: String
- @param monitor_dir: Path to monitoring directory
+ """Write the descript.xml file to the specified monitoring directory.
+
+ Args:
+ glideinDescript (glideFactoryConfig.GlideinDescript): Factory config's Glidein description object.
+ frontendDescript (glideFactoryConfig.FrontendDescript): Factory config's Frontend description object.
+ monitor_dir (str): Path to the monitoring directory.
"""
glidein_data = copy.deepcopy(glideinDescript.data)
@@ -156,54 +151,63 @@ def write_descript(glideinDescript, frontendDescript, monitor_dir):
############################################################
-def generate_log_tokens(startup_dir, glideinDescript):
+def generate_log_tokens(startup_dir, glidein_descript):
"""Generate the JSON Web Tokens used to authenticate with the remote HTTP log server.
- Note: tokens are generated for disabled entries too
+    Note: tokens used to be generated for disabled entries as well; now only enabled entries get one
Args:
startup_dir (str|Path): Path to the glideinsubmit directory
- glideinDescript: Factory config's glidein description object
+ glidein_descript (glideFactoryConfig.GlideinDescript): Factory config's Glidein description object
Returns:
None
Raises:
- IOError: If can't open/read/write a file (key/token)
+        IOError: If a key or token file cannot be opened, read, or written
"""
logSupport.log.info("Generating JSON Web Tokens for authentication with log server")
# Get a list of all entries, enabled and disabled
# TODO: there are more reliable ways to do so, i.e. reading the xml config
- entries = [ed[len("entry_") :] for ed in glob.glob("entry_*") if os.path.isdir(ed)]
+ # entries = [ed[len("entry_") :] for ed in glob.glob("entry_*") if os.path.isdir(ed)]
+ # OK to generate tokens only for enabled entries
+ entries = glidein_descript.data["Entries"].split(",")
# Retrieve the factory secret key (manually delivered) for token generation
credentials_dir = os.path.realpath(os.path.join(startup_dir, "..", "server-credentials"))
jwt_key = os.path.join(credentials_dir, "jwt_secret.key")
- if not os.path.exists(jwt_key):
- # create one and log if it doesnt exist, otherwise needs a
- # manual undocumented step to start factory
- logSupport.log.info(
- "creating %s -manually install this key for " % (jwt_key) + "authenticating to external web sites"
- )
- rsa = RSAKey()
- rsa.new(2048)
- rsa.save(jwt_key)
+ if not os.path.exists(jwt_key) or os.path.getsize(jwt_key) == 0:
+        # Create the secret (and log it) if missing; without this, an undocumented manual step would be needed to start the Factory
+ # For HS256 JWT (HMAC 256) a 32 bytes string is needed. A PEM file like the one from RSAKey() would cause
+ # jwt.exceptions.InvalidKeyError: The specified key is an asymmetric key or x509 certificate and
+ # should not be used as an HMAC secret.
+ # TODO: consider base64 encoding before saving sec_key (server code must be changed as well)
+ # TODO: add support for multiple secrets from different servers (RSA asymmetric or HMAC symmetric)
+ # or should they provide (and refresh) tokens?
+ logSupport.log.info(f"creating {jwt_key} - manually install this key for authenticating to external web sites")
+ log_token_key = secrets.token_bytes(32)
+        # The file system is not a safe place to store secrets, but this key only controls access to the log server,
+        # whose content also resides on a file system, so someone with access to this key could access the log server anyway
+ with open(jwt_key, "wb") as file:
+ file.write(log_token_key)
+ os.chmod(jwt_key, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH)
+ # TODO: chown gfactory:apache AND chmod u:rw,g:r
try:
- with open(os.path.join(credentials_dir, "jwt_secret.key")) as keyfile:
- secret = keyfile.readline().strip()
+ with open(jwt_key, "rb") as keyfile:
+ secret = keyfile.read()
except OSError:
- logSupport.log.exception("Cannot find the key for JWT generation (must be manually deposited).")
+ logSupport.log.exception(f"Cannot find the key for JWT generation (must be manually deposited in {jwt_key}).")
raise
- factory_name = glideinDescript.data["FactoryName"]
+ factory_name = glidein_descript.data["FactoryName"]
# Issue a token for each entry-recipient pair
for entry in entries:
# Get the list of recipients
- if "LOG_RECIPIENTS_FACTORY" in glideFactoryConfig.JobParams(entry).data:
- log_recipients = glideFactoryConfig.JobParams(entry).data["LOG_RECIPIENTS_FACTORY"].split()
+ if "GLIDEIN_LOG_RECIPIENTS_FACTORY" in glideFactoryConfig.JobParams(entry).data:
+ log_recipients = glideFactoryConfig.JobParams(entry).data["GLIDEIN_LOG_RECIPIENTS_FACTORY"].split()
else:
log_recipients = []
@@ -254,7 +258,7 @@ def generate_log_tokens(startup_dir, glideinDescript):
"aud": recipient_safe_url,
"iat": curtime,
"exp": curtime + 604800,
- "nbf": curtime - 300,
+ "nbf": curtime - 300, # To compensate for possible clock skews
}
token = jwt.encode(token_payload, secret, algorithm="HS256")
# TODO: PyJWT bug workaround. Remove this conversion once affected PyJWT is no more around
@@ -438,25 +442,17 @@ def spawn(
restart_interval,
):
"""
- Spawn and keep track of the entry processes. Restart them if required.
- Advertise glidefactoryglobal classad every iteration
+ Spawn and track entry processes, restarting them as needed. Advertise glidefactoryglobal ClassAds every iteration.
- @type sleep_time: long
- @param sleep_time: Delay between every iteration
- @type advertize_rate: long
- @param advertize_rate: Rate at which entries advertise their classads
- @type startup_dir: String
- @param startup_dir: Path to glideinsubmit directory
- @type glideinDescript: glideFactoryConfig.GlideinDescript
- @param glideinDescript: Factory config's glidein description object
- @type frontendDescript: glideFactoryConfig.FrontendDescript
- @param frontendDescript: Factory config's frontend description object
- @type entries: list
- @param entries: Sorted list of entry names
- @type restart_interval: long
- @param restart_interval: Allowed restart interval in second
- @type restart_attempts: long
- @param restart_attempts: Number of allowed restart attempts in the interval
+ Args:
+ sleep_time (int): Delay between iterations in seconds.
+ advertize_rate (int): Rate at which entries advertise their ClassAds.
+ startup_dir (str): Path to the glideinsubmit directory.
+ glideinDescript (glideFactoryConfig.GlideinDescript): Factory config's Glidein description object.
+ frontendDescript (glideFactoryConfig.FrontendDescript): Factory config's Frontend description object.
+ entries (list): Sorted list of entry names.
+ restart_interval (int): Allowed restart interval in seconds.
+ restart_attempts (int): Number of allowed restart attempts within the interval.
"""
childs = {}
@@ -488,14 +484,20 @@ def spawn(
entry_groups = entry_grouper(group_size, entries)
def _set_rlimit(soft_l=None, hard_l=None):
- # set new hard and soft open file limits
- # if setting limits fails or no input parameters use inherited limits
- # from parent process
- # nb 1. it is possible to raise limits
- # up to [hard_l,hard_l] but once lowered they cannot be raised
- # nb 2. it may be better just to omit calling this function at
- # all from subprocess - in which case it inherits limits from
- # parent process
+        """Set new hard and soft open file limits.
+
+        If setting the limits fails, or no parameters are given, the limits inherited from the parent process are used.
+        NOTE 1: it is possible to raise limits up to [hard_l, hard_l], but once lowered they cannot be raised.
+        NOTE 2: it may be better to omit calling this function at all from the subprocess,
+        in which case it inherits the limits from the parent process.
+
+ Args:
+ soft_l (int): soft limit
+ hard_l (int): hard limit
+
+ Raises:
+ Exception: if the limit setting fails
+ """
lim = resource.getrlimit(resource.RLIMIT_NOFILE)
if soft_l is not None or hard_l is not None:
@@ -544,13 +546,13 @@ def _set_rlimit(soft_l=None, hard_l=None):
generate_log_tokens(startup_dir, glideinDescript)
for group in childs:
- # set it in non blocking mode
+ # set it in non-blocking mode
# since we will run for a long time, we do not want to block
for fd in (childs[group].stdout.fileno(), childs[group].stderr.fileno()):
fl = fcntl.fcntl(fd, fcntl.F_GETFL)
fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
- # If RemoveOldCredFreq < 0, do not do credential cleanup.
+ # If RemoveOldCredFreq <= 0, do not do credential cleanup.
curr_time = 0 # To ensure curr_time is always initialized
if int(glideinDescript.data["RemoveOldCredFreq"]) > 0:
# Convert credential removal frequency from hours to seconds
@@ -565,9 +567,8 @@ def _set_rlimit(soft_l=None, hard_l=None):
logSupport.log.info("Adding cleaners for old credentials")
cred_base_dir = glideinDescript.data["ClientProxiesBaseDir"]
for username in frontendDescript.get_all_usernames():
- cred_base_user = os.path.join(cred_base_dir, "user_%s" % username)
cred_user_instance_dirname = os.path.join(
- cred_base_user, "glidein_%s" % glideinDescript.data["GlideinName"]
+ cred_base_dir, "user_%s" % username, "glidein_%s" % glideinDescript.data["GlideinName"]
)
cred_cleaner = cleanupSupport.DirCleanupCredentials(
cred_user_instance_dirname, "(credential_*)", remove_old_cred_age
diff --git a/logserver/README.md b/logserver/README.md
new file mode 100644
index 0000000000..efa3a58902
--- /dev/null
+++ b/logserver/README.md
@@ -0,0 +1,73 @@
+
+
+# Glidein Logging Server
+
+This is a simple server, based on the Apache httpd server, that receives Glidein logs via HTTP PUT.
+
+- getjwt.py is a Python script to generate tokens
+- put.php is a script to receive JWT-authenticated HTTP PUT requests
+- jwt.php is a test script to generate or verify JWT tokens
+- logging_config.json is the configuration file for both PHP scripts
+- the httpd configuration allowing PUT is in build/packaging/rpm/gwms-logserver.conf.httpd
+
+The `put.php` script requires the `uploads` and `uploads_unauthorized` sub-directories.
+Both PHP scripts require PHP and php-fpm in order to be executed by the Web server:
+
+```commandline
+# run as root
+dnf install php
+dnf install php-fpm
+systemctl start php-fpm
+systemctl enable php-fpm httpd
+```
+
+Ref: https://docs.redhat.com/en/documentation/red_hat_enterprise_linux/9/html/installing_and_using_dynamic_programming_languages/assembly_using-the-php-scripting-language_installing-and-using-dynamic-programming-languages
+Both PHP scripts use [Firebase PHP-JWT](https://github.com/firebase/php-jwt)
+installed via [Composer](https://getcomposer.org/download/)
+as done in [this tutorial](https://www.sitepoint.com/php-authorization-jwt-json-web-tokens/).
+
+Once Apache 2.5 (currently the development version) or 2.6 becomes available, you can use
+[mod_auth_jwt](https://httpd.apache.org/docs/trunk/mod/mod_autht_jwt.html) and
+[mod_auth_bearer](https://httpd.apache.org/docs/trunk/mod/mod_auth_bearer.html)
+to enable JWT bearer token authentication.
+
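For illustration only (not part of the README): once put.php is deployed, an upload can be exercised with a short Python sketch like the one below. The URL and file names are hypothetical; the token would come from getjwt.py, signed with the same secret that put.php reads.

```python
# Minimal HTTP PUT upload against put.php (hypothetical URL and local files).
import urllib.request

url = "https://logserver.example.com/logging/put.php/test_glidein.log"  # target file name passed in the path
with open("test.jwt") as token_file:              # token generated with getjwt.py
    token = token_file.read().strip()
with open("glidein_test.log", "rb") as log_file:  # payload to upload
    data = log_file.read()

req = urllib.request.Request(url, data=data, method="PUT")
req.add_header("Authorization", f"Bearer {token}")
with urllib.request.urlopen(req) as resp:
    print(resp.status, resp.read().decode())
```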
+## Apache troubleshooting
+
+You can use `apachectl configtest` to verify if your httpd configuration is correct
+(Apache silently ignores bad config files).
+More suggestions at
+
+For example, you may need to set the SELinux context:
+
+```commandline
+# run as root
+semanage fcontext -a -t httpd_sys_content_t "/srv/example.com(/.*)?"
+restorecon -Rv /srv/example.com/
+semanage fcontext -a -t httpd_sys_content_t "/srv/example.net(/.*)?"
+restorecon -Rv /srv/example.net/
+```
+
+To troubleshoot httpd you may increase the log level using `/etc/httpd/conf.d/temp_debug.conf` as
+[suggested here](https://serverfault.com/a/1168882/1189965):
+
+```
+LogLevel trace4
+GlobalLog "logs/debug.log" "%v:%p %h %l %u %t \"%r\" %>s %O file=%f"
+
+# http://httpd.apache.org/docs/current/mod/mod_log_config.html#formats
+# %v The canonical ServerName of the server serving the request.
+# %f Filename.
+```
+
+To see the PHP error messages in `put.php` you need to edit `/etc/php.ini` and enable the Development options like:
+
+```ini
+error_reporting = E_ALL
+display_errors = On
+display_startup_errors = On
+```
+
+Remember to disable these settings in production.
diff --git a/logserver/getjwt.py b/logserver/getjwt.py
new file mode 100644
index 0000000000..b922a1df0a
--- /dev/null
+++ b/logserver/getjwt.py
@@ -0,0 +1,94 @@
+#!/bin/env python3
+# SPDX-FileCopyrightText: 2009 Fermi Research Alliance, LLC
+# SPDX-License-Identifier: Apache-2.0
+
+"""Print a JWT token
+python getjwt.py -k your_secret_key -i https://your-issuer.com
+Options
+-k --key JWT signing key
+-
+"""
+
+import argparse
+import os
+import socket
+import time
+import urllib.parse
+
+import jwt
+
+
+def log(msg: str):
+ print(msg)
+
+
+tokens_dir = "./"
+this_host = socket.gethostname()
+# Parse command-line arguments
+parser = argparse.ArgumentParser(description="Generate a JWT token for the GlideinWMS logging.")
+parser.add_argument("-k", "--key", default=None, help="Secret key for JWT signing (overrides the key-file).")
+parser.add_argument("-K", "--key-file", required=False, help="Binary file containing the secret key for JWT signing.")
+parser.add_argument(
+    "-d", "--duration", type=int, default=604800, help="Duration of the token in seconds (default: 604800, i.e. one week)."
+)
+parser.add_argument("-e", "--entry", default="TEST_TOKEN", help="Entry, for the Subject (sub) claim for the JWT.")
+parser.add_argument("-f", "--factory", default=this_host, help="Factory, for Issuer (iss) claim for the JWT.")
+parser.add_argument("-a", "--algorithm", default="HS256", help="JWT encoding algorithm.")
+parser.add_argument(
+    "-u", "--log-url", default=f"http://{this_host}/logging/put.php", help="Log server URL, used for the Audience (aud) claim of the JWT."
+)
+parser.add_argument("-o", "--output", default=None, help="Output file to write the JWT.")
+args = parser.parse_args()
+if args.key is None and not args.key_file:
+ # log("ERROR: You must provide a key string or a key file")
+ parser.error("ERROR: You must provide a key string or a key file")
+
+curtime = int(time.time())
+
+token_key = args.key
+if token_key is None:
+ if args.key_file:
+ try:
+ with open(args.key_file, "rb") as key_file:
+ token_key = key_file.read()
+ except OSError:
+ log(f"ERROR: Unable to read token key from file: {args.key_file}")
+ raise
+
+# Define payload with issuer from arguments
+# Payload fields:
+# iss->issuer, sub->subject, aud->audience
+# iat->issued_at, exp->expiration, nbf->not_before
+payload = {
+ "user_id": 123, # Replace with actual user ID or other data
+ "iss": args.factory, # Set issuer from command-line argument
+ "sub": args.entry,
+ # Obtain a legal filename safe string from the url, escaping "/" and other tricky symbols
+ "aud": urllib.parse.quote(args.log_url, ""),
+ # "issued_at": curtime,
+ "iat": curtime,
+ "exp": curtime + args.duration,
+ "nbf": curtime - 300,
+}
+
+# Generate JWT using secret key from arguments
+print(f"Encoding token with key: <{token_key}>")
+token = jwt.encode(payload, token_key, algorithm=args.algorithm)
+# TODO: PyJWT bug workaround. Remove this conversion once affected PyJWT is no more around
+# PyJWT in EL7 (PyJWT <2.0.0) has a bug, jwt.encode() is declaring str as return type, but it is returning bytes
+# https://github.com/jpadilla/pyjwt/issues/391
+if isinstance(token, bytes):
+ token = token.decode("UTF-8")
+
+if args.output is None:
+ print(token)
+else:
+ token_filepath = os.path.join(tokens_dir, args.output)
+ try:
+ # Write the token to a text file
+ with open(token_filepath, "w") as tkfile:
+ tkfile.write(token)
+ log(f"Token for {args.log_url} ({urllib.parse.quote(args.log_url, '')}) written to {token_filepath}")
+ except OSError:
+ log(f"ERROR: Unable to create JWT file: {token_filepath}")
+ raise
diff --git a/logserver/jwt.php b/logserver/jwt.php
new file mode 100644
index 0000000000..8a89790801
--- /dev/null
+++ b/logserver/jwt.php
@@ -0,0 +1,74 @@
+ 'http://example.org',
+ 'aud' => 'http://example.com',
+ 'iat' => 1356999524,
+ 'nbf' => 1357000000
+];
+
+echo "Encoding/decoding payload using key: <$key>\n";
+
+if ($argc>1) {
+ if ($argc==2) {
+ echo "Decoding token in $argv[1]\n";
+ $jwt = file_get_contents($argv[1]);
+ print_r($jwt);
+ $decoded = JWT::decode($jwt, new Key($key, 'HS256'));
+ print_r($decoded);
+ } else {
+ echo "Encoding payload in $argv[1]\n";
+ $payload = json_decode(file_get_contents($argv[1]));
+ $payload_array = (array) $payload;
+ $jwt = JWT::encode($payload_array, $key, 'HS256');
+ print_r($jwt);
+ echo "Saving token to $argv[2]\n";
+ file_put_contents($argv[2], $jwt);
+ $decoded = JWT::decode($jwt, new Key($key, 'HS256'));
+ print_r($decoded);
+ }
+} else {
+ echo "Encode/decode test with payload\n";
+ $jwt = JWT::encode($payload, $key, 'HS256');
+ print_r($jwt);
+ $decoded = JWT::decode($jwt, new Key($key, 'HS256'));
+ print_r($decoded);
+}
+
+?>
diff --git a/logserver/logging_config.json b/logserver/logging_config.json
new file mode 100644
index 0000000000..5bb4e2c659
--- /dev/null
+++ b/logserver/logging_config.json
@@ -0,0 +1,10 @@
+{
+ "secret_key_path": "/var/lib/gwms-factory/server-credentials/jwt_secret.key",
+ "token_issuer": "gfactory_service",
+ "uri_regex_file_name": "#logging/put.php/(\\S+)#",
+ "upload_dir": "uploads/",
+ "upload_dir_unauthorized": "uploads_unauthorized/",
+ "require_authentication": true,
+ "verbose": false,
+ "debug": false
+}
diff --git a/logserver/web-area/put.php b/logserver/web-area/put.php
new file mode 100644
index 0000000000..e0523f15cc
--- /dev/null
+++ b/logserver/web-area/put.php
@@ -0,0 +1,171 @@
+", "..");
+ // every forbidden character is replaced by an underscore
+ $safeFilename = str_replace($dangerousCharacters, '_', $unsafeFilename);
+ return $safeFilename;
+}
+
+function auth_failed($headerCode, $msg){
+ if ($GLOBALS['requireAuthentication']) {
+ http_response_code($headerCode);
+ echo "$headerCode: $msg\n";
+ exit;
+ }
+ if ($GLOBALS['verbose']) {
+ echo "$headerCode: $msg\n";
+ }
+}
+
+
+/* The file name (default from the config file) is specified in the URI either as
+ GET parameter fname put.php?fname=NAME
+ or as path logging/put.php/NAME
+ */
+$fileName = $_GET['fname'] ?? $defaultFileName;
+if (preg_match($uriRegexFileName, $_SERVER['REQUEST_URI'], $matches)) {
+ $fileName = $matches[1];
+}
+
+// Not authorized unless the token verification is successful
+$isAuthorized = False;
+// Get authorization header and extract the token
+$authorizationHeader = getallheaders()["Authorization"] ?? "";
+//$token = preg_match("/Bearer (?P[^\s]+)/", $authorizationHeader, $matches) ? $matches["token"] : "";
+if (! preg_match('/Bearer\s(?P\S+)/', $authorizationHeader, $matches)) {
+ // No token in request
+ auth_failed(400, 'Token not found in request');
+ $jwt = "";
+} else {
+ $jwt = $matches["token"];
+}
+if (! $jwt) {
+ // Unable to extract token from the authorization header
+ auth_failed(400, 'Unable to extract token from the authorization header');
+} else {
+ try {
+ // Read secret key from file or set to the default
+ $secretKey = trim(file_get_contents($secretKeyFile)) ?: $defaultSecretKey;
+ $token = JWT::decode($jwt, new Key($secretKey, 'HS256'));
+ $now = new DateTimeImmutable();
+ $serverAddress = 'https://' . $_SERVER['SERVER_ADDR'];
+ $serverName = 'https://' . $_SERVER['SERVER_NAME'];
+ // Verify token (iss, aud and expiration)
+ if ($token->iss !== $tokenIssuer ||
+ ( ! str_starts_with($token->aud, urlencode($serverAddress)) &&
+ ! str_starts_with($token->aud, urlencode($serverName))) ||
+ $token->nbf > $now->getTimestamp() ||
+ $token->exp < $now->getTimestamp())
+ {
+ if ($debug) {
+ $res0 = ! str_starts_with($token->aud, urlencode($serverAddress)) &&
+ ! str_starts_with($token->aud, urlencode($serverName)) ? 'true' : 'false';
+ $res1 = str_starts_with($token->aud, urlencode($serverAddress)) ? 'true' : 'false';
+ $res2 = urlencode($serverAddress);
+ $res3 = str_starts_with($token->aud, urlencode($serverName)) ? 'true' : 'false';
+ $res4 = urlencode($serverName);
+ $res5 = $token->nbf > $now->getTimestamp() ? 'true' : 'false';
+ $res6 = $token->exp < $now->getTimestamp() ? 'true' : 'false';
+ $res7 = $token->iss !== $tokenIssuer ? 'true' : 'false';
+ echo "Authorization failed:\n- iss $res7: {$token->iss} VS {$tokenIssuer}\n".
+ "- aud $res0, addr $res1, name $res3: {$token->aud} VS $res2, $res4\n- nbf $res5\n- exp $res6\n";
+ }
+ auth_failed(401, 'Wrong authorization (wrong claims or token expired)');
+ } else {
+ $isAuthorized = True;
+ }
+ } catch (Firebase\JWT\SignatureInvalidException $e) {
+ auth_failed(401, 'Invalid JWT signature');
+ } catch (Exception $e) {
+ auth_failed(500, 'Error decoding JWT');
+ }
+}
+
+// debug variables printout
+if ($debug) {
+ var_dump( get_defined_vars() );
+}
+
+$fname = filename_sanitizer($fileName);
+if (! $isAuthorized) {
+ $uploadPath = $uploadPathUnauthorized;
+}
+
+// Save the file in the desired location and return error upon failure
+try {
+ // PUT data comes in on the stdin stream
+ $putdata = fopen('php://input', 'r');
+
+ if ($fp = fopen($uploadPath . $fname, 'w')) {
+ // Read the data 1 KB at a time and write to the file
+ while ($data = fread($putdata, 1024))
+ fwrite($fp, $data);
+ fclose($fp);
+ } else {
+ http_response_code(500);
+ echo "500: Error opening the output file\n";
+ exit;
+ }
+ fclose($putdata);
+} catch (Exception $e) {
+ http_response_code(500);
+ echo "500: Error saving the upload file\n";
+ exit;
+}
+?>