From 1a96afb06e712d54f7ac30cf3cdf66ff3fc2fb6e Mon Sep 17 00:00:00 2001 From: Alexander Gil Date: Thu, 2 Nov 2023 12:23:03 +0100 Subject: [PATCH 1/7] feat: Add tag based in store name instead of cluster name --- Dockerfile | 2 +- radosgw_usage_exporter.py | 104 +++++++++++++++++++------------------- requirements.txt | 6 +-- 3 files changed, 56 insertions(+), 56 deletions(-) diff --git a/Dockerfile b/Dockerfile index 350b092..71da94d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.9-slim +FROM python:3.11-slim RUN mkdir -p /usr/src/app WORKDIR /usr/src/app diff --git a/radosgw_usage_exporter.py b/radosgw_usage_exporter.py index 5f6eade..c8b89c3 100644 --- a/radosgw_usage_exporter.py +++ b/radosgw_usage_exporter.py @@ -26,12 +26,12 @@ class RADOSGWCollector(object): enabled by 'rgw enable usage log = true' in the appropriate section of ceph.conf see Ceph documentation for details """ - def __init__(self, host, admin_entry, access_key, secret_key, cluster_name, insecure, timeout): + def __init__(self, host, admin_entry, access_key, secret_key, store, insecure, timeout): super(RADOSGWCollector, self).__init__() self.host = host self.access_key = access_key self.secret_key = secret_key - self.cluster_name = cluster_name + self.store = store self.insecure = insecure self.timeout = timeout @@ -141,95 +141,95 @@ def _setup_empty_prometheus_metrics(self): 'ops': CounterMetricFamily('radosgw_usage_ops_total', 'Number of operations', - labels=["bucket", "owner", "category", "cluster", "tags"]), + labels=["bucket", "owner", "category", "store", "tags"]), 'successful_ops': CounterMetricFamily('radosgw_usage_successful_ops_total', 'Number of successful operations', - labels=["bucket", "owner", "category", "cluster", "tags"]), + labels=["bucket", "owner", "category", "store", "tags"]), 'bytes_sent': CounterMetricFamily('radosgw_usage_sent_bytes_total', 'Bytes sent by the RADOSGW', - labels=["bucket", "owner", "category", "cluster", "tags"]), + 
labels=["bucket", "owner", "category", "store", "tags"]), 'bytes_received': CounterMetricFamily('radosgw_usage_received_bytes_total', 'Bytes received by the RADOSGW', - labels=["bucket", "owner", "category", "cluster", "tags"]), + labels=["bucket", "owner", "category", "store", "tags"]), 'bucket_usage_bytes': GaugeMetricFamily('radosgw_usage_bucket_bytes', 'Bucket used bytes', - labels=["bucket", "owner", "zonegroup", "cluster", "tags"]), + labels=["bucket", "owner", "zonegroup", "store", "tags"]), 'bucket_utilized_bytes': GaugeMetricFamily('radosgw_usage_bucket_utilized_bytes', 'Bucket utilized bytes', - labels=["bucket", "owner", "zonegroup", "cluster", "tags"]), + labels=["bucket", "owner", "zonegroup", "store", "tags"]), 'bucket_usage_objects': GaugeMetricFamily('radosgw_usage_bucket_objects', 'Number of objects in bucket', - labels=["bucket", "owner", "zonegroup", "cluster", "tags"]), + labels=["bucket", "owner", "zonegroup", "store", "tags"]), 'bucket_quota_enabled': GaugeMetricFamily('radosgw_usage_bucket_quota_enabled', 'Quota enabled for bucket', - labels=["bucket", "owner", "zonegroup", "cluster", "tags"]), + labels=["bucket", "owner", "zonegroup", "store", "tags"]), 'bucket_quota_max_size': GaugeMetricFamily('radosgw_usage_bucket_quota_size', 'Maximum allowed bucket size', - labels=["bucket", "owner", "zonegroup", "cluster", "tags"]), + labels=["bucket", "owner", "zonegroup", "store", "tags"]), 'bucket_quota_max_size_bytes': GaugeMetricFamily('radosgw_usage_bucket_quota_size_bytes', 'Maximum allowed bucket size in bytes', - labels=["bucket", "owner", "zonegroup", "cluster", "tags"]), + labels=["bucket", "owner", "zonegroup", "store", "tags"]), 'bucket_quota_max_objects': GaugeMetricFamily('radosgw_usage_bucket_quota_size_objects', 'Maximum allowed bucket size in number of objects', - labels=["bucket", "owner", "zonegroup", "cluster", "tags"]), + labels=["bucket", "owner", "zonegroup", "store", "tags"]), 'bucket_shards': 
GaugeMetricFamily('radosgw_usage_bucket_shards', 'Number ob shards in bucket', - labels=["bucket", "owner", "zonegroup", "cluster", "tags"]), + labels=["bucket", "owner", "zonegroup", "store", "tags"]), 'user_metadata': GaugeMetricFamily('radosgw_user_metadata', 'User metadata', - labels=["user", "display_name", "email", "storage_class", "cluster"]), + labels=["user", "display_name", "email", "storage_class", "store"]), 'user_quota_enabled': GaugeMetricFamily('radosgw_usage_user_quota_enabled', 'User quota enabled', - labels=["user", "cluster"]), + labels=["user", "store"]), 'user_quota_max_size': GaugeMetricFamily('radosgw_usage_user_quota_size', 'Maximum allowed size for user', - labels=["user", "cluster"]), + labels=["user", "store"]), 'user_quota_max_size_bytes': GaugeMetricFamily('radosgw_usage_user_quota_size_bytes', 'Maximum allowed size in bytes for user', - labels=["user", "cluster"]), + labels=["user", "store"]), 'user_quota_max_objects': GaugeMetricFamily('radosgw_usage_user_quota_size_objects', 'Maximum allowed number of objects across all user buckets', - labels=["user", "cluster"]), + labels=["user", "store"]), 'user_bucket_quota_enabled': GaugeMetricFamily('radosgw_usage_user_bucket_quota_enabled', 'User per-bucket-quota enabled', - labels=["user", "cluster"]), + labels=["user", "store"]), 'user_bucket_quota_max_size': GaugeMetricFamily('radosgw_usage_user_bucket_quota_size', 'Maximum allowed size for each bucket of user', - labels=["user", "cluster"]), + labels=["user", "store"]), 'user_bucket_quota_max_size_bytes': GaugeMetricFamily('radosgw_usage_user_bucket_quota_size_bytes', 'Maximum allowed size bytes size for each bucket of user', - labels=["user", "cluster"]), + labels=["user", "store"]), 'user_bucket_quota_max_objects': GaugeMetricFamily('radosgw_usage_user_bucket_quota_size_objects', 'Maximum allowed number of objects in each user bucket', - labels=["user", "cluster"]), + labels=["user", "store"]), 'user_total_objects': 
GaugeMetricFamily('radosgw_usage_user_total_objects', 'Usage of objects by user', - labels=["user", "cluster"]), + labels=["user", "store"]), 'user_total_bytes': GaugeMetricFamily('radosgw_usage_user_total_bytes', 'Usage of bytes by user', - labels=["user", "cluster"]), + labels=["user", "store"]), 'scrape_duration_seconds': GaugeMetricFamily('radosgw_usage_scrape_duration_seconds', 'Ammount of time each scrape takes', @@ -285,19 +285,19 @@ def _update_usage_metrics(self): for category in list(self.usage_dict[bucket_owner][bucket_name].keys()): data_dict = self.usage_dict[bucket_owner][bucket_name][category] self._prometheus_metrics['ops'].add_metric( - [bucket_name, bucket_owner, category, self.cluster_name], + [bucket_name, bucket_owner, category, self.store], data_dict['ops']) self._prometheus_metrics['successful_ops'].add_metric( - [bucket_name, bucket_owner, category, self.cluster_name], + [bucket_name, bucket_owner, category, self.store], data_dict['successful_ops']) self._prometheus_metrics['bytes_sent'].add_metric( - [bucket_name, bucket_owner, category, self.cluster_name], + [bucket_name, bucket_owner, category, self.store], data_dict['bytes_sent']) self._prometheus_metrics['bytes_received'].add_metric( - [bucket_name, bucket_owner, category, self.cluster_name], + [bucket_name, bucket_owner, category, self.store], data_dict['bytes_received']) def _get_bucket_usage(self, bucket): @@ -348,33 +348,33 @@ def _get_bucket_usage(self, bucket): taglist = '' self._prometheus_metrics['bucket_usage_bytes'].add_metric( - [bucket_name, bucket_owner, bucket_zonegroup, self.cluster_name, taglist], + [bucket_name, bucket_owner, bucket_zonegroup, self.store, taglist], bucket_usage_bytes) self._prometheus_metrics['bucket_utilized_bytes'].add_metric( - [bucket_name, bucket_owner, bucket_zonegroup, self.cluster_name, taglist], + [bucket_name, bucket_owner, bucket_zonegroup, self.store, taglist], bucket_utilized_bytes) 
self._prometheus_metrics['bucket_usage_objects'].add_metric( - [bucket_name, bucket_owner, bucket_zonegroup, self.cluster_name, taglist], + [bucket_name, bucket_owner, bucket_zonegroup, self.store, taglist], bucket_usage_objects) if 'bucket_quota' in bucket: self._prometheus_metrics['bucket_quota_enabled'].add_metric( - [bucket_name, bucket_owner, bucket_zonegroup, self.cluster_name, taglist], + [bucket_name, bucket_owner, bucket_zonegroup, self.store, taglist], bucket['bucket_quota']['enabled']) self._prometheus_metrics['bucket_quota_max_size'].add_metric( - [bucket_name, bucket_owner, bucket_zonegroup, self.cluster_name, taglist], + [bucket_name, bucket_owner, bucket_zonegroup, self.store, taglist], bucket['bucket_quota']['max_size']) self._prometheus_metrics['bucket_quota_max_size_bytes'].add_metric( - [bucket_name, bucket_owner, bucket_zonegroup, self.cluster_name, taglist], + [bucket_name, bucket_owner, bucket_zonegroup, self.store, taglist], bucket['bucket_quota']['max_size_kb'] * 1024) self._prometheus_metrics['bucket_quota_max_objects'].add_metric( - [bucket_name, bucket_owner, bucket_zonegroup, self.cluster_name, taglist], + [bucket_name, bucket_owner, bucket_zonegroup, self.store, taglist], bucket['bucket_quota']['max_objects']) self._prometheus_metrics['bucket_shards'].add_metric( - [bucket_name, bucket_owner, bucket_zonegroup, self.cluster_name, taglist], + [bucket_name, bucket_owner, bucket_zonegroup, self.store, taglist], bucket_shards) else: @@ -421,35 +421,35 @@ def _get_user_info(self, user): user_storage_class = "" self._prometheus_metrics['user_metadata'].add_metric( - [user, user_display_name, user_email, user_storage_class, self.cluster_name], 1) + [user, user_display_name, user_email, user_storage_class, self.store], 1) if 'stats' in user_info: self._prometheus_metrics['user_total_bytes'].add_metric( - [user, self.cluster_name], user_info['stats']['size_actual']) + [user, self.store], user_info['stats']['size_actual']) 
self._prometheus_metrics['user_total_objects'].add_metric( - [user, self.cluster_name], user_info['stats']['num_objects']) + [user, self.store], user_info['stats']['num_objects']) if 'user_quota' in user_info: quota = user_info['user_quota'] self._prometheus_metrics['user_quota_enabled'].add_metric( - [user, self.cluster_name], quota['enabled']) + [user, self.store], quota['enabled']) self._prometheus_metrics['user_quota_max_size'].add_metric( - [user, self.cluster_name], quota['max_size']) + [user, self.store], quota['max_size']) self._prometheus_metrics['user_quota_max_size_bytes'].add_metric( - [user, self.cluster_name], quota['max_size_kb'] * 1024) + [user, self.store], quota['max_size_kb'] * 1024) self._prometheus_metrics['user_quota_max_objects'].add_metric( - [user, self.cluster_name], quota['max_objects']) + [user, self.store], quota['max_objects']) if 'bucket_quota' in user_info: quota = user_info['bucket_quota'] self._prometheus_metrics['user_bucket_quota_enabled'].add_metric( - [user, self.cluster_name], quota['enabled']) + [user, self.store], quota['enabled']) self._prometheus_metrics['user_bucket_quota_max_size'].add_metric( - [user, self.cluster_name], quota['max_size']) + [user, self.store], quota['max_size']) self._prometheus_metrics['user_bucket_quota_max_size_bytes'].add_metric( - [user, self.cluster_name], quota['max_size_kb'] * 1024) + [user, self.store], quota['max_size_kb'] * 1024) self._prometheus_metrics['user_bucket_quota_max_objects'].add_metric( - [user, self.cluster_name], quota['max_objects']) + [user, self.store], quota['max_objects']) def parse_args(): @@ -494,10 +494,10 @@ def parse_args(): default=int(os.environ.get('VIRTUAL_PORT', '9242')) ) parser.add_argument( - '-c', '--cluster', + '-S', '--store', required=False, - help='cluster name', - default=os.environ.get('CLUSTER_NAME', 'ceph'), + help='store name added to metrics', + default=os.environ.get('STORE', 'us-east-1'), ) parser.add_argument( '-t', '--timeout', @@ -512,7 +512,7 
@@ def main(): try: args = parse_args() REGISTRY.register(RADOSGWCollector( - args.host, args.admin_entry, args.access_key, args.secret_key, args.cluster, args.insecure, args.timeout)) + args.host, args.admin_entry, args.access_key, args.secret_key, args.store, args.insecure, args.timeout)) start_http_server(args.port) print(("Polling {0}. Serving at port: {1}".format(args.host, args.port))) while True: diff --git a/requirements.txt b/requirements.txt index a7f8046..533aad6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -prometheus-client==0.11.0 +prometheus-client==0.18.0 requests>=2.26.0 -boto==2.46.1 -requests-aws==0.1.8 \ No newline at end of file +boto==2.49.0 +requests-aws==0.1.8 From 9634e48e109d182b2a03aed0f29d924ad62c59d9 Mon Sep 17 00:00:00 2001 From: Alexander Gil Date: Thu, 2 Nov 2023 12:50:54 +0100 Subject: [PATCH 2/7] feat: Add log-level configuration param --- radosgw_usage_exporter.py | 42 +++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/radosgw_usage_exporter.py b/radosgw_usage_exporter.py index c8b89c3..0627743 100644 --- a/radosgw_usage_exporter.py +++ b/radosgw_usage_exporter.py @@ -13,9 +13,6 @@ from collections import defaultdict, Counter from prometheus_client.core import GaugeMetricFamily, CounterMetricFamily, REGISTRY -logging.basicConfig(level=logging.DEBUG) -DEBUG = int(os.environ.get('DEBUG', '0')) - class RADOSGWCollector(object): """RADOSGWCollector gathers bucket level usage data for all buckets from @@ -101,8 +98,7 @@ def _session(self): # requests warning about certificate hostname mismatch. 
if not self.insecure: warnings.filterwarnings('ignore', message='Unverified HTTPS request') - if DEBUG: - print("Perform insecured requests") + logging.debug("Perform insecured requests") def _request_data(self, query, args): """ @@ -118,18 +114,17 @@ def _request_data(self, query, args): self.host)) if response.status_code == requests.codes.ok: - if DEBUG: - print(response) + logging.debug(response) return response.json() else: # Usage caps absent or wrong admin entry - print(("Request error [{0}]: {1}".format( + logging.error(("Request error [{0}]: {1}".format( response.status_code, response.content.decode('utf-8')))) return # DNS, connection errors, etc except requests.exceptions.RequestException as e: - print(("Request error: {0}".format(e))) + logging.info(("Request error: {0}".format(e))) return def _setup_empty_prometheus_metrics(self): @@ -254,8 +249,7 @@ def _get_usage(self, entry): self.usage_dict[bucket_owner] = defaultdict(dict) for bucket in entry['buckets']: - if DEBUG: - print((json.dumps(bucket, indent=4, sort_keys=True))) + logging.debug((json.dumps(bucket, indent=4, sort_keys=True))) if not bucket['bucket']: bucket_name = "bucket_root" @@ -305,9 +299,7 @@ def _get_bucket_usage(self, bucket): Method get actual bucket usage (in bytes). Some skips and adjustments for various Ceph releases. """ - - if DEBUG: - print((json.dumps(bucket, indent=4, sort_keys=True))) + logging.debug((json.dumps(bucket, indent=4, sort_keys=True))) if type(bucket) is dict: bucket_name = bucket['bucket'] @@ -402,9 +394,7 @@ def _get_user_info(self, user): Method to get the info on a specific user(s). 
""" user_info = self._request_data(query='user', args="uid={0}&stats=True".format(user)) - - if DEBUG: - print((json.dumps(user_info, indent=4, sort_keys=True))) + logging.debug((json.dumps(user_info, indent=4, sort_keys=True))) if 'display_name' in user_info: user_display_name = user_info['display_name'] @@ -464,19 +454,19 @@ def parse_args(): default=os.environ.get('RADOSGW_SERVER', 'http://radosgw:80') ) parser.add_argument( - '-e', '--admin_entry', + '-e', '--admin-entry', required=False, help="The entry point for an admin request URL [default is '%(default)s']", default=os.environ.get('ADMIN_ENTRY', 'admin') ) parser.add_argument( - '-a', '--access_key', + '-a', '--access-key', required=False, help='S3 access key', default=os.environ.get('ACCESS_KEY', 'NA') ) parser.add_argument( - '-s', '--secret_key', + '-s', '--secret-key', required=False, help='S3 secrest key', default=os.environ.get('SECRET_KEY', 'NA') @@ -505,20 +495,28 @@ def parse_args(): help='Timeout when getting metrics', default=os.environ.get('TIMEOUT', '60'), ) + parser.add_argument( + '-l', '--log-level', + required=False, + help='Provide logging level: DEBUG, INFO, WARNING, ERROR, CRITICAL', + default=os.environ.get('LOG_LEVEL', 'INFO'), + ) + return parser.parse_args() def main(): try: args = parse_args() + logging.basicConfig(level=args.log_level.upper()) REGISTRY.register(RADOSGWCollector( args.host, args.admin_entry, args.access_key, args.secret_key, args.store, args.insecure, args.timeout)) start_http_server(args.port) - print(("Polling {0}. Serving at port: {1}".format(args.host, args.port))) + logging.info(("Polling {0}. 
Serving at port: {1}".format(args.host, args.port))) while True: time.sleep(1) except KeyboardInterrupt: - print("\nInterrupted") + logging.info("\nInterrupted") exit(0) From 2fa27236ce2a7aa9e37f40e06d9f7b8ece547eef Mon Sep 17 00:00:00 2001 From: Alexander Gil Date: Thu, 2 Nov 2023 12:59:07 +0100 Subject: [PATCH 3/7] refactor: Format code with black --- radosgw_usage_exporter.py | 572 ++++++++++++++++++++++---------------- 1 file changed, 328 insertions(+), 244 deletions(-) diff --git a/radosgw_usage_exporter.py b/radosgw_usage_exporter.py index 0627743..c2d2298 100644 --- a/radosgw_usage_exporter.py +++ b/radosgw_usage_exporter.py @@ -21,9 +21,11 @@ class RADOSGWCollector(object): NOTE: By default RADOSGW Servers do not gather usage data and it must be enabled by 'rgw enable usage log = true' in the appropriate section - of ceph.conf see Ceph documentation for details """ + of ceph.conf see Ceph documentation for details""" - def __init__(self, host, admin_entry, access_key, secret_key, store, insecure, timeout): + def __init__( + self, host, admin_entry, access_key, secret_key, store, insecure, timeout + ): super(RADOSGWCollector, self).__init__() self.host = host self.access_key = access_key @@ -58,13 +60,13 @@ def collect(self): # setup dict for aggregating bucket usage accross "bins" self.usage_dict = defaultdict(dict) - rgw_usage = self._request_data(query='usage', args='show-summary=False') - rgw_bucket = self._request_data(query='bucket', args='stats=True') + rgw_usage = self._request_data(query="usage", args="show-summary=False") + rgw_bucket = self._request_data(query="bucket", args="stats=True") rgw_users = self._get_rgw_users() # populate metrics with data if rgw_usage: - for entry in rgw_usage['entries']: + for entry in rgw_usage["entries"]: self._get_usage(entry) self._update_usage_metrics() @@ -77,8 +79,7 @@ def collect(self): self._get_user_info(user) duration = time.time() - start - self._prometheus_metrics['scrape_duration_seconds'].add_metric( - 
[], duration) + self._prometheus_metrics["scrape_duration_seconds"].add_metric([], duration) for metric in list(self._prometheus_metrics.values()): yield metric @@ -89,15 +90,15 @@ def _session(self): """ self.session = requests.Session() self.session_adapter = requests.adapters.HTTPAdapter( - pool_connections=10, - pool_maxsize=10) - self.session.mount('http://', self.session_adapter) - self.session.mount('https://', self.session_adapter) + pool_connections=10, pool_maxsize=10 + ) + self.session.mount("http://", self.session_adapter) + self.session.mount("https://", self.session_adapter) # Inversion of condition, when '--insecure' is defined we disable # requests warning about certificate hostname mismatch. if not self.insecure: - warnings.filterwarnings('ignore', message='Unverified HTTPS request') + warnings.filterwarnings("ignore", message="Unverified HTTPS request") logging.debug("Perform insecured requests") def _request_data(self, query, args): @@ -108,18 +109,25 @@ def _request_data(self, query, args): url = "{0}{1}/?format=json&{2}".format(self.url, query, args) try: - response = self.session.get(url, verify=self.insecure, timeout=float(self.timeout), - auth=S3Auth(self.access_key, - self.secret_key, - self.host)) + response = self.session.get( + url, + verify=self.insecure, + timeout=float(self.timeout), + auth=S3Auth(self.access_key, self.secret_key, self.host), + ) if response.status_code == requests.codes.ok: logging.debug(response) return response.json() else: # Usage caps absent or wrong admin entry - logging.error(("Request error [{0}]: {1}".format( - response.status_code, response.content.decode('utf-8')))) + logging.error( + ( + "Request error [{0}]: {1}".format( + response.status_code, response.content.decode("utf-8") + ) + ) + ) return # DNS, connection errors, etc @@ -133,102 +141,126 @@ def _setup_empty_prometheus_metrics(self): """ self._prometheus_metrics = { - 'ops': - CounterMetricFamily('radosgw_usage_ops_total', - 'Number of operations', 
- labels=["bucket", "owner", "category", "store", "tags"]), - 'successful_ops': - CounterMetricFamily('radosgw_usage_successful_ops_total', - 'Number of successful operations', - labels=["bucket", "owner", "category", "store", "tags"]), - 'bytes_sent': - CounterMetricFamily('radosgw_usage_sent_bytes_total', - 'Bytes sent by the RADOSGW', - labels=["bucket", "owner", "category", "store", "tags"]), - 'bytes_received': - CounterMetricFamily('radosgw_usage_received_bytes_total', - 'Bytes received by the RADOSGW', - labels=["bucket", "owner", "category", "store", "tags"]), - 'bucket_usage_bytes': - GaugeMetricFamily('radosgw_usage_bucket_bytes', - 'Bucket used bytes', - labels=["bucket", "owner", "zonegroup", "store", "tags"]), - 'bucket_utilized_bytes': - GaugeMetricFamily('radosgw_usage_bucket_utilized_bytes', - 'Bucket utilized bytes', - labels=["bucket", "owner", "zonegroup", "store", "tags"]), - 'bucket_usage_objects': - GaugeMetricFamily('radosgw_usage_bucket_objects', - 'Number of objects in bucket', - labels=["bucket", "owner", "zonegroup", "store", "tags"]), - 'bucket_quota_enabled': - GaugeMetricFamily('radosgw_usage_bucket_quota_enabled', - 'Quota enabled for bucket', - labels=["bucket", "owner", "zonegroup", "store", "tags"]), - 'bucket_quota_max_size': - GaugeMetricFamily('radosgw_usage_bucket_quota_size', - 'Maximum allowed bucket size', - labels=["bucket", "owner", "zonegroup", "store", "tags"]), - 'bucket_quota_max_size_bytes': - GaugeMetricFamily('radosgw_usage_bucket_quota_size_bytes', - 'Maximum allowed bucket size in bytes', - labels=["bucket", "owner", "zonegroup", "store", "tags"]), - 'bucket_quota_max_objects': - GaugeMetricFamily('radosgw_usage_bucket_quota_size_objects', - 'Maximum allowed bucket size in number of objects', - labels=["bucket", "owner", "zonegroup", "store", "tags"]), - 'bucket_shards': - GaugeMetricFamily('radosgw_usage_bucket_shards', - 'Number ob shards in bucket', - labels=["bucket", "owner", "zonegroup", "store", "tags"]), - 
'user_metadata': - GaugeMetricFamily('radosgw_user_metadata', - 'User metadata', - labels=["user", "display_name", "email", "storage_class", "store"]), - 'user_quota_enabled': - GaugeMetricFamily('radosgw_usage_user_quota_enabled', - 'User quota enabled', - labels=["user", "store"]), - 'user_quota_max_size': - GaugeMetricFamily('radosgw_usage_user_quota_size', - 'Maximum allowed size for user', - labels=["user", "store"]), - 'user_quota_max_size_bytes': - GaugeMetricFamily('radosgw_usage_user_quota_size_bytes', - 'Maximum allowed size in bytes for user', - labels=["user", "store"]), - 'user_quota_max_objects': - GaugeMetricFamily('radosgw_usage_user_quota_size_objects', - 'Maximum allowed number of objects across all user buckets', - labels=["user", "store"]), - 'user_bucket_quota_enabled': - GaugeMetricFamily('radosgw_usage_user_bucket_quota_enabled', - 'User per-bucket-quota enabled', - labels=["user", "store"]), - 'user_bucket_quota_max_size': - GaugeMetricFamily('radosgw_usage_user_bucket_quota_size', - 'Maximum allowed size for each bucket of user', - labels=["user", "store"]), - 'user_bucket_quota_max_size_bytes': - GaugeMetricFamily('radosgw_usage_user_bucket_quota_size_bytes', - 'Maximum allowed size bytes size for each bucket of user', - labels=["user", "store"]), - 'user_bucket_quota_max_objects': - GaugeMetricFamily('radosgw_usage_user_bucket_quota_size_objects', - 'Maximum allowed number of objects in each user bucket', - labels=["user", "store"]), - 'user_total_objects': - GaugeMetricFamily('radosgw_usage_user_total_objects', - 'Usage of objects by user', - labels=["user", "store"]), - 'user_total_bytes': - GaugeMetricFamily('radosgw_usage_user_total_bytes', - 'Usage of bytes by user', - labels=["user", "store"]), - 'scrape_duration_seconds': - GaugeMetricFamily('radosgw_usage_scrape_duration_seconds', - 'Ammount of time each scrape takes', - labels=[]) + "ops": CounterMetricFamily( + "radosgw_usage_ops_total", + "Number of operations", + 
labels=["bucket", "owner", "category", "store", "tags"], + ), + "successful_ops": CounterMetricFamily( + "radosgw_usage_successful_ops_total", + "Number of successful operations", + labels=["bucket", "owner", "category", "store", "tags"], + ), + "bytes_sent": CounterMetricFamily( + "radosgw_usage_sent_bytes_total", + "Bytes sent by the RADOSGW", + labels=["bucket", "owner", "category", "store", "tags"], + ), + "bytes_received": CounterMetricFamily( + "radosgw_usage_received_bytes_total", + "Bytes received by the RADOSGW", + labels=["bucket", "owner", "category", "store", "tags"], + ), + "bucket_usage_bytes": GaugeMetricFamily( + "radosgw_usage_bucket_bytes", + "Bucket used bytes", + labels=["bucket", "owner", "zonegroup", "store", "tags"], + ), + "bucket_utilized_bytes": GaugeMetricFamily( + "radosgw_usage_bucket_utilized_bytes", + "Bucket utilized bytes", + labels=["bucket", "owner", "zonegroup", "store", "tags"], + ), + "bucket_usage_objects": GaugeMetricFamily( + "radosgw_usage_bucket_objects", + "Number of objects in bucket", + labels=["bucket", "owner", "zonegroup", "store", "tags"], + ), + "bucket_quota_enabled": GaugeMetricFamily( + "radosgw_usage_bucket_quota_enabled", + "Quota enabled for bucket", + labels=["bucket", "owner", "zonegroup", "store", "tags"], + ), + "bucket_quota_max_size": GaugeMetricFamily( + "radosgw_usage_bucket_quota_size", + "Maximum allowed bucket size", + labels=["bucket", "owner", "zonegroup", "store", "tags"], + ), + "bucket_quota_max_size_bytes": GaugeMetricFamily( + "radosgw_usage_bucket_quota_size_bytes", + "Maximum allowed bucket size in bytes", + labels=["bucket", "owner", "zonegroup", "store", "tags"], + ), + "bucket_quota_max_objects": GaugeMetricFamily( + "radosgw_usage_bucket_quota_size_objects", + "Maximum allowed bucket size in number of objects", + labels=["bucket", "owner", "zonegroup", "store", "tags"], + ), + "bucket_shards": GaugeMetricFamily( + "radosgw_usage_bucket_shards", + "Number ob shards in bucket", + 
labels=["bucket", "owner", "zonegroup", "store", "tags"], + ), + "user_metadata": GaugeMetricFamily( + "radosgw_user_metadata", + "User metadata", + labels=["user", "display_name", "email", "storage_class", "store"], + ), + "user_quota_enabled": GaugeMetricFamily( + "radosgw_usage_user_quota_enabled", + "User quota enabled", + labels=["user", "store"], + ), + "user_quota_max_size": GaugeMetricFamily( + "radosgw_usage_user_quota_size", + "Maximum allowed size for user", + labels=["user", "store"], + ), + "user_quota_max_size_bytes": GaugeMetricFamily( + "radosgw_usage_user_quota_size_bytes", + "Maximum allowed size in bytes for user", + labels=["user", "store"], + ), + "user_quota_max_objects": GaugeMetricFamily( + "radosgw_usage_user_quota_size_objects", + "Maximum allowed number of objects across all user buckets", + labels=["user", "store"], + ), + "user_bucket_quota_enabled": GaugeMetricFamily( + "radosgw_usage_user_bucket_quota_enabled", + "User per-bucket-quota enabled", + labels=["user", "store"], + ), + "user_bucket_quota_max_size": GaugeMetricFamily( + "radosgw_usage_user_bucket_quota_size", + "Maximum allowed size for each bucket of user", + labels=["user", "store"], + ), + "user_bucket_quota_max_size_bytes": GaugeMetricFamily( + "radosgw_usage_user_bucket_quota_size_bytes", + "Maximum allowed size bytes size for each bucket of user", + labels=["user", "store"], + ), + "user_bucket_quota_max_objects": GaugeMetricFamily( + "radosgw_usage_user_bucket_quota_size_objects", + "Maximum allowed number of objects in each user bucket", + labels=["user", "store"], + ), + "user_total_objects": GaugeMetricFamily( + "radosgw_usage_user_total_objects", + "Usage of objects by user", + labels=["user", "store"], + ), + "user_total_bytes": GaugeMetricFamily( + "radosgw_usage_user_total_bytes", + "Usage of bytes by user", + labels=["user", "store"], + ), + "scrape_duration_seconds": GaugeMetricFamily( + "radosgw_usage_scrape_duration_seconds", + "Ammount of time each scrape 
takes", + labels=[], + ), } def _get_usage(self, entry): @@ -239,35 +271,43 @@ def _get_usage(self, entry): entry bins. """ - if 'owner' in entry: - bucket_owner = entry['owner'] + if "owner" in entry: + bucket_owner = entry["owner"] # Luminous - elif 'user' in entry: - bucket_owner = entry['user'] + elif "user" in entry: + bucket_owner = entry["user"] if bucket_owner not in list(self.usage_dict.keys()): self.usage_dict[bucket_owner] = defaultdict(dict) - for bucket in entry['buckets']: + for bucket in entry["buckets"]: logging.debug((json.dumps(bucket, indent=4, sort_keys=True))) - if not bucket['bucket']: + if not bucket["bucket"]: bucket_name = "bucket_root" else: - bucket_name = bucket['bucket'] + bucket_name = bucket["bucket"] if bucket_name not in list(self.usage_dict[bucket_owner].keys()): self.usage_dict[bucket_owner][bucket_name] = defaultdict(dict) - for category in bucket['categories']: - category_name = category['category'] - if category_name not in list(self.usage_dict[bucket_owner][bucket_name].keys()): - self.usage_dict[bucket_owner][bucket_name][category_name] = Counter() + for category in bucket["categories"]: + category_name = category["category"] + if category_name not in list( + self.usage_dict[bucket_owner][bucket_name].keys() + ): + self.usage_dict[bucket_owner][bucket_name][ + category_name + ] = Counter() c = self.usage_dict[bucket_owner][bucket_name][category_name] - c.update({'ops': category['ops'], - 'successful_ops': category['successful_ops'], - 'bytes_sent': category['bytes_sent'], - 'bytes_received': category['bytes_received']}) + c.update( + { + "ops": category["ops"], + "successful_ops": category["successful_ops"], + "bytes_sent": category["bytes_sent"], + "bytes_received": category["bytes_received"], + } + ) def _update_usage_metrics(self): """ @@ -278,21 +318,25 @@ def _update_usage_metrics(self): for bucket_name in list(self.usage_dict[bucket_owner].keys()): for category in list(self.usage_dict[bucket_owner][bucket_name].keys()): 
data_dict = self.usage_dict[bucket_owner][bucket_name][category] - self._prometheus_metrics['ops'].add_metric( + self._prometheus_metrics["ops"].add_metric( [bucket_name, bucket_owner, category, self.store], - data_dict['ops']) + data_dict["ops"], + ) - self._prometheus_metrics['successful_ops'].add_metric( + self._prometheus_metrics["successful_ops"].add_metric( [bucket_name, bucket_owner, category, self.store], - data_dict['successful_ops']) + data_dict["successful_ops"], + ) - self._prometheus_metrics['bytes_sent'].add_metric( + self._prometheus_metrics["bytes_sent"].add_metric( [bucket_name, bucket_owner, category, self.store], - data_dict['bytes_sent']) + data_dict["bytes_sent"], + ) - self._prometheus_metrics['bytes_received'].add_metric( + self._prometheus_metrics["bytes_received"].add_metric( [bucket_name, bucket_owner, category, self.store], - data_dict['bytes_received']) + data_dict["bytes_received"], + ) def _get_bucket_usage(self, bucket): """ @@ -302,72 +346,81 @@ def _get_bucket_usage(self, bucket): logging.debug((json.dumps(bucket, indent=4, sort_keys=True))) if type(bucket) is dict: - bucket_name = bucket['bucket'] - bucket_owner = bucket['owner'] - bucket_shards = bucket['num_shards'] + bucket_name = bucket["bucket"] + bucket_owner = bucket["owner"] + bucket_shards = bucket["num_shards"] bucket_usage_bytes = 0 bucket_utilized_bytes = 0 bucket_usage_objects = 0 - if bucket['usage'] and 'rgw.main' in bucket['usage']: + if bucket["usage"] and "rgw.main" in bucket["usage"]: # Prefer bytes, instead kbytes - if 'size_actual' in bucket['usage']['rgw.main']: - bucket_usage_bytes = bucket['usage']['rgw.main']['size_actual'] + if "size_actual" in bucket["usage"]["rgw.main"]: + bucket_usage_bytes = bucket["usage"]["rgw.main"]["size_actual"] # Hammer don't have bytes field - elif 'size_kb_actual' in bucket['usage']['rgw.main']: - usage_kb = bucket['usage']['rgw.main']['size_kb_actual'] + elif "size_kb_actual" in bucket["usage"]["rgw.main"]: + usage_kb = 
bucket["usage"]["rgw.main"]["size_kb_actual"] bucket_usage_bytes = usage_kb * 1024 # Compressed buckets, since Kraken - if 'size_utilized' in bucket['usage']['rgw.main']: - bucket_utilized_bytes = bucket['usage']['rgw.main']['size_utilized'] + if "size_utilized" in bucket["usage"]["rgw.main"]: + bucket_utilized_bytes = bucket["usage"]["rgw.main"]["size_utilized"] # Get number of objects in bucket - if 'num_objects' in bucket['usage']['rgw.main']: - bucket_usage_objects = bucket['usage']['rgw.main']['num_objects'] + if "num_objects" in bucket["usage"]["rgw.main"]: + bucket_usage_objects = bucket["usage"]["rgw.main"]["num_objects"] - if 'zonegroup' in bucket: - bucket_zonegroup = bucket['zonegroup'] + if "zonegroup" in bucket: + bucket_zonegroup = bucket["zonegroup"] # Hammer else: bucket_zonegroup = "0" - if 'tagset' in bucket: - bucket_tagset = bucket['tagset'] - taglist = ", ".join("=".join((k, str(v))) - for k, v in sorted(bucket_tagset.items())) + if "tagset" in bucket: + bucket_tagset = bucket["tagset"] + taglist = ", ".join( + "=".join((k, str(v))) for k, v in sorted(bucket_tagset.items()) + ) else: - taglist = '' + taglist = "" - self._prometheus_metrics['bucket_usage_bytes'].add_metric( + self._prometheus_metrics["bucket_usage_bytes"].add_metric( [bucket_name, bucket_owner, bucket_zonegroup, self.store, taglist], - bucket_usage_bytes) + bucket_usage_bytes, + ) - self._prometheus_metrics['bucket_utilized_bytes'].add_metric( + self._prometheus_metrics["bucket_utilized_bytes"].add_metric( [bucket_name, bucket_owner, bucket_zonegroup, self.store, taglist], - bucket_utilized_bytes) + bucket_utilized_bytes, + ) - self._prometheus_metrics['bucket_usage_objects'].add_metric( + self._prometheus_metrics["bucket_usage_objects"].add_metric( [bucket_name, bucket_owner, bucket_zonegroup, self.store, taglist], - bucket_usage_objects) + bucket_usage_objects, + ) - if 'bucket_quota' in bucket: - self._prometheus_metrics['bucket_quota_enabled'].add_metric( + if "bucket_quota" 
in bucket: + self._prometheus_metrics["bucket_quota_enabled"].add_metric( [bucket_name, bucket_owner, bucket_zonegroup, self.store, taglist], - bucket['bucket_quota']['enabled']) - self._prometheus_metrics['bucket_quota_max_size'].add_metric( + bucket["bucket_quota"]["enabled"], + ) + self._prometheus_metrics["bucket_quota_max_size"].add_metric( [bucket_name, bucket_owner, bucket_zonegroup, self.store, taglist], - bucket['bucket_quota']['max_size']) - self._prometheus_metrics['bucket_quota_max_size_bytes'].add_metric( + bucket["bucket_quota"]["max_size"], + ) + self._prometheus_metrics["bucket_quota_max_size_bytes"].add_metric( [bucket_name, bucket_owner, bucket_zonegroup, self.store, taglist], - bucket['bucket_quota']['max_size_kb'] * 1024) - self._prometheus_metrics['bucket_quota_max_objects'].add_metric( + bucket["bucket_quota"]["max_size_kb"] * 1024, + ) + self._prometheus_metrics["bucket_quota_max_objects"].add_metric( [bucket_name, bucket_owner, bucket_zonegroup, self.store, taglist], - bucket['bucket_quota']['max_objects']) + bucket["bucket_quota"]["max_objects"], + ) - self._prometheus_metrics['bucket_shards'].add_metric( + self._prometheus_metrics["bucket_shards"].add_metric( [bucket_name, bucket_owner, bucket_zonegroup, self.store, taglist], - bucket_shards) + bucket_shards, + ) else: # Hammer junk, just skip it @@ -378,13 +431,13 @@ def _get_rgw_users(self): API request to get users. 
""" - rgw_users = self._request_data(query='user', args='list') + rgw_users = self._request_data(query="user", args="list") - if rgw_users and 'keys' in rgw_users: - return rgw_users['keys'] + if rgw_users and "keys" in rgw_users: + return rgw_users["keys"] else: # Compat with old Ceph versions (pre 12.2.13/13.2.9) - rgw_metadata_users = self._request_data(query='metadata/user', args='') + rgw_metadata_users = self._request_data(query="metadata/user", args="") return rgw_metadata_users return @@ -393,113 +446,135 @@ def _get_user_info(self, user): """ Method to get the info on a specific user(s). """ - user_info = self._request_data(query='user', args="uid={0}&stats=True".format(user)) + user_info = self._request_data( + query="user", args="uid={0}&stats=True".format(user) + ) logging.debug((json.dumps(user_info, indent=4, sort_keys=True))) - if 'display_name' in user_info: - user_display_name = user_info['display_name'] + if "display_name" in user_info: + user_display_name = user_info["display_name"] else: user_display_name = "" - if 'email' in user_info: - user_email = user_info['email'] + if "email" in user_info: + user_email = user_info["email"] else: user_email = "" # Nautilus+ - if 'default_storage_class' in user_info: - user_storage_class = user_info['default_storage_class'] + if "default_storage_class" in user_info: + user_storage_class = user_info["default_storage_class"] else: user_storage_class = "" - self._prometheus_metrics['user_metadata'].add_metric( - [user, user_display_name, user_email, user_storage_class, self.store], 1) - - if 'stats' in user_info: - self._prometheus_metrics['user_total_bytes'].add_metric( - [user, self.store], user_info['stats']['size_actual']) - self._prometheus_metrics['user_total_objects'].add_metric( - [user, self.store], user_info['stats']['num_objects']) - - if 'user_quota' in user_info: - quota = user_info['user_quota'] - self._prometheus_metrics['user_quota_enabled'].add_metric( - [user, self.store], quota['enabled']) - 
self._prometheus_metrics['user_quota_max_size'].add_metric( - [user, self.store], quota['max_size']) - self._prometheus_metrics['user_quota_max_size_bytes'].add_metric( - [user, self.store], quota['max_size_kb'] * 1024) - self._prometheus_metrics['user_quota_max_objects'].add_metric( - [user, self.store], quota['max_objects']) - - if 'bucket_quota' in user_info: - quota = user_info['bucket_quota'] - self._prometheus_metrics['user_bucket_quota_enabled'].add_metric( - [user, self.store], quota['enabled']) - self._prometheus_metrics['user_bucket_quota_max_size'].add_metric( - [user, self.store], quota['max_size']) - self._prometheus_metrics['user_bucket_quota_max_size_bytes'].add_metric( - [user, self.store], quota['max_size_kb'] * 1024) - self._prometheus_metrics['user_bucket_quota_max_objects'].add_metric( - [user, self.store], quota['max_objects']) + self._prometheus_metrics["user_metadata"].add_metric( + [user, user_display_name, user_email, user_storage_class, self.store], 1 + ) + + if "stats" in user_info: + self._prometheus_metrics["user_total_bytes"].add_metric( + [user, self.store], user_info["stats"]["size_actual"] + ) + self._prometheus_metrics["user_total_objects"].add_metric( + [user, self.store], user_info["stats"]["num_objects"] + ) + + if "user_quota" in user_info: + quota = user_info["user_quota"] + self._prometheus_metrics["user_quota_enabled"].add_metric( + [user, self.store], quota["enabled"] + ) + self._prometheus_metrics["user_quota_max_size"].add_metric( + [user, self.store], quota["max_size"] + ) + self._prometheus_metrics["user_quota_max_size_bytes"].add_metric( + [user, self.store], quota["max_size_kb"] * 1024 + ) + self._prometheus_metrics["user_quota_max_objects"].add_metric( + [user, self.store], quota["max_objects"] + ) + + if "bucket_quota" in user_info: + quota = user_info["bucket_quota"] + self._prometheus_metrics["user_bucket_quota_enabled"].add_metric( + [user, self.store], quota["enabled"] + ) + 
self._prometheus_metrics["user_bucket_quota_max_size"].add_metric( + [user, self.store], quota["max_size"] + ) + self._prometheus_metrics["user_bucket_quota_max_size_bytes"].add_metric( + [user, self.store], quota["max_size_kb"] * 1024 + ) + self._prometheus_metrics["user_bucket_quota_max_objects"].add_metric( + [user, self.store], quota["max_objects"] + ) def parse_args(): parser = argparse.ArgumentParser( - description='RADOSGW address and local binding port as well as \ - S3 access_key and secret_key' + description="RADOSGW address and local binding port as well as \ + S3 access_key and secret_key" ) parser.add_argument( - '-H', '--host', + "-H", + "--host", required=False, - help='Server URL for the RADOSGW api (example: http://objects.dreamhost.com/)', - default=os.environ.get('RADOSGW_SERVER', 'http://radosgw:80') + help="Server URL for the RADOSGW api (example: http://objects.dreamhost.com/)", + default=os.environ.get("RADOSGW_SERVER", "http://radosgw:80"), ) parser.add_argument( - '-e', '--admin-entry', + "-e", + "--admin-entry", required=False, help="The entry point for an admin request URL [default is '%(default)s']", - default=os.environ.get('ADMIN_ENTRY', 'admin') + default=os.environ.get("ADMIN_ENTRY", "admin"), ) parser.add_argument( - '-a', '--access-key', + "-a", + "--access-key", required=False, - help='S3 access key', - default=os.environ.get('ACCESS_KEY', 'NA') + help="S3 access key", + default=os.environ.get("ACCESS_KEY", "NA"), ) parser.add_argument( - '-s', '--secret-key', + "-s", + "--secret-key", required=False, - help='S3 secrest key', - default=os.environ.get('SECRET_KEY', 'NA') + help="S3 secrest key", + default=os.environ.get("SECRET_KEY", "NA"), ) parser.add_argument( - '-k', '--insecure', - help='Allow insecure server connections when using SSL', - action="store_false" + "-k", + "--insecure", + help="Allow insecure server connections when using SSL", + action="store_false", ) parser.add_argument( - '-p', '--port', + "-p", + "--port", 
required=False, type=int, - help='Port to listen', - default=int(os.environ.get('VIRTUAL_PORT', '9242')) + help="Port to listen", + default=int(os.environ.get("VIRTUAL_PORT", "9242")), ) parser.add_argument( - '-S', '--store', + "-S", + "--store", required=False, - help='store name added to metrics', - default=os.environ.get('STORE', 'us-east-1'), + help="store name added to metrics", + default=os.environ.get("STORE", "us-east-1"), ) parser.add_argument( - '-t', '--timeout', + "-t", + "--timeout", required=False, - help='Timeout when getting metrics', - default=os.environ.get('TIMEOUT', '60'), + help="Timeout when getting metrics", + default=os.environ.get("TIMEOUT", "60"), ) parser.add_argument( - '-l', '--log-level', + "-l", + "--log-level", required=False, - help='Provide logging level: DEBUG, INFO, WARNING, ERROR, CRITICAL', - default=os.environ.get('LOG_LEVEL', 'INFO'), + help="Provide logging level: DEBUG, INFO, WARNING, ERROR, CRITICAL", + default=os.environ.get("LOG_LEVEL", "INFO"), ) return parser.parse_args() @@ -509,8 +584,17 @@ def main(): try: args = parse_args() logging.basicConfig(level=args.log_level.upper()) - REGISTRY.register(RADOSGWCollector( - args.host, args.admin_entry, args.access_key, args.secret_key, args.store, args.insecure, args.timeout)) + REGISTRY.register( + RADOSGWCollector( + args.host, + args.admin_entry, + args.access_key, + args.secret_key, + args.store, + args.insecure, + args.timeout, + ) + ) start_http_server(args.port) logging.info(("Polling {0}. 
Serving at port: {1}".format(args.host, args.port))) while True: From b99145cd9bbd38c0068c2f36cce0ae24f75dc502 Mon Sep 17 00:00:00 2001 From: Alexander Gil Date: Thu, 2 Nov 2023 13:00:46 +0100 Subject: [PATCH 4/7] refactor: Remove duplicated envars in Dockerfile --- Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 71da94d..5deef96 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,7 +9,6 @@ RUN pip install --no-cache-dir -r requirements.txt COPY radosgw_usage_exporter.py /usr/src/app EXPOSE 9242 -ENV RADOSGW_SERVER=http://radosgw:80 VIRTUAL_PORT=9242 DEBUG=0 ENTRYPOINT [ "python", "-u", "./radosgw_usage_exporter.py" ] CMD [] From 5267485cf721b99ea69e0928cf2ac013f363796d Mon Sep 17 00:00:00 2001 From: Alexander Gil Date: Thu, 2 Nov 2023 13:13:39 +0100 Subject: [PATCH 5/7] ci: Add github action to publish images --- .github/workflows/docker-images.yaml | 39 ++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 .github/workflows/docker-images.yaml diff --git a/.github/workflows/docker-images.yaml b/.github/workflows/docker-images.yaml new file mode 100644 index 0000000..81ce29d --- /dev/null +++ b/.github/workflows/docker-images.yaml @@ -0,0 +1,39 @@ +name: Multi-Arch Docker Build and Push to GHCR + +on: + push: + branches: + - master + tags: + - v* +jobs: + build: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Login to GHCR + run: echo ${{ secrets.GITHUB_TOKEN }} | docker login ghcr.io -u ${{ github.repository_owner }} --password-stdin + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx for multiarch images + uses: docker/setup-buildx-action@v3 + + - name: Build and push Docker image to GHCR + run: | + # Strip git ref prefix from version + VERSION=$(echo "${{ github.ref }}" | sed -e 's,.*/\(.*\),\1,') + + # Strip "v" prefix from tag name + [[ "${{ github.ref }}" == "refs/tags/"* ]] && VERSION=$(echo $VERSION | 
sed -e 's/^v//') + + # Use Docker `latest` tag convention + [ "$VERSION" == "master" ] && VERSION=latest + docker buildx build --platform linux/amd64,linux/arm64,linux/arm/v7 \ + --push \ + -t ghcr.io/${{ github.repository }}:${VERSION} \ + . From 798384d3d54df00d7ac24ed1bedb6c2cdd23ad39 Mon Sep 17 00:00:00 2001 From: Alexander Gil Date: Thu, 2 Nov 2023 13:37:42 +0100 Subject: [PATCH 6/7] docs: Update docker image and fix typos --- README.md | 95 ++++++++++++++++++++------------------- radosgw_usage_exporter.py | 6 +-- 2 files changed, 51 insertions(+), 50 deletions(-) diff --git a/README.md b/README.md index 42dcf59..e9b06a9 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,22 @@ # Ceph RADOSGW Usage Exporter -[Prometheus](https://prometheus.io/) exporter that scrapes -[Ceph](http://ceph.com/) RADOSGW usage information (operations and buckets). -This information is gathered from a RADOSGW using the +[Prometheus](https://prometheus.io/) exporter that scrapes [Ceph](http://ceph.com/) RADOSGW usage +information (operations and buckets). This information is gathered from a RADOSGW using the [Admin Operations API](http://docs.ceph.com/docs/master/radosgw/adminops/). This exporter was based off from both -(https://www.robustperception.io/writing-a-jenkins-exporter-in-python/) and the -more elaborate Jenkins exporter here -(https://github.com/lovoo/jenkins_exporter). +(https://www.robustperception.io/writing-a-jenkins-exporter-in-python/) and the more elaborate +Jenkins exporter here (https://github.com/lovoo/jenkins_exporter). ## Requirements -* Working Ceph Cluster with Object Gateways setup. -* Ceph RADOSGWs must beconfigured to gather usage information as this is not -on by default. The miniumum is to enable it via `ceph.conf` as below. There are -however other options that are available and should be considered -[here](http://docs.ceph.com/docs/master/radosgw/config-ref/). 
If you don't configure -thresholds, intervals, and shards you may end up having too large objects in the usage -namespace of the log pool. The values below are just examples. Check the documentation -which ones would be the best ones for your setup. +- Working Ceph Cluster with Object Gateways setup. +- Ceph RADOSGWs must be configured to gather usage information as this is not on by default. The + minimum is to enable it via `ceph.conf` as below. There are however other options that are + available and should be considered [here](http://docs.ceph.com/docs/master/radosgw/config-ref/). + If you don't configure thresholds, intervals, and shards you may end up having too large objects + in the usage namespace of the log pool. The values below are just examples. Check the + documentation which ones would be the best ones for your setup. ``` rgw enable usage log = true @@ -30,19 +27,21 @@ rgw usage max user shards = 8 ``` -* Configure admin entry point (default is 'admin'): +- Configure admin entry point (default is 'admin'): + ``` rgw admin entry = "admin" ``` -* Enable admin API (default is enabled): +- Enable admin API (default is enabled): + ``` rgw enable apis = "s3, admin" ``` -* This exporter requires a user that has the following capability, see the Admin Guide -[here](http://docs.ceph.com/docs/master/radosgw/admin/#add-remove-admin-capabilities) -for more details. +- This exporter requires a user that has the following capability, see the Admin Guide + [here](http://docs.ceph.com/docs/master/radosgw/admin/#add-remove-admin-capabilities) for more + details. ``` "caps": [ @@ -64,57 +63,59 @@ for more details. } ``` -**Note:** If using a loadbalancer in front of your RADOSGWs, please make sure your timeouts are set appropriately as clusters with a large number of buckets, or large number of users+buckets could cause the usage query to exceed the loadbalancer timeout. 
+**Note:** If using a loadbalancer in front of your RADOSGWs, please make sure your timeouts are set +appropriately as clusters with a large number of buckets, or large number of users+buckets could +cause the usage query to exceed the loadbalancer timeout. For haproxy the timeout in question is `timeout server` ## Local Installation -``` + +```bash git clone git@github.com:blemmenes/radosgw_usage_exporter.git cd radosgw_usage_exporter pip install requirements.txt ``` -### Usage -``` -usage: radosgw_usage_exporter.py [-h] [-H HOST] [-e ADMIN_ENTRY] - [-a ACCESS_KEY] [-s SECRET_KEY] [-p PORT] - -RADOSGW address and local binding port as well as S3 access_key and secret_key - -optional arguments: - -h, --help show this help message and exit - -H HOST, --host HOST Server URL for the RADOSGW api (example: - http://objects.dreamhost.com/) - -e ADMIN_ENTRY, --admin_entry ADMIN_ENTRY - The entry point for an admin request URL [default is - 'admin'] - -a ACCESS_KEY, --access_key ACCESS_KEY - S3 access key - -s SECRET_KEY, --secret_key SECRET_KEY - S3 secrest key - -p PORT, --port PORT Port to listen -``` +## Config + +| _Arg_ | _Env_ | _Description_ | _Default_ | +| ------------------ | ---------------- | ----------------------------------------------------------------------- | ------------------- | +| `-H --host` | `RADOSGW_SERVER` | Server URL for the RADOSGW api (example: http://objects.dreamhost.com/) | `http://radosgw:80` | +| `-e --admin-entry` | `ADMIN_ENTRY` | The entry point for an admin request URL | `admin` | +| `-a --access-key` | `ACCESS_KEY` | S3 access key | `NA` | +| `-s --secret-key` | `SECRET_KEY` | S3 secret key | `NA` | +| `-k --insecure` | | Allow insecure server connections when using SSL | `false` | +| `-p --port` | VIRTUAL_PORT | Port to listen | `9242` | +| `-S --store` | STORE | Store name added to metrics | `us-east-1` | +| `-t --timeout` | TIMEOUT | Timeout when getting metrics | `us-east-1` | +| `-l --log-level` | LOG_LEVEL | Provide logging 
level: DEBUG, INFO, WARNING, ERROR or CRITICAL | `INFO` | ### Example -``` + +```bash ./check_ceph_rgw_api -H https://objects.dreamhost.com/ -a JXUABTZZYHAFLCMF9VYV -s jjP8RDD0R156atS6ACSy2vNdJLdEPM0TJQ5jD1pw ``` ## Docker Usage -Docker build (https://hub.docker.com/r/blemmenes/radosgw_usage_exporter/): -``` -docker run -d -p 9242 blemmenes/radosgw_usage_exporter:latest \ + +Docker build +(https://github.com/pando85/radosgw_usage_exporter/pkgs/container/radosgw_usage_exporter): + +```bash +docker run -d -p 9242 ghcr.io/pando85/radosgw_usage_exporter:latest \ -H -a -s -p 9242 ``` + Arguments can also be specified by environment variables as well. -``` + +```bash docker run -d -p 9242:9242 \ -e "RADOSGW_SERVER=" \ -e "VIRTUAL_PORT=9242" \ -e "ACCESS_KEY=" \ -e "SECRET_KEY=" \ -blemmenes/radosgw_usage_exporter:latest +ghcr.io/pando85/radosgw_usage_exporter:latest ``` Resulting metrics can be then retrieved via your Prometheus server via the diff --git a/radosgw_usage_exporter.py b/radosgw_usage_exporter.py index c2d2298..c39d4df 100644 --- a/radosgw_usage_exporter.py +++ b/radosgw_usage_exporter.py @@ -538,7 +538,7 @@ def parse_args(): "-s", "--secret-key", required=False, - help="S3 secrest key", + help="S3 secret key", default=os.environ.get("SECRET_KEY", "NA"), ) parser.add_argument( @@ -559,7 +559,7 @@ def parse_args(): "-S", "--store", required=False, - help="store name added to metrics", + help="Store name added to metrics", default=os.environ.get("STORE", "us-east-1"), ) parser.add_argument( @@ -573,7 +573,7 @@ def parse_args(): "-l", "--log-level", required=False, - help="Provide logging level: DEBUG, INFO, WARNING, ERROR, CRITICAL", + help="Provide logging level: DEBUG, INFO, WARNING, ERROR or CRITICAL", default=os.environ.get("LOG_LEVEL", "INFO"), ) From 488dce6a201301b306265221cd487839c3c5c339 Mon Sep 17 00:00:00 2001 From: Alexander Gil Date: Thu, 2 Nov 2023 13:46:46 +0100 Subject: [PATCH 7/7] docs: Add kubernetes example --- README.md | 9 +++- 
examples/k8s/k8s/deployment.yaml | 72 +++++++++++++++++++++++++ examples/k8s/k8s/object-store-user.yaml | 12 +++++ examples/k8s/k8s/service-monitor.yaml | 19 +++++++ examples/k8s/k8s/service.yaml | 16 ++++++ 5 files changed, 126 insertions(+), 2 deletions(-) create mode 100644 examples/k8s/k8s/deployment.yaml create mode 100644 examples/k8s/k8s/object-store-user.yaml create mode 100644 examples/k8s/k8s/service-monitor.yaml create mode 100644 examples/k8s/k8s/service.yaml diff --git a/README.md b/README.md index e9b06a9..ce49fa9 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,7 @@ pip install requirements.txt | `-k --insecure` | | Allow insecure server connections when using SSL | `false` | | `-p --port` | VIRTUAL_PORT | Port to listen | `9242` | | `-S --store` | STORE | Store name added to metrics | `us-east-1` | -| `-t --timeout` | TIMEOUT | Timeout when getting metrics | `us-east-1` | +| `-t --timeout` | TIMEOUT | Timeout when getting metrics | `60` | | `-l --log-level` | LOG_LEVEL | Provide logging level: DEBUG, INFO, WARNING, ERROR or CRITICAL | `INFO` | ### Example @@ -97,7 +97,7 @@ pip install requirements.txt ./check_ceph_rgw_api -H https://objects.dreamhost.com/ -a JXUABTZZYHAFLCMF9VYV -s jjP8RDD0R156atS6ACSy2vNdJLdEPM0TJQ5jD1pw ``` -## Docker Usage +## Docker Docker build (https://github.com/pando85/radosgw_usage_exporter/pkgs/container/radosgw_usage_exporter): @@ -120,3 +120,8 @@ ghcr.io/pando85/radosgw_usage_exporter:latest Resulting metrics can be then retrieved via your Prometheus server via the `http://:9242/metrics` endpoint. + +## Kubernetes + +You can find an example of deployment using [Rook](https://rook.io/) operator in a K8s environment +in `examples/k8s` directory. 
diff --git a/examples/k8s/k8s/deployment.yaml b/examples/k8s/k8s/deployment.yaml new file mode 100644 index 0000000..39f24a3 --- /dev/null +++ b/examples/k8s/k8s/deployment.yaml @@ -0,0 +1,72 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: prometheus-buckets-usage-exporter + labels: + app.kubernetes.io/name: prometheus-buckets-usage-exporter +spec: + replicas: 1 + revisionHistoryLimit: 3 + selector: + matchLabels: + app.kubernetes.io/name: prometheus-buckets-usage-exporter + template: + metadata: + labels: + app.kubernetes.io/name: prometheus-buckets-usage-exporter + spec: + containers: + - image: ghcr.io/pando85/radosgw_usage_exporter:latest + env: + - name: ACCESS_KEY + valueFrom: + secretKeyRef: + key: AccessKey + name: rook-ceph-object-user-us-east-1-buckets-usage-exporter + - name: SECRET_KEY + valueFrom: + secretKeyRef: + key: SecretKey + name: rook-ceph-object-user-us-east-1-buckets-usage-exporter + - name: RADOSGW_SERVER + valueFrom: + secretKeyRef: + key: Endpoint + name: rook-ceph-object-user-us-east-1-buckets-usage-exporter + - name: VIRTUAL_PORT + value: "9242" + - name: STORE + value: eu-central-1a + - name: LOG_LEVEL + value: INFO + - name: TIMEOUT + value: "60" + args: + - --insecure + name: exporter + ports: + - containerPort: 9242 + name: http + protocol: TCP + resources: + limits: + cpu: 500m + memory: 512Mi + requests: + cpu: 10m + memory: 40Mi + livenessProbe: + tcpSocket: + port: http + readinessProbe: + tcpSocket: + port: http + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + securityContext: + runAsNonRoot: true + runAsUser: 1000 diff --git a/examples/k8s/k8s/object-store-user.yaml b/examples/k8s/k8s/object-store-user.yaml new file mode 100644 index 0000000..8371e15 --- /dev/null +++ b/examples/k8s/k8s/object-store-user.yaml @@ -0,0 +1,12 @@ +apiVersion: ceph.rook.io/v1 +kind: CephObjectStoreUser +metadata: + name: buckets-usage-exporter +spec: + store: us-east-1 
+ displayName: buckets-usage-exporter + capabilities: + bucket: read + metadata: read + usage: read + user: read diff --git a/examples/k8s/k8s/service-monitor.yaml b/examples/k8s/k8s/service-monitor.yaml new file mode 100644 index 0000000..493adf7 --- /dev/null +++ b/examples/k8s/k8s/service-monitor.yaml @@ -0,0 +1,19 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: prometheus-buckets-usage-exporter + labels: + app.kubernetes.io/name: prometheus-buckets-usage-exporter +spec: + selector: + matchLabels: + app.kubernetes.io/name: prometheus-buckets-usage-exporter + endpoints: + - honorLabels: true + interval: 90s + path: /metrics + port: http + scheme: http + scrapeTimeout: 60s + jobLabel: prometheus-buckets-usage-exporter + diff --git a/examples/k8s/k8s/service.yaml b/examples/k8s/k8s/service.yaml new file mode 100644 index 0000000..ae258dc --- /dev/null +++ b/examples/k8s/k8s/service.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + name: prometheus-buckets-usage-exporter + labels: + app.kubernetes.io/name: prometheus-buckets-usage-exporter +spec: + selector: + app.kubernetes.io/name: prometheus-buckets-usage-exporter + ports: + - name: http + port: 9242 + protocol: TCP + targetPort: 9242 + +