Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Timezone-aware datetime objects, disable_metrics_collection bugfix #408

Merged
merged 3 commits into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,14 @@
Change Log
==========

5.4.1
=====

* Disable idle instance check when `disable_metrics_collection` is active
* Switch to timezone-aware datetime objects everywhere. In particular, replace deprecated `datetime.utcnow()` with `datetime.now(timezone.utc)`.
* Update docs


5.4.0
=====

Expand Down
3 changes: 2 additions & 1 deletion docs/execution_json.rst
Original file line number Diff line number Diff line change
Expand Up @@ -572,9 +572,10 @@ The ``config`` field describes execution configuration.
- type of EBS (e.g. ``gp3``, ``gp2``, ``io1``, ``io2``)
- optional (default: gp3 (version >= ``1.0.0``) or gp2 (version < ``1.0.0``))

:disable_metrics_collection:
:disable_metrics_collection (**Not recommended**):
- <true|false>, default: false
- If true, the cloudwatch agent is not installed on the EC2 and CPU/memory/storage won't be collected and sent to AWS CloudWatch. Disabling metrics collection can reduce CloudWatch associated costs.
- If true, Tibanna's check for idle or stalled instances will be disabled. Please monitor your runs accordingly.

:cloudwatch_dashboard:
- **This option is now deprecated.**
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "tibanna"
version = "5.4.0"
version = "5.4.1"
description = "Tibanna runs portable pipelines (in CWL/WDL) on the AWS Cloud."
authors = ["4DN-DCIC Team <[email protected]>"]
license = "MIT"
Expand Down
20 changes: 11 additions & 9 deletions tibanna/check_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,11 @@ def run(self):
raise StillRunningException("job %s still running" % jobid)

def terminate_idle_instance(self, jobid, instance_id, cpu, ebs_read):

# Don't check for idle instance if we don't collect any metrics
if self.input_json['config'].get('disable_metrics_collection'):
return

if not cpu or cpu < 1.0:
# the instance wasn't terminated - otherwise it would have been captured in the previous error.
if not ebs_read or ebs_read < 1000: # minimum 1kb
Expand All @@ -161,18 +166,15 @@ def terminate_idle_instance(self, jobid, instance_id, cpu, ebs_read):
public_postrun_json = self.input_json['config'].get('public_postrun_json', False)
self.handle_postrun_json(bucket_name, jobid, self.input_json, public_read=public_postrun_json) # We need to record the end time
boto3.client('ec2').terminate_instances(InstanceIds=[instance_id])
errmsg = (
"Nothing has been running for the past hour for job %s,"
"(CPU utilization %s and EBS read %s bytes)."
) % (jobid, str(cpu), str(ebs_read))
raise EC2IdleException(errmsg)
except Exception as e:
errmsg = (
"Nothing has been running for the past hour for job %s,"
"but cannot terminate the instance - cpu utilization (%s) : %s"
) % (jobid, str(cpu), str(e))
errmsg = (f"Nothing has been running for the past hour for job {jobid}",
f", but instance could not be terminated. Error: {str(e)}")
logger.error(errmsg)
raise EC2IdleException(errmsg)

errmsg = (f"Nothing has been running for the past hour for job {jobid},",
f"(CPU utilization {str(cpu)} and EBS read {str(ebs_read)} bytes).")
raise EC2IdleException(errmsg)

def handle_postrun_json(self, bucket_name, jobid, input_json, public_read=False):
postrunjson = "%s.postrun.json" % jobid
Expand Down
8 changes: 4 additions & 4 deletions tibanna/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import shutil
import subprocess
import webbrowser
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from dateutil.tz import tzutc
from uuid import uuid4, UUID
from types import ModuleType
Expand Down Expand Up @@ -624,7 +624,7 @@ def rerun_many(self, sfn=None, stopdate='13Feb2018', stophour=13,
sfn = self.default_stepfunction_name
stophour = stophour + offset
stoptime = stopdate + ' ' + str(stophour) + ':' + str(stopminute)
stoptime_in_datetime = datetime.strptime(stoptime, '%d%b%Y %H:%M')
stoptime_in_datetime = datetime.strptime(stoptime, '%d%b%Y %H:%M').replace(tzinfo=timezone.utc)
client = boto3.client('stepfunctions')
sflist = client.list_executions(stateMachineArn=STEP_FUNCTION_ARN(sfn), statusFilter=status)
k = 0
Expand Down Expand Up @@ -1018,7 +1018,7 @@ def plot_metrics(self, job_id, sfn=None, directory='.', open_browser=True, force
if hasattr(job, 'end_time_as_datetime') and job.end_time_as_datetime:
endtime = job.end_time_as_datetime
else:
endtime = datetime.utcnow()
endtime = datetime.now(timezone.utc)
if hasattr(job, 'filesystem') and job.filesystem:
filesystem = job.filesystem
else:
Expand Down Expand Up @@ -1049,7 +1049,7 @@ def plot_metrics(self, job_id, sfn=None, directory='.', open_browser=True, force
job_complete = False # still running
else:
# waiting 5 min to be sure the instance is starting
if (datetime.utcnow() - starttime) / timedelta(minutes=1) < 5:
if (datetime.now(timezone.utc) - starttime) / timedelta(minutes=1) < 5:
raise Exception("the instance is still setting up. " +
"Wait a few seconds/minutes and try again.")
else:
Expand Down
7 changes: 3 additions & 4 deletions tibanna/cw_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@
METRICS_COLLECTION_INTERVAL,
S3_ENCRYT_KEY_ID
)
from datetime import datetime
from datetime import timedelta
from datetime import datetime, timezone, timedelta
import json, math


Expand All @@ -29,9 +28,9 @@ class TibannaResource(object):

@classmethod
def convert_timestamp_to_datetime(cls, timestamp):
return datetime.strptime(timestamp, cls.timestamp_format)
return datetime.strptime(timestamp, cls.timestamp_format).replace(tzinfo=timezone.utc)

def __init__(self, instance_id, filesystem, starttime, endtime=datetime.utcnow(), cost_estimate = 0.0, cost_estimate_type = "NA"):
def __init__(self, instance_id, filesystem, starttime, endtime=datetime.now(timezone.utc), cost_estimate = 0.0, cost_estimate_type = "NA"):
"""All the Cloudwatch metrics are retrieved and stored at the initialization.
:param instance_id: e.g. 'i-0167a6c2d25ce5822'
:param filesystem: e.g. "/dev/xvdb", "/dev/nvme1n1"
Expand Down
4 changes: 2 additions & 2 deletions tibanna/job.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import boto3
import json
from datetime import datetime
from datetime import datetime, timezone
from . import create_logger
from tibanna import dd_utils
from .vars import (
Expand Down Expand Up @@ -280,7 +280,7 @@ def get_info_from_dd(ddres):

@staticmethod
def add_to_dd(job_id, execution_name, sfn, logbucket, verbose=True):
time_stamp = datetime.strftime(datetime.utcnow(), '%Y%m%d-%H:%M:%S-UTC')
time_stamp = datetime.strftime(datetime.now(timezone.utc), '%Y%m%d-%H:%M:%S-UTC')
dydb = boto3.client('dynamodb', region_name=AWS_REGION)
try:
# first check the table exists
Expand Down
12 changes: 6 additions & 6 deletions tibanna/pricing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import botocore
import re
from . import create_logger
from datetime import datetime, timedelta
from datetime import datetime, timedelta, timezone
from .utils import (
does_key_exist,
read_s3,
Expand All @@ -30,14 +30,14 @@ def get_cost(postrunjson, job_id):
job = postrunjson.Job

def reformat_time(t, delta):
d = datetime.strptime(t, '%Y%m%d-%H:%M:%S-UTC') + timedelta(days=delta)
d = datetime.strptime(t, '%Y%m%d-%H:%M:%S-UTC').replace(tzinfo=timezone.utc) + timedelta(days=delta)
return d.strftime("%Y-%m-%d")

start_time = reformat_time(job.start_time, -1) # give more room
if(job.end_time != None):
end_time = reformat_time(job.end_time, 1)
else:
end_time = datetime.utcnow() + timedelta(days=1) # give more room
end_time = datetime.now(timezone.utc) + timedelta(days=1) # give more room
end_time = end_time.strftime("%Y-%m-%d")

billing_args = {'Filter': {'Tags': {'Key': 'Name', 'Values': ['awsem-' + job_id]}},
Expand Down Expand Up @@ -73,8 +73,8 @@ def get_cost_estimate(postrunjson, ebs_root_type = "gp3", aws_price_overwrite =
logger.warning("job.end_time not available. Cannot calculate estimated cost.")
return 0.0, "NA"

job_start = datetime.strptime(job.start_time, '%Y%m%d-%H:%M:%S-UTC')
job_end = datetime.strptime(job.end_time, '%Y%m%d-%H:%M:%S-UTC')
job_start = datetime.strptime(job.start_time, '%Y%m%d-%H:%M:%S-UTC').replace(tzinfo=timezone.utc)
job_end = datetime.strptime(job.end_time, '%Y%m%d-%H:%M:%S-UTC').replace(tzinfo=timezone.utc)
job_duration = (job_end - job_start).seconds / 3600.0 # in hours

if(not job.instance_type):
Expand Down Expand Up @@ -378,7 +378,7 @@ def get_cost_estimate(postrunjson, ebs_root_type = "gp3", aws_price_overwrite =
) * job_duration / (24.0*30.0)
estimated_cost = estimated_cost + ebs_iops_cost

time_since_run = (datetime.utcnow() - job_end).total_seconds() / (3600 * 24) # days
time_since_run = (datetime.now(timezone.utc) - job_end).total_seconds() / (3600 * 24) # days
estimation_type = "retrospective estimate" if time_since_run > 10 else "immediate estimate"

return estimated_cost, estimation_type
Expand Down
2 changes: 1 addition & 1 deletion tibanna/top.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ def timestamps_as_minutes(self, timestamp_start):

@classmethod
def as_datetime(cls, timestamp):
return datetime.datetime.strptime(timestamp, cls.timestamp_format)
return datetime.datetime.strptime(timestamp, cls.timestamp_format).replace(tzinfo=datetime.timezone.utc)

@staticmethod
def wrap_in_double_quotes(string):
Expand Down
6 changes: 2 additions & 4 deletions tibanna/vars.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import os
import boto3
import sys
from datetime import datetime
from dateutil.tz import tzutc
from datetime import datetime, timezone
from ._version import __version__
from . import create_logger

Expand Down Expand Up @@ -137,8 +136,7 @@


def PARSE_AWSEM_TIME(t_str):
t = datetime.strptime(t_str, AWSEM_TIME_STAMP_FORMAT)
return t.replace(tzinfo=tzutc())
return datetime.strptime(t_str, AWSEM_TIME_STAMP_FORMAT).replace(tzinfo=timezone.utc)


# EBS mount path for cloudwatch metric collection
Expand Down
Loading