Skip to content

Commit

Permalink
Merge pull request buildbot#7346 from p12tic/build-lock-duration
Browse files Browse the repository at this point in the history
Lock time tracking improvements
  • Loading branch information
p12tic authored Dec 31, 2023
2 parents 60ed03c + 0ca3d99 commit 2bc2df9
Show file tree
Hide file tree
Showing 33 changed files with 703 additions and 192 deletions.
1 change: 1 addition & 0 deletions common/code_spelling_ignore_words.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1669,6 +1669,7 @@ workerdir
workerenvironment
workerforbuilder
workerforbuilders
workerid
workerlock
workermanager
workername
Expand Down
9 changes: 9 additions & 0 deletions master/buildbot/data/builds.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def db2data(self, dbdict):
'masterid': dbdict['masterid'],
'started_at': dbdict['started_at'],
'complete_at': dbdict['complete_at'],
"locks_duration_s": dbdict["locks_duration_s"],
'complete': dbdict['complete_at'] is not None,
'state_string': dbdict['state_string'],
'results': dbdict['results'],
Expand All @@ -66,6 +67,7 @@ def db2data(self, dbdict):
'masterid': 'builds.masterid',
'started_at': 'builds.started_at',
'complete_at': 'builds.complete_at',
"locks_duration_s": "builds.locks_duration_s",
'state_string': 'builds.state_string',
'results': 'builds.results',
}
Expand Down Expand Up @@ -210,6 +212,7 @@ class EntityType(types.Entity):
started_at = types.DateTime()
complete = types.Boolean()
complete_at = types.NoneOk(types.DateTime())
locks_duration_s = types.Integer()
results = types.NoneOk(types.Integer())
state_string = types.String()
properties = types.NoneOk(types.SourcedProperties())
Expand Down Expand Up @@ -244,6 +247,12 @@ def setBuildStateString(self, buildid, state_string):
yield self.generateEvent(buildid, "update")
return res

@base.updateMethod
@defer.inlineCallbacks
def add_build_locks_duration(self, buildid, duration_s):
yield self.master.db.builds.add_build_locks_duration(buildid=buildid, duration_s=duration_s)
yield self.generateEvent(buildid, "update")

@base.updateMethod
@defer.inlineCallbacks
def finishBuild(self, buildid, results):
Expand Down
19 changes: 15 additions & 4 deletions master/buildbot/data/steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,14 +140,25 @@ def addStep(self, buildid, name):

@base.updateMethod
@defer.inlineCallbacks
def startStep(self, stepid):
yield self.master.db.steps.startStep(stepid=stepid)
def startStep(self, stepid, started_at=None, locks_acquired=False):
if started_at is None:
started_at = int(self.master.reactor.seconds())
yield self.master.db.steps.startStep(
stepid=stepid,
started_at=started_at,
locks_acquired=locks_acquired
)
yield self.generateEvent(stepid, 'started')

@base.updateMethod
@defer.inlineCallbacks
def set_step_locks_acquired_at(self, stepid):
yield self.master.db.steps.set_step_locks_acquired_at(stepid=stepid)
def set_step_locks_acquired_at(self, stepid, locks_acquired_at=None):
if locks_acquired_at is None:
locks_acquired_at = int(self.master.reactor.seconds())

yield self.master.db.steps.set_step_locks_acquired_at(
stepid=stepid, locks_acquired_at=locks_acquired_at
)
yield self.generateEvent(stepid, 'updated')

@base.updateMethod
Expand Down
13 changes: 13 additions & 0 deletions master/buildbot/db/builds.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ def thd(conn):
"masterid": masterid,
"started_at": started_at,
"complete_at": None,
"locks_duration_s": 0,
"state_string": state_string
})
except (sa.exc.IntegrityError, sa.exc.ProgrammingError) as e:
Expand Down Expand Up @@ -251,6 +252,17 @@ def thd(conn):
{"value": value_js, "source": source})
yield self.db.pool.do(thd)

@defer.inlineCallbacks
def add_build_locks_duration(self, buildid, duration_s):
def thd(conn):
builds_tbl = self.db.model.builds
conn.execute(
builds_tbl.update(builds_tbl.c.id == buildid).values(
locks_duration_s=builds_tbl.c.locks_duration_s + duration_s
)
)
yield self.db.pool.do(thd)

def _builddictFromRow(self, row):
return {
"id": row.id,
Expand All @@ -261,6 +273,7 @@ def _builddictFromRow(self, row):
"masterid": row.masterid,
"started_at": epoch2datetime(row.started_at),
"complete_at": epoch2datetime(row.complete_at),
"locks_duration_s": row.locks_duration_s,
"state_string": row.state_string,
"results": row.results
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# This file is part of Buildbot. Buildbot is free software: you can
# redistribute it and/or modify it under the terms of the GNU General Public
# License as published by the Free Software Foundation, version 2.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 51
# Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Copyright Buildbot Team Members

"""add pause_reason column to workers table
Revision ID: 066
Revises: 065
"""
import sqlalchemy as sa
from alembic import op

from buildbot.util import sautils

# revision identifiers, used by Alembic.
revision = "066"
down_revision = "065"
branch_labels = None
depends_on = None


def upgrade():
op.add_column("builds", sa.Column("locks_duration_s", sa.Integer, nullable=True))

metadata = sa.MetaData()
builds_tbl = sautils.Table(
"builds", metadata,
sa.Column("locks_duration_s", sa.Integer, nullable=True)
)

op.execute(builds_tbl.update(values={builds_tbl.c.locks_duration_s: 0}))

with op.batch_alter_table("builds") as batch_op:
batch_op.alter_column("locks_duration_s", existing_type=sa.Integer, nullable=False)


def downgrade():
op.drop_column("builds", "locks_duration_s")
3 changes: 3 additions & 0 deletions master/buildbot/db/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,9 @@ class Model(base.DBConnectorComponent):
# start/complete times
sa.Column('started_at', sa.Integer, nullable=False),
sa.Column('complete_at', sa.Integer),
# Contains total duration that completed steps spent waiting for locks. Currently running
# step is not included.
sa.Column("locks_duration_s", sa.Integer, nullable=False),
sa.Column('state_string', sa.Text, nullable=False),
sa.Column('results', sa.Integer),
)
Expand Down
13 changes: 6 additions & 7 deletions master/buildbot/db/steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,19 +118,18 @@ def thd(conn):
return self.db.pool.do(thd)

@defer.inlineCallbacks
def startStep(self, stepid):
started_at = int(self.master.reactor.seconds())

def startStep(self, stepid, started_at, locks_acquired):
def thd(conn):
tbl = self.db.model.steps
q = tbl.update(whereclause=tbl.c.id == stepid)
conn.execute(q, started_at=started_at)
if locks_acquired:
conn.execute(q, started_at=started_at, locks_acquired_at=started_at)
else:
conn.execute(q, started_at=started_at)
yield self.db.pool.do(thd)

@defer.inlineCallbacks
def set_step_locks_acquired_at(self, stepid):
locks_acquired_at = int(self.master.reactor.seconds())

def set_step_locks_acquired_at(self, stepid, locks_acquired_at):
def thd(conn):
tbl = self.db.model.steps
q = tbl.update(whereclause=tbl.c.id == stepid)
Expand Down
74 changes: 49 additions & 25 deletions master/buildbot/process/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,10 @@ def __init__(self, requests):
self.sources = requests[0].mergeSourceStampsWith(requests[1:])
self.reason = requests[0].mergeReasons(requests[1:])

self._preparation_step = None
self._locks_acquire_step = None
self.currentStep = None

self.workerEnvironment = {}
self.buildid = None
self._buildid_notifier = Notifier()
Expand Down Expand Up @@ -339,28 +342,32 @@ def startBuild(self, workerforbuilder):
# the preparation step counts the time needed for preparing the worker and getting the
# locks.
# we cannot use a real step as we don't have a worker yet.
self.preparation_step = buildstep.create_step_from_step_or_factory(
self._preparation_step = buildstep.create_step_from_step_or_factory(
buildstep.BuildStep(name="worker_preparation")
)
self.preparation_step.setBuild(self)
yield self.preparation_step.addStep()

# TODO: the time consuming actions during worker preparation are as follows:
# - worker substantiation
# - acquiring build locks
# Since locks_acquire_at calculates the time since beginning of the step until the end,
# it's impossible to represent this sequence using a single step. In the future it makes
# sense to add two steps: one for worker substantiation and another for acquiring build
# locks.
yield self.master.data.updates.set_step_locks_acquired_at(self.preparation_step.stepid)
self._preparation_step.setBuild(self)
yield self._preparation_step.addStep()
yield self.master.data.updates.startStep(
self._preparation_step.stepid, locks_acquired=True
)

Build.setupBuildProperties(self.getProperties(), self.requests,
self.sources, self.number)

# then narrow WorkerLocks down to the right worker
yield self._setup_locks()
metrics.MetricCountEvent.log('active_builds', 1)

if self._locks_to_acquire:
# Note that most of the time locks will already free because build distributor does
# not start builds that cannot acquire locks immediately. However on a loaded master
# it may happen that more builds are cleared to start than there are free locks. In
# such case some of the builds will be blocked and wait for the locks.
self._locks_acquire_step = buildstep.create_step_from_step_or_factory(
buildstep.BuildStep(name="locks_acquire")
)
self._locks_acquire_step.setBuild(self)
yield self._locks_acquire_step.addStep()

# make sure properties are available to people listening on 'new'
# events
yield self.master.data.updates.setBuildProperties(self.buildid, self)
Expand Down Expand Up @@ -422,6 +429,11 @@ def startBuild(self, workerforbuilder):
yield self.buildFinished(["worker", "not", "pinged"], RETRY)
return

yield self.master.data.updates.setStepStateString(
self._preparation_step.stepid, f"worker {self.getWorkerName()} ready"
)
yield self.master.data.updates.finishStep(self._preparation_step.stepid, SUCCESS, False)

self.conn = workerforbuilder.worker.conn

# To retrieve the builddir property, the worker must be attached as we
Expand All @@ -440,13 +452,25 @@ def startBuild(self, workerforbuilder):
yield self.buildFinished(["worker", "not", "building"], RETRY)
return

yield self.master.data.updates.setBuildStateString(self.buildid,
'acquiring locks')
yield self.acquireLocks()

readymsg = f"worker {self.getWorkerName()} ready"
yield self.master.data.updates.setStepStateString(self.preparation_step.stepid, readymsg)
yield self.master.data.updates.finishStep(self.preparation_step.stepid, SUCCESS, False)
if self._locks_to_acquire:
yield self.master.data.updates.setBuildStateString(self.buildid, "acquiring locks")
locks_acquire_start_at = int(self.master.reactor.seconds())
yield self.master.data.updates.startStep(
self._locks_acquire_step.stepid, started_at=locks_acquire_start_at
)
yield self.acquireLocks()
locks_acquired_at = int(self.master.reactor.seconds())
yield self.master.data.updates.set_step_locks_acquired_at(
self._locks_acquire_step.stepid, locks_acquired_at=locks_acquired_at
)
yield self.master.data.updates.add_build_locks_duration(
self.buildid, duration_s=locks_acquired_at - locks_acquire_start_at
)
yield self.master.data.updates.setStepStateString(
self._locks_acquire_step.stepid, "locks acquired")
yield self.master.data.updates.finishStep(
self._locks_acquire_step.stepid, SUCCESS, False
)

yield self.master.data.updates.setBuildStateString(self.buildid,
'building')
Expand All @@ -460,18 +484,18 @@ def buildPreparationFailure(self, why, state_string):
# if self.stopped, then this failure is a LatentWorker's failure to substantiate
# which we triggered on purpose in stopBuild()
log.msg("worker stopped while " + state_string, why)
yield self.master.data.updates.finishStep(self.preparation_step.stepid,
yield self.master.data.updates.finishStep(self._preparation_step.stepid,
CANCELLED, False)
else:
log.err(why, "while " + state_string)
self.workerforbuilder.worker.putInQuarantine()
if isinstance(why, failure.Failure):
yield self.preparation_step.addLogWithFailure(why)
yield self._preparation_step.addLogWithFailure(why)
elif isinstance(why, Exception):
yield self.preparation_step.addLogWithException(why)
yield self.master.data.updates.setStepStateString(self.preparation_step.stepid,
yield self._preparation_step.addLogWithException(why)
yield self.master.data.updates.setStepStateString(self._preparation_step.stepid,
"error while " + state_string)
yield self.master.data.updates.finishStep(self.preparation_step.stepid,
yield self.master.data.updates.finishStep(self._preparation_step.stepid,
EXCEPTION, False)

def acquireLocks(self, res=None):
Expand Down
33 changes: 26 additions & 7 deletions master/buildbot/process/buildstep.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,13 +502,14 @@ def addStep(self):
self.stepid, self.number, self.name = yield self.master.data.updates.addStep(
buildid=self.build.buildid,
name=util.bytes2unicode(self.name))
yield self.master.data.updates.startStep(self.stepid)

@defer.inlineCallbacks
def startStep(self, remote):
self.remote = remote

yield self.addStep()
started_at = int(self.master.reactor.seconds())
yield self.master.data.updates.startStep(self.stepid, started_at=started_at)

try:
yield self._render_renderables()
Expand All @@ -525,12 +526,30 @@ def startStep(self, remote):
yield self._setup_locks()

# set up locks
yield self.acquireLocks()

if self.stopped:
raise BuildStepCancelled

yield self.master.data.updates.set_step_locks_acquired_at(self.stepid)
if self._locks_to_acquire:
yield self.acquireLocks()

if self.stopped:
raise BuildStepCancelled

locks_acquired_at = int(self.master.reactor.seconds())
yield defer.DeferredList(
[
self.master.data.updates.set_step_locks_acquired_at(
self.stepid, locks_acquired_at=locks_acquired_at
),
self.master.data.updates.add_build_locks_duration(
self.build.buildid, duration_s=locks_acquired_at - started_at
)
]
)
else:
yield self.master.data.updates.set_step_locks_acquired_at(
self.stepid, locks_acquired_at=started_at
)

if self.stopped:
raise BuildStepCancelled

yield self.addTestResultSets()
try:
Expand Down
3 changes: 3 additions & 0 deletions master/buildbot/spec/types/build.raml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ properties:
complete_at?:
description: time at which this build was complete, or None if it's still running
type: date
locks_duration_s:
description: time spent acquiring locks so far, not including any running steps
type: integer
properties?:
description: a dictionary of properties attached to build.
type: sourcedproperties
Expand Down
Loading

0 comments on commit 2bc2df9

Please sign in to comment.