From 7ce48c39aeafe4cd4b3ba1dc18168ff72d769bfc Mon Sep 17 00:00:00 2001 From: Tobias Werth Date: Sat, 23 Nov 2024 14:11:03 +0100 Subject: [PATCH] Remove transaction from fetch work API. We have seen the transaction to fail, resulting in exceptions/500s. Part of fixing #2848. There is also no need to have a transaction at all. We now do check after the update whether we won instead and if not, tell the judgehost to try again. Before this, I could with 4 judgedaemons on my laptop reliably reproduce the error by just judging the example problems, seeing it ~5 times for all ~100 submissions. Afterwards, I ran this 10 times and didn't encounter any error. --- .../Controller/API/JudgehostController.php | 60 ++++++++++--------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/webapp/src/Controller/API/JudgehostController.php b/webapp/src/Controller/API/JudgehostController.php index 6de3381fd9..12e521e31a 100644 --- a/webapp/src/Controller/API/JudgehostController.php +++ b/webapp/src/Controller/API/JudgehostController.php @@ -1600,40 +1600,42 @@ public function getJudgeTasksAction(Request $request): array // This is case 2.a) from above: start something new. // This runs transactional to prevent a queue task being picked up twice. $judgetasks = null; - $this->em->wrapInTransaction(function () use ($judgehost, $max_batchsize, &$judgetasks) { - $jobid = $this->em->createQueryBuilder() - ->from(QueueTask::class, 'qt') - ->innerJoin('qt.judging', 'j') - ->select('j.judgingid') + $jobid = $this->em->createQueryBuilder() + ->from(QueueTask::class, 'qt') + ->innerJoin('qt.judging', 'j') + ->select('j.judgingid') + ->andWhere('qt.startTime IS NULL') + ->addOrderBy('qt.priority') + ->addOrderBy('qt.teamPriority') + ->setMaxResults(1) + ->getQuery() + ->getOneOrNullResult(AbstractQuery::HYDRATE_SINGLE_SCALAR); + if ($jobid !== null) { + // Mark it as being worked on. + $result = $this->em->createQueryBuilder() + ->update(QueueTask::class, 'qt') + ->set('qt.startTime', Utils::now()) + ->andWhere('qt.judging = :jobid') ->andWhere('qt.startTime IS NULL') - ->addOrderBy('qt.priority') - ->addOrderBy('qt.teamPriority') - ->setMaxResults(1) + ->setParameter('jobid', $jobid) ->getQuery() - ->getOneOrNullResult(AbstractQuery::HYDRATE_SINGLE_SCALAR); - if ($jobid === null) { - return; - } - $judgetasks = $this->getJudgetasks($jobid, $max_batchsize, $judgehost); - if (empty($judgetasks)) { - // Somehow we got ourselves in a situation that there was a queue task without remaining judge tasks. - // This should not happen, but if it does, we need to clean up. Each of the fetch-work calls will clean - // up one queue task. We need to signal to the judgehost that there might be more work to do. + ->execute(); + + if ($result == 0) { + // Another judgehost beat us to it. $judgetasks = [['type' => 'try_again']]; } else { - // Mark it as being worked on. - $this->em->createQueryBuilder() - ->update(QueueTask::class, 'qt') - ->set('qt.startTime', Utils::now()) - ->andWhere('qt.judging = :jobid') - ->andWhere('qt.startTime IS NULL') - ->setParameter('jobid', $jobid) - ->getQuery() - ->execute(); + $judgetasks = $this->getJudgetasks($jobid, $max_batchsize, $judgehost); + if (empty($judgetasks)) { + // Somehow we got ourselves in a situation that there was a queue task without remaining judge tasks. + // This should not happen, but if it does, we need to clean up. Each of the fetch-work calls will clean + // up one queue task. We need to signal to the judgehost that there might be more work to do. + $judgetasks = [['type' => 'try_again']]; + } + } + if (!empty($judgetasks)) { + return $judgetasks; } - }); - if (!empty($judgetasks)) { - return $judgetasks; } if ($this->config->get('enable_parallel_judging')) {