Skip to content

Commit

Permalink
Merge pull request #10797 from tangruotian/issue-10795
Browse files Browse the repository at this point in the history
bugfix: Agent复用同级节点时跳过了复用锁 #10795
  • Loading branch information
bkci-bot authored Aug 13, 2024
2 parents e157675 + 072389a commit 7b17c53
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,9 @@
"type" : "string"
}
}
},
"skip-wip" : {
"type" : "boolean"
}
}
} ]
Expand Down Expand Up @@ -943,6 +946,9 @@
"type" : "string"
}
}
},
"skip-wip" : {
"type" : "boolean"
}
}
} ]
Expand Down Expand Up @@ -2226,6 +2232,12 @@
"type" : "string"
}
},
"chat-id" : {
"type" : "array",
"items" : {
"type" : "string"
}
},
"notify-groups" : {
"type" : "array",
"items" : {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ import com.tencent.devops.environment.pojo.thirdpartyagent.EnvNodeAgent
import com.tencent.devops.environment.pojo.thirdpartyagent.ThirdPartyAgent
import com.tencent.devops.process.api.service.ServiceBuildResource
import com.tencent.devops.process.api.service.ServiceVarResource
import com.tencent.devops.process.constant.ProcessMessageCode
import com.tencent.devops.process.engine.common.VMUtils
import com.tencent.devops.process.pojo.SetContextVarData
import com.tencent.devops.process.pojo.VmInfo
Expand Down Expand Up @@ -435,22 +434,12 @@ class ThirdPartyDispatchService @Autowired constructor(
)
}/console/pipeline/${dispatchMessage.event.projectId}/$pipelineId/detail/$lockedBuildId"
if (lockedBuildId != dispatchMessage.event.buildId) {
I18nUtil.getCodeLanMessage(
messageCode = ProcessMessageCode.BK_LOCKED,
language = I18nUtil.getDefaultLocaleLanguage()
) + ": $linkTip<a target='_blank' href='$link'>" +
I18nUtil.getCodeLanMessage(
messageCode = ProcessMessageCode.BK_CLICK,
language = I18nUtil.getDefaultLocaleLanguage()
) + "</a>"
"$linkTip<a target='_blank' href='$link'>$lockedBuildId</a>"
} else {
I18nUtil.getCodeLanMessage(
messageCode = ProcessMessageCode.BK_CURRENT,
language = I18nUtil.getDefaultLocaleLanguage()
) + ": $linkTip"
linkTip
}
} ?: ""
log(
logWarn(
dispatchMessage.event,
I18nUtil.getCodeLanMessage(
messageCode = AGENT_REUSE_MUTEX_REDISPATCH,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ class AgentReuseMutexCmd @Autowired constructor(
) : ContainerCmd {
/**
 * Decides whether this command should run for the given container context.
 *
 * Returns `true` only when all of the following hold:
 * - the command flow state is [CmdFlowState.CONTINUE];
 * - the build status is not finished;
 * - the container's control option has a non-null `agentReuseMutex`.
 *
 * @param commandContext context of the container currently being processed.
 * @return whether the agent-reuse mutex command is applicable.
 */
override fun canExecute(commandContext: ContainerContext): Boolean {
    return commandContext.cmdFlowState == CmdFlowState.CONTINUE &&
        !commandContext.buildStatus.isFinish() &&
        commandContext.container.controlOption.agentReuseMutex != null
}

override fun execute(commandContext: ContainerContext) {
Expand All @@ -59,22 +59,15 @@ class AgentReuseMutexCmd @Autowired constructor(
* 互斥情况存在三种
* 1、依赖某个AgentId,直接往下执行即可
* 2、Root节点,即没有复用节点的节点,根据类型先拿取锁
* 3、Reuse节点,但没有复用节点,可能是因为和复用节点同级,或存在和复用节点先后顺序不明确的,
* 这种先拿取复用的JobId看有没有,没有就按root节点的逻辑走
* 3、Reuse节点,但没有复用节点,可能是因为和复用节点同级,或存在和复用节点先后顺序不明确的,就按root节点的逻辑走
*/
private fun doExecute(commandContext: ContainerContext) {
var mutex = commandContext.container.controlOption.agentReuseMutex!!
// 复用的不用参与锁逻辑
if (!mutex.type.needEngineLock()) {
commandContext.cmdFlowState = CmdFlowState.CONTINUE
return
}
// 如果有依赖Job且不是依赖类型的可以先拿一下上下文看看有没有已经写入了,如果写入了可以直接跳过
if (mutex.reUseJobId != null &&
commandContext.variables.containsKey(AgentReuseMutex.genAgentContextKey(mutex.reUseJobId!!))
) {
commandContext.cmdFlowState = CmdFlowState.CONTINUE
return
}
// 极端情况上下文没有写入,且agentId还是空,理论上不会有,逻辑上出现了就失败
if (mutex.agentOrEnvId.isNullOrBlank()) {
return agentIdNullError(commandContext, mutex, null)
Expand All @@ -85,7 +78,7 @@ class AgentReuseMutexCmd @Autowired constructor(
AgentReuseMutexType.AGENT_ID, AgentReuseMutexType.AGENT_NAME -> {
acquireMutex(commandContext, mutex)
}

// 环境的因为无法确定节点不参与锁
else -> {
commandContext.cmdFlowState = CmdFlowState.CONTINUE
}
Expand Down Expand Up @@ -165,16 +158,18 @@ class AgentReuseMutexCmd @Autowired constructor(
commandContext.cmdFlowState = CmdFlowState.LOOP
}

else -> { // 正常运行
commandContext.cmdFlowState = CmdFlowState.CONTINUE // 检查通过,继续向下执行
// 正常运行
else -> {
// 检查通过,继续向下执行
commandContext.cmdFlowState = CmdFlowState.CONTINUE
}
}
} else if (commandContext.container.status.isFinish()) { // 对于存在重放的结束消息做闭环
val event = commandContext.event

LOG.info(
"AGENT_REUSE|ENGINE|${event.buildId}|${event.source}|status=${commandContext.container.status}" +
"|concurrent_container_event"
"|concurrent_container_event"
)

releaseContainerMutex(
Expand Down Expand Up @@ -310,7 +305,7 @@ class AgentReuseMutexCmd @Autowired constructor(
containerVar = emptyMap(), buildStatus = null,
timestamps = mapOf(
BuildTimestampType.JOB_AGENT_REUSE_MUTEX_QUEUE to
BuildRecordTimeStamp(null, LocalDateTime.now().timestampmilli())
BuildRecordTimeStamp(null, LocalDateTime.now().timestampmilli())
)
)
}
Expand Down Expand Up @@ -347,9 +342,9 @@ class AgentReuseMutexCmd @Autowired constructor(
// 排队等待时间为0的时候,立即超时, 退出队列,并失败, 没有就继续在队列中,timeOut时间为分钟
if (mutex.timeout == 0 || timeDiff > TimeUnit.MINUTES.toSeconds(mutex.timeout.toLong())) {
val desc = "${
if (mutex.timeoutVar.isNullOrBlank()) {
"[${mutex.timeout} minutes]"
} else " timeoutVar[${mutex.timeoutVar}] setup to [${mutex.timeout} minutes]"
if (mutex.timeoutVar.isNullOrBlank()) {
"[${mutex.timeout} minutes]"
} else " timeoutVar[${mutex.timeoutVar}] setup to [${mutex.timeout} minutes]"
} "
logAgentMutex(
container, mutex, lockedBuildId,
Expand Down Expand Up @@ -443,7 +438,7 @@ class AgentReuseMutexCmd @Autowired constructor(
containerVar = emptyMap(), buildStatus = null,
timestamps = mapOf(
BuildTimestampType.JOB_MUTEX_QUEUE to
BuildRecordTimeStamp(LocalDateTime.now().timestampmilli(), null)
BuildRecordTimeStamp(LocalDateTime.now().timestampmilli(), null)
)
)
}
Expand Down Expand Up @@ -514,10 +509,10 @@ class AgentReuseMutexCmd @Autowired constructor(
messageCode = ProcessMessageCode.BK_LOCKED,
language = I18nUtil.getDefaultLocaleLanguage()
) + ": $linkTip<a target='_blank' href='$link'>" +
I18nUtil.getCodeLanMessage(
messageCode = ProcessMessageCode.BK_CLICK,
language = I18nUtil.getDefaultLocaleLanguage()
) + "</a> | $msg"
I18nUtil.getCodeLanMessage(
messageCode = ProcessMessageCode.BK_CLICK,
language = I18nUtil.getDefaultLocaleLanguage()
) + "</a> | $msg"
} else {
I18nUtil.getCodeLanMessage(
messageCode = ProcessMessageCode.BK_CURRENT,
Expand Down

0 comments on commit 7b17c53

Please sign in to comment.