Skip to content

Commit

Permalink
Merge pull request juju#18073 from manadart/3.5-uniter-peer-rels-last
Browse files Browse the repository at this point in the history
juju#18073

The linked bug describes issues when removing applications with both normal integrations and peer integrations.

Hooks for some integrations rely on the application data populated by peers. When peer integrations are departed/broken first, these hooks fail, blocking tear-down.

While we generally make no guarantees to charm authors about ordering, it is reasonable to process existing integrations peer-last to prevent this situation. 

Hooks for creation and joining are unaffected.

## QA steps

- `juju bootstrap lxd test`
- `juju add-model work`
- Make sure logging config includes `unit=DEBUG`.
- `juju deploy postgresql -n 2`.
- `juju deploy postgresql pgsink`.
- `juju relate postgresql:replication-offer pgsink`.
- Wait for quiescence.
- `juju remove-application postgresql`, and wait for it to be gone.
- `juju debug-log -m work --replay|grep 'postgresql-0.*via hook'|less`
- You will see that the peer relations are departed after the replication relation.
```
unit-postgresql-0: 14:31:26 INFO juju.worker.uniter.operation ran "replication-offer-relation-departed" hook (via hook dispatching script: dispatch)
unit-postgresql-0: 14:31:31 INFO juju.worker.uniter.operation ran "pgdata-storage-detaching" hook (via hook dispatching script: dispatch)
unit-postgresql-0: 14:31:33 INFO juju.worker.uniter.operation ran "replication-offer-relation-broken" hook (via hook dispatching script: dispatch)
unit-postgresql-0: 14:31:34 INFO juju.worker.uniter.operation ran "database-peers-relation-departed" hook (via hook dispatching script: dispatch)
unit-postgresql-0: 14:31:36 INFO juju.worker.uniter.operation ran "restart-relation-departed" hook (via hook dispatching script: dispatch)
unit-postgresql-0: 14:31:37 INFO juju.worker.uniter.operation ran "upgrade-relation-departed" hook (via hook dispatching script: dispatch)
```

## Documentation changes

None.

## Links

**Launchpad bug:** https://bugs.launchpad.net/juju/+bug/1998282

**Jira card:** [JUJU-6705](https://warthogs.atlassian.net/browse/JUJU-6705)



[JUJU-6705]: https://warthogs.atlassian.net/browse/JUJU-6705?atlOrigin=eyJpIjoiNWRkNTljNzYxNjVmNDY3MDlhMDU5Y2ZhYzA5YTRkZjUiLCJwIjoiZ2l0aHViLWNvbS1KU1cifQ
  • Loading branch information
jujubot authored Sep 19, 2024
2 parents bfee060 + 415278a commit 9b31733
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 53 deletions.
101 changes: 73 additions & 28 deletions worker/uniter/relation/resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,13 @@ type relationsResolver struct {
}

// NextOp implements resolver.Resolver.
func (r *relationsResolver) NextOp(localState resolver.LocalState, remoteState remotestate.Snapshot, opFactory operation.Factory) (_ operation.Operation, err error) {
func (r *relationsResolver) NextOp(
localState resolver.LocalState, remoteState remotestate.Snapshot, opFactory operation.Factory,
) (_ operation.Operation, err error) {
if r.logger.IsTraceEnabled() {
r.logger.Tracef("relation resolver next op for new remote relations %# v", pretty.Formatter(remoteState.Relations))
defer func() {
if err == resolver.ErrNoOperation {
if errors.Is(err, resolver.ErrNoOperation) {
r.logger.Tracef("no relation operation to run")
}
}()
Expand All @@ -72,42 +74,85 @@ func (r *relationsResolver) NextOp(localState resolver.LocalState, remoteState r
return nil, errors.Trace(err)
}

// Check whether we need to fire a hook for any of the relations
for relationId, relationSnapshot := range remoteState.Relations {
if !r.stateTracker.IsKnown(relationId) {
r.logger.Tracef("unknown relation %d resolving next op", relationId)
continue
} else if isImplicit, _ := r.stateTracker.IsImplicit(relationId); isImplicit {
// Collect peer relations and defer their processing until after other
// relations. This is simpler than implementing a sort based on the type.
// Processing them last ensures that upon application removal, the hooks
// for other relations can rely on the peer relations still being present.
var peerRelations []int

// Check whether we need to fire a hook for any of the non-peer relations.
for relationID, relationSnapshot := range remoteState.Relations {
if isPeer, _ := r.stateTracker.IsPeerRelation(relationID); isPeer {
peerRelations = append(peerRelations, relationID)
continue
}

// If either the unit or the relation are Dying, or the
// relation becomes suspended, then the relation should be
// broken.
var remoteBroken bool
if remoteState.Life == life.Dying || relationSnapshot.Life == life.Dying || relationSnapshot.Suspended {
relationSnapshot = remotestate.RelationSnapshot{}
remoteBroken = true
// TODO(axw) if relation is implicit, leave scope & remove.
op, err := r.processRelationSnapshot(relationID, relationSnapshot, remoteState, opFactory)
if err != nil {
if errors.Is(err, resolver.ErrNoOperation) {
continue
}
return nil, errors.Trace(err)
}
return op, nil
}

// Process the deferred peer relations.
for _, relationID := range peerRelations {
relationSnapshot := remoteState.Relations[relationID]

// Examine local/remote states and figure out if a hook needs
// to be fired for this relation.
relState, err := r.stateTracker.State(relationId)
op, err := r.processRelationSnapshot(relationID, relationSnapshot, remoteState, opFactory)
if err != nil {
//
relState = NewState(relationId)
}
hInfo, err := r.nextHookForRelation(relState, relationSnapshot, remoteBroken)
if err == resolver.ErrNoOperation {
continue
if errors.Is(err, resolver.ErrNoOperation) {
continue
}
return nil, errors.Trace(err)
}
return opFactory.NewRunHook(hInfo)
return op, nil
}

return nil, resolver.ErrNoOperation
}

// processRelationSnapshot reconciles the local and remote states for a
// single relation and determines what hoof (if any) should be fired.
func (r *relationsResolver) processRelationSnapshot(
relationID int,
relationSnapshot remotestate.RelationSnapshot,
remoteState remotestate.Snapshot,
opFactory operation.Factory,
) (operation.Operation, error) {
if !r.stateTracker.IsKnown(relationID) {
r.logger.Infof("unknown relation %d resolving next op", relationID)
return nil, resolver.ErrNoOperation
} else if isImplicit, _ := r.stateTracker.IsImplicit(relationID); isImplicit {
return nil, resolver.ErrNoOperation
}

// If either the unit or the relation are Dying, or the relation
// becomes suspended, then the relation should be broken.
var remoteBroken bool
if remoteState.Life == life.Dying || relationSnapshot.Life == life.Dying || relationSnapshot.Suspended {
relationSnapshot = remotestate.RelationSnapshot{}
remoteBroken = true
}

// Examine local/remote states and figure out if a hook needs
// to be fired for this relation.
relState, err := r.stateTracker.State(relationID)
if err != nil {
if !errors.Is(err, errors.NotFound) {
return nil, errors.Trace(err)
}
relState = NewState(relationID)
}
hInfo, err := r.nextHookForRelation(relState, relationSnapshot, remoteBroken)
if err != nil {
return nil, errors.Trace(err)
}
return opFactory.NewRunHook(hInfo)
}

// maybeDestroySubordinates checks whether the remote state indicates that the
// unit is dying and ensures that any related subordinates are properly
// destroyed.
Expand Down Expand Up @@ -343,7 +388,7 @@ func (r *createdRelationsResolver) NextOp(
if r.logger.IsTraceEnabled() {
r.logger.Tracef("create relation resolver next op for new remote relations %# v", pretty.Formatter(remoteState.Relations))
defer func() {
if err == resolver.ErrNoOperation {
if errors.Is(err, resolver.ErrNoOperation) {
r.logger.Tracef("no create relation operation to run")
}
}()
Expand All @@ -369,7 +414,7 @@ func (r *createdRelationsResolver) NextOp(

hook, err := r.nextHookForRelation(relationId)
if err != nil {
if err == resolver.ErrNoOperation {
if errors.Is(err, resolver.ErrNoOperation) {
continue
}

Expand Down
69 changes: 44 additions & 25 deletions worker/uniter/relation/resolver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ func assertNumCalls(c *gc.C, numCalls *int32, expected int32) {
c.Assert(v, gc.Equals, expected)
}

func (s *relationResolverSuite) newRelationStateTracer(c *gc.C, apiCaller base.APICaller, unitTag names.UnitTag) relation.RelationStateTracker {
func (s *relationResolverSuite) newRelationStateTracker(c *gc.C, apiCaller base.APICaller, unitTag names.UnitTag) relation.RelationStateTracker {
abort := make(chan struct{})
st := uniter.NewState(apiCaller, unitTag)
u, err := st.Unit(unitTag)
Expand Down Expand Up @@ -166,7 +166,7 @@ func (s *relationResolverSuite) setupRelations(c *gc.C) relation.RelationStateTr
uniterAPICall("State", unitEntity, unitStateResults, nil),
uniterAPICall("RelationsStatus", unitEntity, params.RelationUnitStatusResults{Results: []params.RelationUnitStatusResult{{RelationResults: []params.RelationUnitStatus{}}}}, nil),
)
r := s.newRelationStateTracer(c, apiCaller, unitTag)
r := s.newRelationStateTracker(c, apiCaller, unitTag)
assertNumCalls(c, &numCalls, 4)
return r
}
Expand Down Expand Up @@ -231,7 +231,7 @@ func (s *relationResolverSuite) assertNewRelationsWithExistingRelations(c *gc.C,
)
}
apiCaller := mockAPICaller(c, &numCalls, apiCalls...)
r := s.newRelationStateTracer(c, apiCaller, unitTag)
r := s.newRelationStateTracker(c, apiCaller, unitTag)
assertNumCalls(c, &numCalls, int32(len(apiCalls)))

info := r.GetInfo()
Expand Down Expand Up @@ -268,7 +268,7 @@ func (s *relationResolverSuite) TestNextOpNothing(c *gc.C) {
uniterAPICall("State", unitEntity, unitStateResults, nil),
uniterAPICall("RelationsStatus", unitEntity, params.RelationUnitStatusResults{Results: []params.RelationUnitStatusResult{{RelationResults: []params.RelationUnitStatus{}}}}, nil),
)
r := s.newRelationStateTracer(c, apiCaller, unitTag)
r := s.newRelationStateTracker(c, apiCaller, unitTag)
assertNumCalls(c, &numCalls, 4)

localState := resolver.LocalState{
Expand Down Expand Up @@ -393,7 +393,7 @@ func (s *relationResolverSuite) assertHookRelationJoined(c *gc.C, numCalls *int3
unitTag := names.NewUnitTag("wordpress/0")

apiCaller := mockAPICaller(c, numCalls, apiCalls...)
r := s.newRelationStateTracer(c, apiCaller, unitTag)
r := s.newRelationStateTracker(c, apiCaller, unitTag)
assertNumCalls(c, numCalls, 4)

localState := resolver.LocalState{
Expand Down Expand Up @@ -839,7 +839,7 @@ func (s *relationResolverSuite) TestImplicitRelationNoHooks(c *gc.C) {

var numCalls int32
apiCaller := mockAPICaller(c, &numCalls, apiCalls...)
r := s.newRelationStateTracer(c, apiCaller, unitTag)
r := s.newRelationStateTracker(c, apiCaller, unitTag)

localState := resolver.LocalState{
State: operation.State{
Expand Down Expand Up @@ -979,7 +979,7 @@ func (s *relationResolverSuite) TestSubSubPrincipalRelationDyingDestroysUnit(c *
//apiCalls = append(apiCalls, uniterAPICall("State", nrpeUnitEntity, unitStateResults, nil))
apiCaller := mockAPICaller(c, &numCalls, apiCalls...)

r := s.newRelationStateTracer(c, apiCaller, nrpeUnitTag)
r := s.newRelationStateTracker(c, apiCaller, nrpeUnitTag)
assertNumCalls(c, &numCalls, callsBeforeDestroy)

// So now we have a relations object with two relations, one to
Expand Down Expand Up @@ -1035,7 +1035,7 @@ func (s *relationResolverSuite) TestSubSubOtherRelationDyingNotDestroyed(c *gc.C

apiCaller := mockAPICaller(c, &numCalls, apiCalls...)

r := s.newRelationStateTracer(c, apiCaller, nrpeUnitTag)
r := s.newRelationStateTracker(c, apiCaller, nrpeUnitTag)

// TODO: Fix this test...
// This test intermittently makes either 16 or 17
Expand Down Expand Up @@ -1154,7 +1154,7 @@ func (s *relationResolverSuite) TestPrincipalDyingDestroysSubordinates(c *gc.C)
//apiCalls = append(apiCalls, uniterAPICall("State", nrpeUnitEntity, unitStateResults, nil))
apiCaller := mockAPICaller(c, &numCalls, apiCalls...)

r := s.newRelationStateTracer(c, apiCaller, nrpeUnitTag)
r := s.newRelationStateTracker(c, apiCaller, nrpeUnitTag)
assertNumCalls(c, &numCalls, callsBeforeDestroy)

// So now we have a relation between a principal (wordpress) and a
Expand Down Expand Up @@ -1359,7 +1359,8 @@ func (s *mockRelationResolverSuite) TestHookRelationJoined(c *gc.C) {
s.expectIsKnown(1)
s.expectIsImplicitFalse(1)
s.expectStateUnknown(1)
s.expectIsPeerRelationFalse(1)

s.mockRelStTracker.EXPECT().IsPeerRelation(1).Return(false, nil).Times(2)

relationsResolver := s.newRelationResolver(s.mockRelStTracker, s.mockSupDestroyer)
op, err := relationsResolver.NextOp(localState, remoteState, &mockOperations{})
Expand Down Expand Up @@ -1402,7 +1403,8 @@ func (s *mockRelationResolverSuite) TestHookRelationChangedApplication(c *gc.C)
s.expectIsKnown(1)
s.expectIsImplicitFalse(1)
s.expectState(relationState)
s.expectIsPeerRelationFalse(1)

s.mockRelStTracker.EXPECT().IsPeerRelation(1).Return(false, nil).Times(2)

relationsResolver := s.newRelationResolver(s.mockRelStTracker, s.mockSupDestroyer)
op, err := relationsResolver.NextOp(localState, remoteState, &mockOperations{})
Expand Down Expand Up @@ -1441,6 +1443,8 @@ func (s *mockRelationResolverSuite) TestHookRelationChangedSuspended(c *gc.C) {
s.expectState(relationState)
s.expectLocalUnitAndApplicationLife()

s.mockRelStTracker.EXPECT().IsPeerRelation(1).Return(false, nil)

relationsResolver := s.newRelationResolver(s.mockRelStTracker, s.mockSupDestroyer)
op, err := relationsResolver.NextOp(localState, remoteState, &mockOperations{})
c.Assert(err, jc.ErrorIsNil)
Expand Down Expand Up @@ -1478,6 +1482,8 @@ func (s *mockRelationResolverSuite) TestHookRelationDeparted(c *gc.C) {
s.expectState(relationState)
s.expectLocalUnitAndApplicationLife()

s.mockRelStTracker.EXPECT().IsPeerRelation(1).Return(false, nil)

relationsResolver := s.newRelationResolver(s.mockRelStTracker, s.mockSupDestroyer)
op, err := relationsResolver.NextOp(localState, remoteState, &mockOperations{})
c.Assert(err, jc.ErrorIsNil)
Expand All @@ -1495,22 +1501,35 @@ func (s *mockRelationResolverSuite) TestHookRelationBroken(c *gc.C) {
1: {
Life: life.Dying,
},
2: {
Life: life.Dying,
},
},
}
relationState := relation.State{

defer s.setupMocks(c).Finish()

s.expectSyncScopes(remoteState)

relationState1 := relation.State{
RelationId: 1,
Members: map[string]int64{},
ApplicationMembers: map[string]int64{},
ChangedPending: "",
}
defer s.setupMocks(c).Finish()
s.expectSyncScopes(remoteState)
s.expectIsKnown(1)
s.expectIsImplicitFalse(1)
s.expectState(relationState)
s.expectIsPeerRelationFalse(1)
s.expectState(relationState1)
s.expectStateFound(1)
s.expectRemoteApplication(1, "")
s.mockRelStTracker.EXPECT().IsPeerRelation(1).Return(false, nil).Times(2)

// The `Relations` map in the remote state snapshot (as with all Go maps)
// has random iteration order. This will sometimes be called
// (relation ID 2 first) and sometimes not (ID 1 first). The test here is
// that in all cases, the next operation is for ID 1 (non-peer) - it is
// always enqueued ahead of ID 2, which is a peer relation.
s.mockRelStTracker.EXPECT().IsPeerRelation(2).Return(true, nil).MaxTimes(1)

relationsResolver := s.newRelationResolver(s.mockRelStTracker, s.mockSupDestroyer)
op, err := relationsResolver.NextOp(localState, remoteState, &mockOperations{})
Expand Down Expand Up @@ -1543,10 +1562,11 @@ func (s *mockRelationResolverSuite) TestHookRelationBrokenWhenSuspended(c *gc.C)
s.expectIsKnown(1)
s.expectIsImplicitFalse(1)
s.expectState(relationState)
s.expectIsPeerRelationFalse(1)
s.expectStateFound(1)
s.expectRemoteApplication(1, "")

s.mockRelStTracker.EXPECT().IsPeerRelation(1).Return(false, nil).Times(2)

relationsResolver := s.newRelationResolver(s.mockRelStTracker, s.mockSupDestroyer)
op, err := relationsResolver.NextOp(localState, remoteState, &mockOperations{})
c.Assert(err, jc.ErrorIsNil)
Expand Down Expand Up @@ -1577,9 +1597,10 @@ func (s *mockRelationResolverSuite) TestHookRelationBrokenOnlyOnce(c *gc.C) {
s.expectIsKnown(1)
s.expectIsImplicitFalse(1)
s.expectState(relationState)
s.expectIsPeerRelationFalse(1)
s.expectStateFoundFalse(1)

s.mockRelStTracker.EXPECT().IsPeerRelation(1).Return(false, nil).Times(2)

relationsResolver := s.newRelationResolver(s.mockRelStTracker, s.mockSupDestroyer)
_, err := relationsResolver.NextOp(localState, remoteState, &mockOperations{})
c.Assert(errors.Cause(err), gc.Equals, resolver.ErrNoOperation)
Expand All @@ -1606,6 +1627,8 @@ func (s *mockRelationResolverSuite) TestImplicitRelationNoHooks(c *gc.C) {
s.expectIsKnown(1)
s.expectIsImplicit(1)

s.mockRelStTracker.EXPECT().IsPeerRelation(1).Return(false, nil)

relationsResolver := s.newRelationResolver(s.mockRelStTracker, s.mockSupDestroyer)
_, err := relationsResolver.NextOp(localState, remoteState, &mockOperations{})
c.Assert(errors.Cause(err), gc.Equals, resolver.ErrNoOperation)
Expand Down Expand Up @@ -1643,13 +1666,14 @@ func (s *mockRelationResolverSuite) TestPrincipalDyingDestroysSubordinates(c *gc
s.expectIsKnown(1)
s.expectIsImplicitFalse(1)
s.expectState(relationState)
s.expectIsPeerRelationFalse(1)
s.expectHasContainerScope(1)
s.expectStateFound(1)
s.expectRemoteApplication(1, "")
destroyer := mocks.NewMockSubordinateDestroyer(ctrl)
destroyer.EXPECT().DestroyAllSubordinates().Return(nil)

s.mockRelStTracker.EXPECT().IsPeerRelation(1).Return(false, nil).Times(2)

relationsResolver := s.newRelationResolver(s.mockRelStTracker, destroyer)
op, err := relationsResolver.NextOp(localState, remoteState, &mockOperations{})
c.Assert(err, jc.ErrorIsNil)
Expand Down Expand Up @@ -1690,19 +1714,14 @@ func (s *mockRelationResolverSuite) expectIsImplicitFalse(id int) {

func (s *mockRelationResolverSuite) expectStateUnknown(id int) {
exp := s.mockRelStTracker.EXPECT()
exp.State(id).Return(nil, errors.Errorf("unknown relation: %d", id))
exp.State(id).Return(nil, errors.NotFoundf("relation: %d", id))
}

func (s *mockRelationResolverSuite) expectState(st relation.State) {
exp := s.mockRelStTracker.EXPECT()
exp.State(st.RelationId).Return(&st, nil)
}

func (s *mockRelationResolverSuite) expectIsPeerRelationFalse(id int) {
exp := s.mockRelStTracker.EXPECT()
exp.IsPeerRelation(id).Return(false, nil)
}

func (s *mockRelationResolverSuite) expectLocalUnitAndApplicationLife() {
exp := s.mockRelStTracker.EXPECT()
exp.LocalUnitAndApplicationLife().Return(life.Alive, life.Alive, nil)
Expand Down

0 comments on commit 9b31733

Please sign in to comment.