Skip to content

Commit

Permalink
Merge pull request juju#17134 from wallyworld/retry-charm-sha256
Browse files Browse the repository at this point in the history
juju#17134

It's been observed when running some smoke tests that the unit agent can error when deploying / refreshing a charm - the agent goes to failure status with an error that the charm's sha256 is empty. It turns out his can happen if the charm is not yet saved to the database when the unit agent makes the api call during deploy / refresh. The root cause here is ultimately the transition to async deploys. But there's mistakes in that implementation if the unit agent is allowed to start or upgrade before the charm is ready.

This PR adds a defensive retry to the unit agent facade when looking up the sha256 - if the charm is reported as not found (since it's still pending etc), it retries. This will solve the intermittent issue until the more extensive work is done to fix async deploys and refresh.

## QA steps

./main.sh -v -s 'test_build' smoke

## Links

Partial fix for https://bugs.launchpad.net/juju/+bug/2059990

**Jira card:** JUJU-5797
  • Loading branch information
jujubot authored Apr 2, 2024
2 parents e28dc33 + f483d87 commit 89fc93c
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 7 deletions.
44 changes: 37 additions & 7 deletions apiserver/facades/agent/uniter/uniter.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@ import (
"fmt"
"sort"
"strings"
"time"

"github.com/juju/charm/v13"
"github.com/juju/clock"
"github.com/juju/collections/transform"
"github.com/juju/errors"
"github.com/juju/loggo/v2"
"github.com/juju/names/v5"
"github.com/juju/retry"

"github.com/juju/juju/apiserver/common"
"github.com/juju/juju/apiserver/common/cloudspec"
Expand Down Expand Up @@ -796,18 +798,46 @@ func (u *UniterAPI) CharmArchiveSha256(ctx context.Context, args params.CharmURL
Results: make([]params.StringResult, len(args.URLs)),
}
for i, arg := range args.URLs {
sch, err := u.st.Charm(arg.URL)
if errors.Is(err, errors.NotFound) {
err = apiservererrors.ErrPerm
}
if err == nil {
result.Results[i].Result = sch.BundleSha256()
sha, err := u.oneCharmArchiveSha256(ctx, arg.URL)
if err != nil {
result.Results[i].Error = apiservererrors.ServerError(err)
continue
}
result.Results[i].Error = apiservererrors.ServerError(err)
result.Results[i].Result = sha

}
return result, nil
}

func (u *UniterAPI) oneCharmArchiveSha256(ctx context.Context, curl string) (string, error) {
// The charm in state may only be a placeholder when this call is made.
// Ideally, the unit agent would not be started until the charm is fully available,
// but that's not currently the case and it doesn't hurt to be defensive here regardless.
// We'll retry the sha256 lookup if the charm is still pending and therefore not found.
var sha string
err := retry.Call(retry.CallArgs{
Func: func() error {
sch, err := u.st.Charm(curl)
if err != nil {
return errors.Trace(err)
}
sha = sch.BundleSha256()
return nil
},
IsFatalError: func(err error) bool {
return !errors.Is(err, errors.NotFound)
},
Stop: ctx.Done(),
Delay: 3 * time.Second,
Attempts: 20,
Clock: u.clock,
})
if errors.Is(err, errors.NotFound) {
return "", apiservererrors.ErrPerm
}
return sha, errors.Trace(err)
}

// Relation returns information about all given relation/unit pairs,
// including their id, key and the local endpoint.
func (u *UniterAPI) Relation(ctx context.Context, args params.RelationUnits) (params.RelationResults, error) {
Expand Down
41 changes: 41 additions & 0 deletions apiserver/facades/agent/uniter/uniter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import (
"github.com/juju/juju/rpc/params"
"github.com/juju/juju/state"
statetesting "github.com/juju/juju/state/testing"
"github.com/juju/juju/testcharms"
coretesting "github.com/juju/juju/testing"
"github.com/juju/juju/testing/factory"
)
Expand Down Expand Up @@ -1435,6 +1436,46 @@ func (s *uniterSuite) TestCharmArchiveSha256(c *gc.C) {
})
}

func (s *uniterSuite) TestCharmArchiveSha256Pending(c *gc.C) {
st := s.ControllerModel(c).State()

curl := charm.MustParseURL("ch:amd64/dummy-666")
err := st.AddCharmPlaceholder(curl)
c.Assert(err, jc.ErrorIsNil)

bundleSHA256 := fmt.Sprintf("%d", time.Now().Unix())
go func() {
// TODO - the base suite uses a state pool with a wallclock
// This is go away as an issue once we move off mongo.
// The first retry happens after 3 seconds so wait longer than that.
time.Sleep(5 * time.Second)
ch := testcharms.Hub.CharmDir("dummy")
info := state.CharmInfo{
Charm: ch,
ID: curl.String(),
StoragePath: "fake-storage-path",
SHA256: bundleSHA256,
}
_, err := st.AddCharm(info)
c.Assert(err, jc.ErrorIsNil)
}()

args := params.CharmURLs{URLs: []params.CharmURL{
{URL: "something-invalid"},
{URL: s.wpCharm.URL()},
{URL: curl.String()},
}}
result, err := s.uniter.CharmArchiveSha256(context.Background(), args)
c.Assert(err, jc.ErrorIsNil)
c.Assert(result, gc.DeepEquals, params.StringResults{
Results: []params.StringResult{
{Error: apiservertesting.ErrUnauthorized},
{Result: s.wpCharm.BundleSha256()},
{Result: bundleSHA256},
},
})
}

func (s *uniterSuite) TestCurrentModel(c *gc.C) {
model := s.ControllerModel(c)
result, err := s.uniter.CurrentModel(context.Background())
Expand Down

0 comments on commit 89fc93c

Please sign in to comment.