From 86bedb94904b5547fe90c4103777f74eeb097194 Mon Sep 17 00:00:00 2001 From: Dridi Boukelmoune Date: Tue, 19 Dec 2023 18:46:40 +0100 Subject: [PATCH] hash: A waiting list match is a formal revalidation Not only does the waiting list operate on an objcore, it also passes a reference to requests before they reembark a worker. This way when an objcore is already present during lookup we can attempt a cache hit without holding the objhead lock. Instead of repurposing the req::hash_objhead field into an equivalent req::hash_objcore, the field is actually removed. In order to signal that a request comes back from its waiting list, the life time of the req::waitinglist flag is extended until cnt_lookup() is reentered. This change creates a new first step in the lookup process: - rush match (when coming from the waiting list) - hash lookup (unless coming from the waiting list) - objhead lookup (unless there was a rush hit) If the rush match is a hit, the objhead lock is briefly acquired to release req's reference, to rely solely on the objcore's reference like a normal hit. This shifts the infamous waiting list serialization phenomenon to the vary header match. Knowing that a rushed object is guaranteed to lead to a cache hit allows the rush policy to be applied wholesale, instead of exponentially. Only requests incompatible with the objcore vary header may reenter the waiting list, a scenario no different from the spurious rush wakeups when operating solely on objhead updates. If a cacheable object was inserted in the cache, but already expired, this behavior enables cache hits. This can be common with multi-tier Varnish setups where one Varnish server may serve a graced object to an other, but true of any origin server that may serve stale yet valid responses. The waiting list enables a proper response-wide no-cache behavior from now on, but the built-in VCL prevents it by default. This is also the first step towards implementing no-cache and private support at the header field granularity. --- bin/varnishd/cache/cache.h | 3 - bin/varnishd/cache/cache_hash.c | 75 +++++++++++--- bin/varnishd/cache/cache_req_fsm.c | 15 +-- bin/varnishd/cache/cache_vary.c | 3 +- bin/varnishtest/tests/c00125.vtc | 156 +++++++++++++++++++++++++++++ 5 files changed, 229 insertions(+), 23 deletions(-) create mode 100644 bin/varnishtest/tests/c00125.vtc diff --git a/bin/varnishd/cache/cache.h b/bin/varnishd/cache/cache.h index a7fcac6bf44..0faf91ef2fe 100644 --- a/bin/varnishd/cache/cache.h +++ b/bin/varnishd/cache/cache.h @@ -490,9 +490,6 @@ struct req { struct objcore *body_oc; - /* The busy objhead we sleep on */ - struct objhead *hash_objhead; - /* Built Vary string == workspace reservation */ uint8_t *vary_b; uint8_t *vary_e; diff --git a/bin/varnishd/cache/cache_hash.c b/bin/varnishd/cache/cache_hash.c index 62d429e7d86..4c5421ec733 100644 --- a/bin/varnishd/cache/cache_hash.c +++ b/bin/varnishd/cache/cache_hash.c @@ -512,6 +512,39 @@ hsh_objhead_lookup(struct objhead *oh, struct req *req, struct objcore **ocp, return (HSH_BUSY); } +/*--------------------------------------------------------------------- + */ + +static unsigned +hsh_rush_match(struct req *req) +{ + struct objhead *oh; + struct objcore *oc; + const uint8_t *vary; + + oc = req->objcore; + CHECK_OBJ_ORNULL(oc, OBJCORE_MAGIC); + + if (oc == NULL) + return (0); + + AZ(oc->flags & OC_F_BUSY); + if (oc->flags) + return (0); + + oh = oc->objhead; + CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC); + + if (req->hash_ignore_vary) + return (1); + if (!ObjHasAttr(req->wrk, oc, OA_VARY)) + return (1); + + vary = ObjGetAttr(req->wrk, oc, OA_VARY, NULL); + AN(vary); + return (VRY_Match(req, vary)); +} + /*--------------------------------------------------------------------- */ @@ -537,6 +570,7 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); CHECK_OBJ_NOTNULL(wrk->wpriv, WORKER_PRIV_MAGIC); CHECK_OBJ_NOTNULL(req->http, HTTP_MAGIC); + CHECK_OBJ_ORNULL(req->objcore, OBJCORE_MAGIC); CHECK_OBJ_ORNULL(req->vcf, VCF_MAGIC); AN(hash); @@ -544,15 +578,31 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) if (DO_DEBUG(DBG_HASHEDGE)) hsh_testmagic(req->digest); - if (req->hash_objhead != NULL) { + if (hsh_rush_match(req)) { + TAKE_OBJ_NOTNULL(oc, &req->objcore, OBJCORE_MAGIC); + *ocp = oc; + oh = oc->objhead; + AN(hsh_deref_objhead(wrk, &oh)); + oc->hits++; + boc_progress = oc->boc == NULL ? -1 : oc->boc->fetched_so_far; + Req_LogHit(wrk, req, oc, boc_progress); + /* NB: since this hit comes from the waiting list instead of + * a regular lookup, grace is not considered. The object is + * fresh in the context of the waiting list, even expired: it + * was successfully just [re]validated by a fetch task. + */ + return (HSH_HIT); + } + + if (req->objcore != NULL) { /* * This req came off the waiting list, and brings an - * oh refcnt with it. + * oh refcnt and an incompatible oc refcnt with it, + * the latter acquired during rush hour. */ - CHECK_OBJ_NOTNULL(req->hash_objhead, OBJHEAD_MAGIC); - oh = req->hash_objhead; + oh = req->objcore->objhead; + (void)HSH_DerefObjCore(wrk, &req->objcore); Lck_Lock(&oh->mtx); - req->hash_objhead = NULL; } else { AN(wrk->wpriv->nobjhead); oh = hash->lookup(wrk, req->digest, &wrk->wpriv->nobjhead); @@ -625,11 +675,13 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp) VTAILQ_INSERT_TAIL(&oh->waitinglist, req, w_list); /* - * The objhead reference transfers to the sess, we get it - * back when the sess comes off the waiting list and - * calls us again + * The objhead reference is held by req while it is parked + * on the waiting list. The oh pointer is taken back from + * the objcore that triggers a rush of req off the waiting + * list. */ - req->hash_objhead = oh; + assert(oh->refcnt > 1); + req->wrk = NULL; req->waitinglist = 1; @@ -690,11 +742,10 @@ hsh_rush1(const struct worker *wrk, struct objcore *oc, struct rush *r) AZ(req->wrk); VTAILQ_REMOVE(&oh->waitinglist, req, w_list); VTAILQ_INSERT_TAIL(&r->reqs, req, w_list); - req->waitinglist = 0; + req->objcore = oc; } - if (i > 0) - assert(oh->refcnt > 1); + oc->refcnt += i; } /*--------------------------------------------------------------------- diff --git a/bin/varnishd/cache/cache_req_fsm.c b/bin/varnishd/cache/cache_req_fsm.c index e3d24c96ad0..650a26e53a5 100644 --- a/bin/varnishd/cache/cache_req_fsm.c +++ b/bin/varnishd/cache/cache_req_fsm.c @@ -557,20 +557,23 @@ cnt_lookup(struct worker *wrk, struct req *req) { struct objcore *oc, *busy; enum lookup_e lr; - int had_objhead = 0; + int had_objcore = 0; CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC); CHECK_OBJ_NOTNULL(req, REQ_MAGIC); - AZ(req->objcore); AZ(req->stale_oc); AN(req->vcl); VRY_Prep(req); - AZ(req->objcore); - if (req->hash_objhead) - had_objhead = 1; + if (req->waitinglist) { + CHECK_OBJ_NOTNULL(req->objcore, OBJCORE_MAGIC); + req->waitinglist = 0; + had_objcore = 1; + } else + AZ(req->objcore); + wrk->strangelove = 0; lr = HSH_Lookup(req, &oc, &busy); if (lr == HSH_BUSY) { @@ -586,7 +589,7 @@ cnt_lookup(struct worker *wrk, struct req *req) if ((unsigned)wrk->strangelove >= cache_param->vary_notice) VSLb(req->vsl, SLT_Notice, "vsl: High number of variants (%d)", wrk->strangelove); - if (had_objhead) + if (had_objcore) VSLb_ts_req(req, "Waitinglist", W_TIM_real(wrk)); if (req->vcf != NULL) { diff --git a/bin/varnishd/cache/cache_vary.c b/bin/varnishd/cache/cache_vary.c index 101eca231b0..9865769d768 100644 --- a/bin/varnishd/cache/cache_vary.c +++ b/bin/varnishd/cache/cache_vary.c @@ -224,8 +224,7 @@ vry_cmp(const uint8_t *v1, const uint8_t *v2) void VRY_Prep(struct req *req) { - if (req->hash_objhead == NULL) { - /* Not a waiting list return */ + if (!req->waitinglist) { AZ(req->vary_b); AZ(req->vary_e); (void)WS_ReserveAll(req->ws); diff --git a/bin/varnishtest/tests/c00125.vtc b/bin/varnishtest/tests/c00125.vtc new file mode 100644 index 00000000000..8fbca4f46b2 --- /dev/null +++ b/bin/varnishtest/tests/c00125.vtc @@ -0,0 +1,156 @@ +varnishtest "successful expired waiting list hit" + +barrier b1 cond 2 +barrier b2 cond 2 +barrier b3 cond 2 +barrier b4 cond 2 + + +server s1 { + rxreq + expect req.http.user-agent == c1 + expect req.http.bgfetch == false + barrier b1 sync + barrier b2 sync + txresp -hdr "Cache-Control: max-age=60" -hdr "Age: 120" + + rxreq + expect req.http.user-agent == c3 + expect req.http.bgfetch == true + txresp + + # The no-cache case only works with a complicit VCL, for now. + rxreq + expect req.http.user-agent == c4 + expect req.http.bgfetch == false + barrier b3 sync + barrier b4 sync + txresp -hdr "Cache-Control: no-cache" + + rxreq + expect req.http.user-agent == c6 + expect req.http.bgfetch == false + txresp -hdr "Cache-Control: no-cache" +} -start + +varnish v1 -cliok "param.set default_grace 1h" +varnish v1 -cliok "param.set thread_pools 1" +varnish v1 -cliok "param.set debug +syncvsl,+waitinglist" +varnish v1 -vcl+backend { + sub vcl_backend_fetch { + set bereq.http.bgfetch = bereq.is_bgfetch; + } + sub vcl_beresp_stale { + # We just validated a stale object, do not mark it as + # uncacheable. The object remains available for grace + # hits and background fetches. + return; + } + sub vcl_beresp_control { + if (beresp.http.cache-control == "no-cache") { + # Keep beresp.uncacheable clear. + return; + } + } + sub vcl_deliver { + set resp.http.obj-hits = obj.hits; + set resp.http.obj-ttl = obj.ttl; + } +} -start + +client c1 { + txreq -url "/stale-hit" + rxresp + expect resp.status == 200 + expect resp.http.x-varnish == 1001 + expect resp.http.obj-hits == 0 + expect resp.http.obj-ttl < 0 +} -start + +barrier b1 sync + +client c2 { + txreq -url "/stale-hit" + rxresp + expect resp.status == 200 + expect resp.http.x-varnish == "1004 1002" + expect resp.http.obj-hits == 1 + expect resp.http.obj-ttl < 0 +} -start + +varnish v1 -expect busy_sleep == 1 +barrier b2 sync + +client c1 -wait +client c2 -wait + +varnish v1 -vsl_catchup + +varnish v1 -expect cache_miss == 1 +varnish v1 -expect cache_hit == 1 +varnish v1 -expect cache_hit_grace == 0 +varnish v1 -expect s_bgfetch == 0 + +client c3 { + txreq -url "/stale-hit" + rxresp + expect resp.status == 200 + expect resp.http.x-varnish == "1006 1002" + expect resp.http.obj-hits == 2 + expect resp.http.obj-ttl < 0 +} -run + +varnish v1 -vsl_catchup + +varnish v1 -expect cache_miss == 1 +varnish v1 -expect cache_hit == 2 +varnish v1 -expect cache_hit_grace == 1 +varnish v1 -expect s_bgfetch == 1 + +# The only way for a plain no-cache to be hit is to have a non-zero keep. +varnish v1 -cliok "param.set default_ttl 0" +varnish v1 -cliok "param.set default_grace 0" +varnish v1 -cliok "param.set default_keep 1h" + +client c4 { + txreq -url "/no-cache-hit" + rxresp + expect resp.status == 200 + expect resp.http.x-varnish == 1009 + expect resp.http.obj-hits == 0 + expect resp.http.obj-ttl <= 0 +} -start + +barrier b3 sync + +client c5 { + txreq -url "/no-cache-hit" + rxresp + expect resp.status == 200 + expect resp.http.x-varnish == "1012 1010" + expect resp.http.obj-hits == 1 + expect resp.http.obj-ttl <= 0 +} -start + +varnish v1 -expect busy_sleep == 2 +barrier b4 sync + +client c4 -wait +client c5 -wait + +varnish v1 -vsl_catchup + +varnish v1 -expect cache_miss == 2 +varnish v1 -expect cache_hit == 3 +varnish v1 -expect cache_hit_grace == 1 +varnish v1 -expect s_bgfetch == 1 + +# No hit when not on the waiting list +client c6 { + txreq -url "/no-cache-hit" + rxresp + expect resp.status == 200 + expect resp.http.x-varnish == 1014 + expect resp.http.obj-hits == 0 + expect resp.http.obj-ttl <= 0 +} -run