Skip to content

Commit

Permalink
hash: A waiting list match is a formal revalidation
Browse files Browse the repository at this point in the history
Not only does the waiting list operate on an objcore, it also passes a
reference to requests before they reembark a worker. This way when an
objcore is already present during lookup we can attempt a cache hit
without holding the objhead lock.

Instead of repurposing the req::hash_objhead field into an equivalent
req::hash_objcore, the field is actually removed. In order to signal
that a request comes back from its waiting list, the life time of the
req::waitinglist flag is extended until cnt_lookup() is reentered.

If the rushed objcore matches a request, the lookup can result in a
hit without entering the regular lookup critical section. The objhead
lock is briefly acquired to release req's reference on the objhead, to
rely solely on the objcore's objhead reference like a normal hit.

This shifts the infamous waiting list serialization phenomenon to the
vary header match. The rush policy is applied wholesale on cacheable
objects instead of exponentially. This improves waiting list latency
when the outcome is a cache hit, but forces incompatible variants to
reenter a lookup and potentially disembark into the waiting list again.
Since most spurious rushes at every corner of objhead activity are gone,
this change puts all the spurious activity on the incompatible variants
alone instead of all objects on more occasions.

If a cacheable object was inserted in the cache, but already expired,
this behavior enables cache hits. This can be common with multi-tier
Varnish setups where one Varnish server may serve a graced object to
an other, but true of any origin server that may serve stale yet valid
responses.

The waiting list enables a proper response-wide no-cache behavior from
now on, but the built-in VCL prevents it by default. This is also the
first step towards implementing no-cache and private support at the
header field granularity.
  • Loading branch information
dridi committed Mar 4, 2024
1 parent 6b0a8c8 commit 0bd65aa
Show file tree
Hide file tree
Showing 5 changed files with 234 additions and 23 deletions.
3 changes: 0 additions & 3 deletions bin/varnishd/cache/cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -491,9 +491,6 @@ struct req {

struct objcore *body_oc;

/* The busy objhead we sleep on */
struct objhead *hash_objhead;

/* Built Vary string == workspace reservation */
uint8_t *vary_b;
uint8_t *vary_e;
Expand Down
80 changes: 68 additions & 12 deletions bin/varnishd/cache/cache_hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,41 @@ hsh_insert_busyobj(const struct worker *wrk, struct objhead *oh)
return (oc);
}

/*---------------------------------------------------------------------
*/

static unsigned
hsh_rush_match(struct req *req)
{
struct objhead *oh;
struct objcore *oc;
const uint8_t *vary;

oc = req->objcore;
CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);

AZ(oc->flags & OC_F_BUSY);
if (oc->flags & (OC_F_WITHDRAWN|OC_F_HFM|OC_F_HFP|OC_F_CANCEL|
OC_F_PRIVATE|OC_F_FAILED|OC_F_DYING))

return (0);

if (req->vcf != NULL) /* NB: must operate under oh lock. */
return (0);

oh = oc->objhead;
CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);

if (req->hash_ignore_vary)
return (1);
if (!ObjHasAttr(req->wrk, oc, OA_VARY))
return (1);

vary = ObjGetAttr(req->wrk, oc, OA_VARY, NULL);
AN(vary);
return (VRY_Match(req, vary));
}

/*---------------------------------------------------------------------
*/

Expand Down Expand Up @@ -383,22 +418,40 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp)
CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
CHECK_OBJ_NOTNULL(wrk->wpriv, WORKER_PRIV_MAGIC);
CHECK_OBJ_NOTNULL(req->http, HTTP_MAGIC);
CHECK_OBJ_ORNULL(req->objcore, OBJCORE_MAGIC);
CHECK_OBJ_ORNULL(req->vcf, VCF_MAGIC);
AN(hash);

hsh_prealloc(wrk);
if (DO_DEBUG(DBG_HASHEDGE))
hsh_testmagic(req->digest);

if (req->hash_objhead != NULL) {
if (req->objcore != NULL && hsh_rush_match(req)) {
TAKE_OBJ_NOTNULL(oc, &req->objcore, OBJCORE_MAGIC);
*ocp = oc;
oh = oc->objhead;
Lck_Lock(&oh->mtx);
oc->hits++;
boc_progress = oc->boc == NULL ? -1 : oc->boc->fetched_so_far;
AN(hsh_deref_objhead_unlock(wrk, &oh, NULL));
Req_LogHit(wrk, req, oc, boc_progress);
/* NB: since this hit comes from the waiting list instead of
* a regular lookup, grace is not considered. The object is
* fresh in the context of the waiting list, even expired: it
* was successfully just [re]validated by a fetch task.
*/
return (HSH_HIT);
}

if (req->objcore != NULL) {
/*
* This req came off the waiting list, and brings an
* oh refcnt with it.
* oh refcnt and an incompatible oc refcnt with it,
* the latter acquired during rush hour.
*/
CHECK_OBJ_NOTNULL(req->hash_objhead, OBJHEAD_MAGIC);
oh = req->hash_objhead;
oh = req->objcore->objhead;
(void)HSH_DerefObjCore(wrk, &req->objcore);
Lck_Lock(&oh->mtx);
req->hash_objhead = NULL;
} else {
AN(wrk->wpriv->nobjhead);
oh = hash->lookup(wrk, req->digest, &wrk->wpriv->nobjhead);
Expand Down Expand Up @@ -581,11 +634,12 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp)
AZ(req->hash_ignore_busy);

/*
* The objhead reference transfers to the sess, we get it
* back when the sess comes off the waiting list and
* calls us again
* The objhead reference is held by req while it is parked on the
* waiting list. The oh pointer is taken back from the objcore that
* triggers a rush of req off the waiting list.
*/
req->hash_objhead = oh;
assert(oh->refcnt > 1);

req->wrk = NULL;
req->waitinglist = 1;

Expand Down Expand Up @@ -640,8 +694,10 @@ hsh_rush1(const struct worker *wrk, struct objcore *oc, struct rush *r)
AZ(req->wrk);
VTAILQ_REMOVE(&oh->waitinglist, req, w_list);
VTAILQ_INSERT_TAIL(&r->reqs, req, w_list);
req->waitinglist = 0;
req->objcore = oc;
}

oc->refcnt += i;
}

/*---------------------------------------------------------------------
Expand Down Expand Up @@ -899,8 +955,8 @@ HSH_Withdraw(struct worker *wrk, struct objcore **ocp)
assert(oc->refcnt == 1);
assert(oh->refcnt > 0);
oc->flags = OC_F_WITHDRAWN;
hsh_rush1(wrk, oc, &rush);
AZ(HSH_DerefObjCoreUnlock(wrk, &oc));
hsh_rush1(wrk, oc, &rush); /* grabs up to 1 oc ref */
assert(HSH_DerefObjCoreUnlock(wrk, &oc) <= 1);

hsh_rush2(wrk, &rush);
}
Expand Down
15 changes: 9 additions & 6 deletions bin/varnishd/cache/cache_req_fsm.c
Original file line number Diff line number Diff line change
Expand Up @@ -556,20 +556,23 @@ cnt_lookup(struct worker *wrk, struct req *req)
{
struct objcore *oc, *busy;
enum lookup_e lr;
int had_objhead = 0;
int had_objcore = 0;

CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
AZ(req->objcore);
AZ(req->stale_oc);

AN(req->vcl);

VRY_Prep(req);

AZ(req->objcore);
if (req->hash_objhead)
had_objhead = 1;
if (req->waitinglist) {
CHECK_OBJ_NOTNULL(req->objcore, OBJCORE_MAGIC);
req->waitinglist = 0;
had_objcore = 1;
} else
AZ(req->objcore);

wrk->strangelove = 0;
lr = HSH_Lookup(req, &oc, &busy);
if (lr == HSH_BUSY) {
Expand All @@ -585,7 +588,7 @@ cnt_lookup(struct worker *wrk, struct req *req)
if ((unsigned)wrk->strangelove >= cache_param->vary_notice)
VSLb(req->vsl, SLT_Notice, "vsl: High number of variants (%d)",
wrk->strangelove);
if (had_objhead)
if (had_objcore)
VSLb_ts_req(req, "Waitinglist", W_TIM_real(wrk));

if (req->vcf != NULL) {
Expand Down
3 changes: 1 addition & 2 deletions bin/varnishd/cache/cache_vary.c
Original file line number Diff line number Diff line change
Expand Up @@ -224,8 +224,7 @@ vry_cmp(const uint8_t *v1, const uint8_t *v2)
void
VRY_Prep(struct req *req)
{
if (req->hash_objhead == NULL) {
/* Not a waiting list return */
if (!req->waitinglist) {
AZ(req->vary_b);
AZ(req->vary_e);
(void)WS_ReserveAll(req->ws);
Expand Down
156 changes: 156 additions & 0 deletions bin/varnishtest/tests/c00125.vtc
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
varnishtest "successful expired waiting list hit"

barrier b1 cond 2
barrier b2 cond 2
barrier b3 cond 2
barrier b4 cond 2


server s1 {
rxreq
expect req.http.user-agent == c1
expect req.http.bgfetch == false
barrier b1 sync
barrier b2 sync
txresp -hdr "Cache-Control: max-age=60" -hdr "Age: 120"

rxreq
expect req.http.user-agent == c3
expect req.http.bgfetch == true
txresp

# The no-cache case only works with a complicit VCL, for now.
rxreq
expect req.http.user-agent == c4
expect req.http.bgfetch == false
barrier b3 sync
barrier b4 sync
txresp -hdr "Cache-Control: no-cache"

rxreq
expect req.http.user-agent == c6
expect req.http.bgfetch == false
txresp -hdr "Cache-Control: no-cache"
} -start

varnish v1 -cliok "param.set default_grace 1h"
varnish v1 -cliok "param.set thread_pools 1"
varnish v1 -cliok "param.set debug +syncvsl,+waitinglist"
varnish v1 -vcl+backend {
sub vcl_backend_fetch {
set bereq.http.bgfetch = bereq.is_bgfetch;
}
sub vcl_beresp_stale {
# We just validated a stale object, do not mark it as
# uncacheable. The object remains available for grace
# hits and background fetches.
return;
}
sub vcl_beresp_control {
if (beresp.http.cache-control == "no-cache") {
# Keep beresp.uncacheable clear.
return;
}
}
sub vcl_deliver {
set resp.http.obj-hits = obj.hits;
set resp.http.obj-ttl = obj.ttl;
}
} -start

client c1 {
txreq -url "/stale-hit"
rxresp
expect resp.status == 200
expect resp.http.x-varnish == 1001
expect resp.http.obj-hits == 0
expect resp.http.obj-ttl < 0
} -start

barrier b1 sync

client c2 {
txreq -url "/stale-hit"
rxresp
expect resp.status == 200
expect resp.http.x-varnish == "1004 1002"
expect resp.http.obj-hits == 1
expect resp.http.obj-ttl < 0
} -start

varnish v1 -expect busy_sleep == 1
barrier b2 sync

client c1 -wait
client c2 -wait

varnish v1 -vsl_catchup

varnish v1 -expect cache_miss == 1
varnish v1 -expect cache_hit == 1
varnish v1 -expect cache_hit_grace == 0
varnish v1 -expect s_bgfetch == 0

client c3 {
txreq -url "/stale-hit"
rxresp
expect resp.status == 200
expect resp.http.x-varnish == "1006 1002"
expect resp.http.obj-hits == 2
expect resp.http.obj-ttl < 0
} -run

varnish v1 -vsl_catchup

varnish v1 -expect cache_miss == 1
varnish v1 -expect cache_hit == 2
varnish v1 -expect cache_hit_grace == 1
varnish v1 -expect s_bgfetch == 1

# The only way for a plain no-cache to be hit is to have a non-zero keep.
varnish v1 -cliok "param.set default_ttl 0"
varnish v1 -cliok "param.set default_grace 0"
varnish v1 -cliok "param.set default_keep 1h"

client c4 {
txreq -url "/no-cache-hit"
rxresp
expect resp.status == 200
expect resp.http.x-varnish == 1009
expect resp.http.obj-hits == 0
expect resp.http.obj-ttl <= 0
} -start

barrier b3 sync

client c5 {
txreq -url "/no-cache-hit"
rxresp
expect resp.status == 200
expect resp.http.x-varnish == "1012 1010"
expect resp.http.obj-hits == 1
expect resp.http.obj-ttl <= 0
} -start

varnish v1 -expect busy_sleep == 2
barrier b4 sync

client c4 -wait
client c5 -wait

varnish v1 -vsl_catchup

varnish v1 -expect cache_miss == 2
varnish v1 -expect cache_hit == 3
varnish v1 -expect cache_hit_grace == 1
varnish v1 -expect s_bgfetch == 1

# No hit when not on the waiting list
client c6 {
txreq -url "/no-cache-hit"
rxresp
expect resp.status == 200
expect resp.http.x-varnish == 1014
expect resp.http.obj-hits == 0
expect resp.http.obj-ttl <= 0
} -run

0 comments on commit 0bd65aa

Please sign in to comment.