hash: Treat a waiting list match as a revalidation
If a fetch succeeded and an object was inserted as cacheable, it means
that the VCL did not flag the backend response as uncacheable, so it
can be treated as revalidated and shareable with other clients on the
waiting list.

After changing the rush to operate on objcore flags instead of having
the call site decide how many clients to wake up, the same objcore
reference is now passed to requests before they reembark a worker. This
way, when an objcore is already present during lookup, we can attempt a
cache hit directly on the objcore that triggered the rush, removing a
degree of uncertainty from the waiting list behavior.
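
In code terms, the hand-off in the rush loop now looks roughly like
this (a condensed view of the hsh_rush1() hunk further down; assertions
and statistics elided):

	/* Wake up to `max` parked requests and hand each of them a
	 * reference to the objcore that triggered the rush. */
	for (i = 0; i < max; i++) {
		req = VTAILQ_FIRST(&oh->waitinglist);
		if (req == NULL)
			break;
		VTAILQ_REMOVE(&oh->waitinglist, req, w_list);
		VTAILQ_INSERT_TAIL(&r->reqs, req, w_list);
		req->objcore = oc;	/* the oc the request will try to hit */
	}
	oc->refcnt += i;		/* one objcore reference per woken request */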

Instead of repurposing the req::hash_objhead field into an equivalent
req::hash_objcore, the field is removed altogether. To signal that a
request is coming back from its waiting list, the lifetime of the
req::waitinglist flag is extended until cnt_lookup() is reentered.

If the rushed objcore matches a request, the lookup can result in a hit
without entering the regular lookup critical section. The objhead lock
is briefly acquired to release the req's reference on the objhead, so
that the hit relies solely on the objcore's own objhead reference like
a normal hit, and to perform the other hit-related operations. This
change brings back the exponential rush of cacheable objects that was
briefly neutered.
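
A condensed view of the resulting fast path at the top of HSH_Lookup()
(taken from the hunk further down; logging omitted, comments added here
to spell out the reference accounting):

	if (req->objcore != NULL && hsh_rush_match(req)) {
		/* The req holds two references here: the objhead reference
		 * taken when it parked, and the objcore reference handed
		 * over by the rush. */
		TAKE_OBJ_NOTNULL(oc, &req->objcore, OBJCORE_MAGIC);
		*ocp = oc;
		oh = oc->objhead;
		Lck_Lock(&oh->mtx);
		oc->hits++;
		/* Drop the req's objhead reference and unlock: as with a
		 * normal hit, only the objcore's own objhead reference
		 * remains. */
		AN(hsh_deref_objhead_unlock(wrk, &oh, oc));
		return (HSH_HIT);
	}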

This shifts the infamous waiting list serialization phenomenon to the
vary header match. Since most of the spurious rushes at every corner of
objhead activity are gone, the spurious activity is now confined to
incompatible variants alone, instead of hitting all objects on many
more occasions.

If a cacheable object was inserted in the cache but had already
expired, this behavior enables cache hits. This can be common in
multi-tier Varnish setups, where one Varnish server may serve a graced
object to another, but it is true of any origin server that may serve
stale yet valid responses.

From now on, the waiting list enables proper response-wide no-cache
behavior, although the built-in VCL prevents it by default. This is
also the first step towards implementing no-cache and private support
at header field granularity.
dridi committed May 6, 2024
1 parent 16af9b8 commit 683bf6e
Showing 5 changed files with 236 additions and 28 deletions.
3 changes: 0 additions & 3 deletions bin/varnishd/cache/cache.h
@@ -494,9 +494,6 @@ struct req {

struct objcore *body_oc;

/* The busy objhead we sleep on */
struct objhead *hash_objhead;

/* Built Vary string == workspace reservation */
uint8_t *vary_b;
uint8_t *vary_e;
87 changes: 70 additions & 17 deletions bin/varnishd/cache/cache_hash.c
@@ -355,6 +355,41 @@ hsh_insert_busyobj(const struct worker *wrk, struct objhead *oh)
return (oc);
}

/*---------------------------------------------------------------------
*/

static unsigned
hsh_rush_match(struct req *req)
{
struct objhead *oh;
struct objcore *oc;
const uint8_t *vary;

oc = req->objcore;
CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);

AZ(oc->flags & OC_F_BUSY);
AZ(oc->flags & OC_F_PRIVATE);
if (oc->flags & (OC_F_WITHDRAWN|OC_F_HFM|OC_F_HFP|OC_F_CANCEL|
OC_F_FAILED))
return (0);

if (req->vcf != NULL) /* NB: must operate under oh lock. */
return (0);

oh = oc->objhead;
CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);

if (req->hash_ignore_vary)
return (1);
if (!ObjHasAttr(req->wrk, oc, OA_VARY))
return (1);

vary = ObjGetAttr(req->wrk, oc, OA_VARY, NULL);
AN(vary);
return (VRY_Match(req, vary));
}

/*---------------------------------------------------------------------
*/

@@ -383,22 +418,40 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp)
CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
CHECK_OBJ_NOTNULL(wrk->wpriv, WORKER_PRIV_MAGIC);
CHECK_OBJ_NOTNULL(req->http, HTTP_MAGIC);
CHECK_OBJ_ORNULL(req->objcore, OBJCORE_MAGIC);
CHECK_OBJ_ORNULL(req->vcf, VCF_MAGIC);
AN(hash);

hsh_prealloc(wrk);
if (DO_DEBUG(DBG_HASHEDGE))
hsh_testmagic(req->digest);

if (req->hash_objhead != NULL) {
if (req->objcore != NULL && hsh_rush_match(req)) {
TAKE_OBJ_NOTNULL(oc, &req->objcore, OBJCORE_MAGIC);
*ocp = oc;
oh = oc->objhead;
Lck_Lock(&oh->mtx);
oc->hits++;
boc_progress = oc->boc == NULL ? -1 : oc->boc->fetched_so_far;
AN(hsh_deref_objhead_unlock(wrk, &oh, oc));
Req_LogHit(wrk, req, oc, boc_progress);
/* NB: since this hit comes from the waiting list instead of
* a regular lookup, grace is not considered. The object is
* fresh in the context of the waiting list, even expired: it
* was successfully just [re]validated by a fetch task.
*/
return (HSH_HIT);
}

if (req->objcore != NULL) {
/*
* This req came off the waiting list, and brings an
* oh refcnt with it.
* oh refcnt and an incompatible oc refcnt with it,
* the latter acquired during rush hour.
*/
CHECK_OBJ_NOTNULL(req->hash_objhead, OBJHEAD_MAGIC);
oh = req->hash_objhead;
oh = req->objcore->objhead;
(void)HSH_DerefObjCore(wrk, &req->objcore);
Lck_Lock(&oh->mtx);
req->hash_objhead = NULL;
} else {
AN(wrk->wpriv->nobjhead);
oh = hash->lookup(wrk, req->digest, &wrk->wpriv->nobjhead);
@@ -581,11 +634,12 @@ HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp)
AZ(req->hash_ignore_busy);

/*
* The objhead reference transfers to the sess, we get it
* back when the sess comes off the waiting list and
* calls us again
* The objhead reference is held by req while it is parked on the
* waiting list. The oh pointer is taken back from the objcore that
* triggers a rush of req off the waiting list.
*/
req->hash_objhead = oh;
assert(oh->refcnt > 1);

req->wrk = NULL;
req->waitinglist = 1;

@@ -623,25 +677,24 @@ hsh_rush1(const struct worker *wrk, struct objcore *oc, struct rush *r)

AZ(oc->flags & OC_F_BUSY);
AZ(oc->flags & OC_F_PRIVATE);
max = cache_param->rush_exponent;
if (oc->flags & (OC_F_WITHDRAWN|OC_F_FAILED))
max = 1;
else if (oc->flags & (OC_F_HFM|OC_F_HFP|OC_F_CANCEL|OC_F_DYING))
max = cache_param->rush_exponent;
else
max = INT_MAX; /* XXX: temp */
assert(max > 0);

for (i = 0; i < max; i++) {
req = VTAILQ_FIRST(&oh->waitinglist);
if (req == NULL)
break;
CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
wrk->stats->busy_wakeup++;
AZ(req->wrk);
VTAILQ_REMOVE(&oh->waitinglist, req, w_list);
VTAILQ_INSERT_TAIL(&r->reqs, req, w_list);
req->waitinglist = 0;
req->objcore = oc;
}

oc->refcnt += i;
wrk->stats->busy_wakeup += i;
}

/*---------------------------------------------------------------------
@@ -902,8 +955,8 @@ HSH_Withdraw(struct worker *wrk, struct objcore **ocp)
assert(oc->refcnt == 1);
assert(oh->refcnt > 0);
oc->flags = OC_F_WITHDRAWN;
hsh_rush1(wrk, oc, &rush);
AZ(HSH_DerefObjCoreUnlock(wrk, &oc));
hsh_rush1(wrk, oc, &rush); /* grabs up to 1 oc ref */
assert(HSH_DerefObjCoreUnlock(wrk, &oc) <= 1);

hsh_rush2(wrk, &rush);
}
15 changes: 9 additions & 6 deletions bin/varnishd/cache/cache_req_fsm.c
@@ -555,20 +555,23 @@ cnt_lookup(struct worker *wrk, struct req *req)
{
struct objcore *oc, *busy;
enum lookup_e lr;
int had_objhead = 0;
int had_objcore = 0;

CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
AZ(req->objcore);
AZ(req->stale_oc);

AN(req->vcl);

VRY_Prep(req);

AZ(req->objcore);
if (req->hash_objhead)
had_objhead = 1;
if (req->waitinglist) {
CHECK_OBJ_NOTNULL(req->objcore, OBJCORE_MAGIC);
req->waitinglist = 0;
had_objcore = 1;
} else
AZ(req->objcore);

wrk->strangelove = 0;
lr = HSH_Lookup(req, &oc, &busy);
if (lr == HSH_BUSY) {
@@ -584,7 +587,7 @@
if ((unsigned)wrk->strangelove >= cache_param->vary_notice)
VSLb(req->vsl, SLT_Notice, "vsl: High number of variants (%d)",
wrk->strangelove);
if (had_objhead)
if (had_objcore)
VSLb_ts_req(req, "Waitinglist", W_TIM_real(wrk));

if (req->vcf != NULL) {
3 changes: 1 addition & 2 deletions bin/varnishd/cache/cache_vary.c
@@ -224,8 +224,7 @@ vry_cmp(const uint8_t *v1, const uint8_t *v2)
void
VRY_Prep(struct req *req)
{
if (req->hash_objhead == NULL) {
/* Not a waiting list return */
if (!req->waitinglist) {
AZ(req->vary_b);
AZ(req->vary_e);
(void)WS_ReserveAll(req->ws);
156 changes: 156 additions & 0 deletions bin/varnishtest/tests/c00125.vtc
@@ -0,0 +1,156 @@
varnishtest "successful expired waiting list hit"

barrier b1 cond 2
barrier b2 cond 2
barrier b3 cond 2
barrier b4 cond 2


server s1 {
rxreq
expect req.http.user-agent == c1
expect req.http.bgfetch == false
barrier b1 sync
barrier b2 sync
txresp -hdr "Cache-Control: max-age=60" -hdr "Age: 120"

rxreq
expect req.http.user-agent == c3
expect req.http.bgfetch == true
txresp

# The no-cache case only works with a complicit VCL, for now.
rxreq
expect req.http.user-agent == c4
expect req.http.bgfetch == false
barrier b3 sync
barrier b4 sync
txresp -hdr "Cache-Control: no-cache"

rxreq
expect req.http.user-agent == c6
expect req.http.bgfetch == false
txresp -hdr "Cache-Control: no-cache"
} -start

varnish v1 -cliok "param.set default_grace 1h"
varnish v1 -cliok "param.set thread_pools 1"
varnish v1 -cliok "param.set debug +syncvsl,+waitinglist"
varnish v1 -vcl+backend {
sub vcl_backend_fetch {
set bereq.http.bgfetch = bereq.is_bgfetch;
}
sub vcl_beresp_stale {
# We just validated a stale object, do not mark it as
# uncacheable. The object remains available for grace
# hits and background fetches.
return;
}
sub vcl_beresp_control {
if (beresp.http.cache-control == "no-cache") {
# Keep beresp.uncacheable clear.
return;
}
}
sub vcl_deliver {
set resp.http.obj-hits = obj.hits;
set resp.http.obj-ttl = obj.ttl;
}
} -start

client c1 {
txreq -url "/stale-hit"
rxresp
expect resp.status == 200
expect resp.http.x-varnish == 1001
expect resp.http.obj-hits == 0
expect resp.http.obj-ttl < 0
} -start

barrier b1 sync

client c2 {
txreq -url "/stale-hit"
rxresp
expect resp.status == 200
expect resp.http.x-varnish == "1004 1002"
expect resp.http.obj-hits == 1
expect resp.http.obj-ttl < 0
} -start

varnish v1 -expect busy_sleep == 1
barrier b2 sync

client c1 -wait
client c2 -wait

varnish v1 -vsl_catchup

varnish v1 -expect cache_miss == 1
varnish v1 -expect cache_hit == 1
varnish v1 -expect cache_hit_grace == 0
varnish v1 -expect s_bgfetch == 0

client c3 {
txreq -url "/stale-hit"
rxresp
expect resp.status == 200
expect resp.http.x-varnish == "1006 1002"
expect resp.http.obj-hits == 2
expect resp.http.obj-ttl < 0
} -run

varnish v1 -vsl_catchup

varnish v1 -expect cache_miss == 1
varnish v1 -expect cache_hit == 2
varnish v1 -expect cache_hit_grace == 1
varnish v1 -expect s_bgfetch == 1

# The only way for a plain no-cache to be hit is to have a non-zero keep.
varnish v1 -cliok "param.set default_ttl 0"
varnish v1 -cliok "param.set default_grace 0"
varnish v1 -cliok "param.set default_keep 1h"

client c4 {
txreq -url "/no-cache-hit"
rxresp
expect resp.status == 200
expect resp.http.x-varnish == 1009
expect resp.http.obj-hits == 0
expect resp.http.obj-ttl <= 0
} -start

barrier b3 sync

client c5 {
txreq -url "/no-cache-hit"
rxresp
expect resp.status == 200
expect resp.http.x-varnish == "1012 1010"
expect resp.http.obj-hits == 1
expect resp.http.obj-ttl <= 0
} -start

varnish v1 -expect busy_sleep == 2
barrier b4 sync

client c4 -wait
client c5 -wait

varnish v1 -vsl_catchup

varnish v1 -expect cache_miss == 2
varnish v1 -expect cache_hit == 3
varnish v1 -expect cache_hit_grace == 1
varnish v1 -expect s_bgfetch == 1

# No hit when not on the waiting list
client c6 {
txreq -url "/no-cache-hit"
rxresp
expect resp.status == 200
expect resp.http.x-varnish == 1014
expect resp.http.obj-hits == 0
expect resp.http.obj-ttl <= 0
} -run
