From 6f01129b7e3051ce4e8f6df1018ae3f697c4e4b8 Mon Sep 17 00:00:00 2001 From: Mihail Stoykov Date: Sat, 25 Apr 2020 18:14:02 +0300 Subject: [PATCH] implementation of scale using jumps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This implementation saves the offset from the start of the cycle instead of from the previous step, called jumps later. This favors big numerator segments as can be seen by the benchmarks. I decided to drop the offsets and just use the jumps to get the offsets, as the most CPU intensive part of this is calculating the jumps. Obviously this means that if most use cases require the offsets this is not better performing. I would expect that in the cases where this gets worse, it gets worse insignificantly compared to the cases where the new "jumps" give much better performance. See the absolute values as well as the difference. The sort.Search is inlined because this gives something like a 30-70% boost, which I would argue is worth the few extra lines. name old time/op new time/op delta pkg:go.k6.io/k6/lib goos:linux goarch:amd64 GetStripedOffsets/length10,seed777-8 36.0µs ±29% 34.1µs ±36% ~ (p=0.796 n=10+10) GetStripedOffsets/length100,seed777-8 1.57ms ± 9% 1.36ms ±16% -13.47% (p=0.001 n=9+10) GetStripedOffsetsEven/length10-8 5.74µs ± 5% 5.01µs ± 6% -12.78% (p=0.000 n=10+10) GetStripedOffsetsEven/length100-8 68.9µs ±10% 57.7µs ± 6% -16.28% (p=0.000 n=10+10) GetStripedOffsetsEven/length1000-8 3.16ms ±12% 3.03ms ± 7% ~ (p=0.089 n=10+10) ExecutionSegmentScale/seq:;segment:/segment.Scale(5)-8 2.52ns ± 5% 2.56ns ± 4% ~ (p=0.184 n=10+10) ExecutionSegmentScale/seq:;segment:/et.Scale(5)-8 3.50µs ± 5% 3.14µs ±14% -10.47% (p=0.001 n=9+10) ExecutionSegmentScale/seq:;segment:/et.Scale(5)_prefilled-8 0.65ns ± 8% 2.53ns ± 1% +289.75% (p=0.000 n=10+9) ExecutionSegmentScale/seq:;segment:/segment.Scale(5523)-8 2.48ns ± 5% 2.58ns ± 5% +4.30% (p=0.009 n=10+10) ExecutionSegmentScale/seq:;segment:/et.Scale(5523)-8 
3.71µs ± 9% 3.14µs ± 8% -15.54% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:/et.Scale(5523)_prefilled-8 0.61ns ± 7% 2.49ns ± 2% +304.56% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:/segment.Scale(5000000)-8 2.33ns ± 6% 2.56ns ± 4% +9.78% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:/et.Scale(5000000)-8 3.84µs ± 5% 3.13µs ± 3% -18.57% (p=0.000 n=9+9) ExecutionSegmentScale/seq:;segment:/et.Scale(5000000)_prefilled-8 0.63ns ± 7% 2.52ns ± 3% +297.39% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:/segment.Scale(67280421310721)-8 2.36ns ± 5% 2.55ns ± 4% +8.09% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:/et.Scale(67280421310721)-8 3.78µs ± 6% 3.18µs ± 6% -15.98% (p=0.000 n=9+10) ExecutionSegmentScale/seq:;segment:/et.Scale(67280421310721)_prefilled-8 0.62ns ± 6% 2.51ns ± 4% +302.29% (p=0.000 n=10+9) ExecutionSegmentScale/seq:;segment:0:1/segment.Scale(5)-8 2.22µs ± 7% 1.94µs ± 8% -12.42% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:0:1/et.Scale(5)-8 4.03µs ± 7% 3.35µs ± 5% -17.03% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:0:1/et.Scale(5)_prefilled-8 0.65ns ± 7% 2.49ns ± 3% +283.93% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:0:1/segment.Scale(5523)-8 2.24µs ± 7% 1.94µs ± 8% -13.50% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:0:1/et.Scale(5523)-8 3.94µs ± 5% 3.45µs ± 7% -12.47% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:0:1/et.Scale(5523)_prefilled-8 0.63ns ± 8% 2.49ns ± 5% +297.91% (p=0.000 n=10+9) ExecutionSegmentScale/seq:;segment:0:1/segment.Scale(5000000)-8 2.31µs ± 8% 1.95µs ± 9% -15.43% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:0:1/et.Scale(5000000)-8 3.89µs ± 8% 3.32µs ± 6% -14.62% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:0:1/et.Scale(5000000)_prefilled-8 0.62ns ± 5% 2.53ns ± 4% +309.71% (p=0.000 n=9+10) ExecutionSegmentScale/seq:;segment:0:1/segment.Scale(67280421310721)-8 2.26µs ± 2% 1.87µs ± 5% -17.13% (p=0.000 n=10+8) 
ExecutionSegmentScale/seq:;segment:0:1/et.Scale(67280421310721)-8 3.88µs ± 7% 3.48µs ± 7% -10.17% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:0:1/et.Scale(67280421310721)_prefilled-8 0.61ns ± 6% 2.51ns ± 3% +309.80% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/segment.Scale(5)-8 2.93µs ± 6% 2.55µs ± 4% -13.11% (p=0.000 n=10+9) ExecutionSegmentScale/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/et.Scale(5)-8 4.65µs ± 5% 4.03µs ± 6% -13.50% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/et.Scale(5)_prefilled-8 12.1ns ± 3% 6.3ns ± 3% -47.73% (p=0.000 n=10+9) ExecutionSegmentScale/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/segment.Scale(5523)-8 2.80µs ± 5% 2.35µs ± 5% -16.12% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/et.Scale(5523)-8 4.66µs ± 5% 4.04µs ± 8% -13.21% (p=0.000 n=9+10) ExecutionSegmentScale/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/et.Scale(5523)_prefilled-8 10.3ns ± 3% 6.3ns ± 3% -39.20% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/segment.Scale(5000000)-8 2.43µs ± 8% 2.05µs ± 8% -15.65% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/et.Scale(5000000)-8 4.80µs ± 6% 4.04µs ± 9% -15.74% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/et.Scale(5000000)_prefilled-8 6.61ns ± 5% 7.67ns ± 7% +15.93% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/segment.Scale(67280421310721)-8 2.63µs ± 6% 2.25µs ± 8% -14.41% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/et.Scale(67280421310721)-8 4.70µs ± 7% 3.98µs ±14% -15.29% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/et.Scale(67280421310721)_prefilled-8 20.2ns ± 3% 10.6ns ± 5% -47.60% (p=0.000 n=10+10) 
ExecutionSegmentScale/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/segment.Scale(5)-8 2.79µs ± 6% 2.35µs ± 4% -15.79% (p=0.000 n=10+9) ExecutionSegmentScale/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/et.Scale(5)-8 5.49µs ± 7% 4.65µs ± 8% -15.32% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/et.Scale(5)_prefilled-8 8.07ns ± 3% 4.96ns ± 5% -38.60% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/segment.Scale(5523)-8 2.58µs ± 8% 2.23µs ± 4% -13.49% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/et.Scale(5523)-8 5.42µs ± 4% 4.51µs ± 4% -16.76% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/et.Scale(5523)_prefilled-8 10.3ns ± 3% 5.7ns ± 4% -45.04% (p=0.000 n=10+9) ExecutionSegmentScale/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/segment.Scale(5000000)-8 2.41µs ± 9% 2.04µs ± 9% -15.37% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/et.Scale(5000000)-8 5.48µs ± 8% 4.55µs ±11% -17.08% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/et.Scale(5000000)_prefilled-8 6.70ns ± 1% 7.03ns ± 2% +4.88% (p=0.000 n=9+9) ExecutionSegmentScale/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/segment.Scale(67280421310721)-8 2.60µs ± 7% 2.20µs ±10% -15.28% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/et.Scale(67280421310721)-8 5.49µs ± 5% 4.51µs ± 6% -17.94% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/et.Scale(67280421310721)_prefilled-8 20.2ns ± 4% 10.5ns ± 4% -47.85% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2/5:4/5/segment.Scale(5)-8 3.41µs ±11% 2.94µs ± 7% -13.93% (p=0.000 n=10+10) 
ExecutionSegmentScale/seq:;segment:2/5:4/5/et.Scale(5)-8 6.62µs ± 5% 5.93µs ± 7% -10.44% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2/5:4/5/et.Scale(5)_prefilled-8 4.03ns ± 3% 4.40ns ± 2% +9.16% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2/5:4/5/segment.Scale(5523)-8 3.60µs ± 8% 3.09µs ± 8% -14.02% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2/5:4/5/et.Scale(5523)-8 6.79µs ± 6% 6.13µs ± 6% -9.77% (p=0.000 n=10+9) ExecutionSegmentScale/seq:;segment:2/5:4/5/et.Scale(5523)_prefilled-8 11.0ns ± 3% 6.4ns ± 7% -41.82% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2/5:4/5/segment.Scale(5000000)-8 3.37µs ± 6% 2.89µs ±10% -14.11% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2/5:4/5/et.Scale(5000000)-8 6.94µs ± 2% 5.93µs ± 3% -14.59% (p=0.000 n=9+8) ExecutionSegmentScale/seq:;segment:2/5:4/5/et.Scale(5000000)_prefilled-8 6.98ns ± 3% 7.38ns ± 3% +5.86% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2/5:4/5/segment.Scale(67280421310721)-8 3.86µs ± 3% 3.29µs ± 5% -14.81% (p=0.000 n=8+9) ExecutionSegmentScale/seq:;segment:2/5:4/5/et.Scale(67280421310721)-8 6.75µs ± 6% 6.07µs ± 7% -10.05% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2/5:4/5/et.Scale(67280421310721)_prefilled-8 10.0ns ± 3% 10.5ns ± 2% +5.63% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2235/5213:4/5/segment.Scale(5)-8 3.51µs ± 8% 3.03µs ± 8% -13.52% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2235/5213:4/5/et.Scale(5)-8 809µs ± 4% 635µs ± 7% -21.45% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2235/5213:4/5/et.Scale(5)_prefilled-8 12.0ns ± 2% 18.8ns ± 5% +55.93% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2235/5213:4/5/segment.Scale(5523)-8 3.71µs ± 7% 3.20µs ± 4% -13.69% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2235/5213:4/5/et.Scale(5523)-8 828µs ± 5% 635µs ± 6% -23.35% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2235/5213:4/5/et.Scale(5523)_prefilled-8 8.11µs ± 3% 0.02µs ± 5% -99.78% (p=0.000 
n=10+10) ExecutionSegmentScale/seq:;segment:2235/5213:4/5/segment.Scale(5000000)-8 3.63µs ± 3% 3.04µs ± 7% -16.06% (p=0.000 n=8+10) ExecutionSegmentScale/seq:;segment:2235/5213:4/5/et.Scale(5000000)-8 929µs ± 6% 642µs ± 5% -30.91% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2235/5213:4/5/et.Scale(5000000)_prefilled-8 41.2µs ± 2% 0.0µs ± 4% -99.96% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2235/5213:4/5/segment.Scale(67280421310721)-8 3.94µs ± 4% 3.40µs ± 5% -13.77% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2235/5213:4/5/et.Scale(67280421310721)-8 908µs ± 9% 630µs ± 8% -30.63% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2235/5213:4/5/et.Scale(67280421310721)_prefilled-8 41.4µs ± 2% 0.0µs ± 6% -99.95% (p=0.000 n=9+10) pkg:go.k6.io/k6/lib/executor goos:linux goarch:amd64 Cal/1s-8 4.25µs ±14% 5.24µs ±13% +23.21% (p=0.000 n=10+10) Cal/1m0s-8 273µs ± 3% 309µs ± 8% +13.09% (p=0.000 n=9+9) CalRat/1s-8 12.1ms ± 2% 14.4ms ± 2% +18.80% (p=0.000 n=8+8) CalRat/1m0s-8 8.12s ± 2% 8.06s ± 1% ~ (p=0.408 n=10+8) RampingVUsGetRawExecutionSteps/seq:;segment:/normal-8 390µs ± 5% 312µs ± 6% -19.97% (p=0.000 n=10+10) RampingVUsGetRawExecutionSteps/seq:;segment:/rollercoaster-8 3.99ms ± 7% 3.21ms ± 9% -19.58% (p=0.000 n=10+10) RampingVUsGetRawExecutionSteps/seq:;segment:0:1/normal-8 386µs ± 5% 304µs ± 5% -21.39% (p=0.000 n=9+10) RampingVUsGetRawExecutionSteps/seq:;segment:0:1/rollercoaster-8 3.95ms ± 5% 3.21ms ± 6% -18.81% (p=0.000 n=10+10) RampingVUsGetRawExecutionSteps/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/normal-8 114µs ± 5% 91µs ± 9% -19.58% (p=0.000 n=10+10) RampingVUsGetRawExecutionSteps/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/rollercoaster-8 1.25ms ± 6% 1.00ms ± 9% -19.82% (p=0.000 n=10+10) RampingVUsGetRawExecutionSteps/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/normal-8 38.5µs ± 5% 32.8µs ± 5% -14.80% (p=0.000 n=10+9) 
RampingVUsGetRawExecutionSteps/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/rollercoaster-8 425µs ± 8% 324µs ± 5% -23.90% (p=0.000 n=10+10) RampingVUsGetRawExecutionSteps/seq:;segment:2/5:4/5/normal-8 152µs ±10% 121µs ±10% -20.07% (p=0.000 n=10+10) RampingVUsGetRawExecutionSteps/seq:;segment:2/5:4/5/rollercoaster-8 1.60ms ± 6% 1.28ms ± 5% -20.02% (p=0.000 n=10+9) RampingVUsGetRawExecutionSteps/seq:;segment:2235/5213:4/5/normal-8 148µs ± 5% 138µs ± 7% -6.56% (p=0.000 n=10+10) RampingVUsGetRawExecutionSteps/seq:;segment:2235/5213:4/5/rollercoaster-8 1.42ms ±11% 1.20ms ± 6% -15.34% (p=0.000 n=10+10) VUHandleIterations-8 1.00s ± 0% 1.00s ± 0% ~ (p=0.529 n=10+10) name old iterations/s new iterations/s delta pkg:go.k6.io/k6/lib/executor goos:linux goarch:amd64 RampingArrivalRateRun/VUs10-8 251k ± 5% 262k ± 6% +4.33% (p=0.043 n=10+10) RampingArrivalRateRun/VUs100-8 315k ± 2% 321k ± 2% +2.07% (p=0.002 n=10+10) RampingArrivalRateRun/VUs1000-8 291k ± 2% 306k ± 1% +5.02% (p=0.000 n=10+10) RampingArrivalRateRun/VUs10000-8 266k ± 2% 286k ± 2% +7.84% (p=0.000 n=10+10) VUHandleIterations-8 0.09 ± 6% 0.08 ± 6% -14.06% (p=0.000 n=10+10) --- lib/execution_segment.go | 111 ++++++++++++++++++++----------- lib/execution_segment_test.go | 2 +- lib/executor/ramping_vus_test.go | 20 ++++++ 3 files changed, 92 insertions(+), 41 deletions(-) diff --git a/lib/execution_segment.go b/lib/execution_segment.go index 9dff1c0358b..0036540dccd 100644 --- a/lib/execution_segment.go +++ b/lib/execution_segment.go @@ -494,9 +494,9 @@ type ExecutionSegmentSequenceWrapper struct { ExecutionSegmentSequence // a filled-out segment sequence lcd int64 // pre-calculated least common denominator - // The striped offsets, i.e. the repeating indexes that "belong" to each + // The striped jumps, i.e. the repeating indexes that "belong" to each // execution segment in the sequence. 
- offsets [][]int64 + jumps [][]int64 } // NewExecutionSegmentSequenceWrapper expects a filled-out execution segment @@ -508,7 +508,7 @@ func NewExecutionSegmentSequenceWrapper(ess ExecutionSegmentSequence) *Execution } sequenceLength := len(ess) - offsets := make([][]int64, sequenceLength) + jumps := make([][]int64, sequenceLength) lcd := ess.LCD() // This will contain the normalized numerator values (i.e. what they would have @@ -524,7 +524,7 @@ func NewExecutionSegmentSequenceWrapper(ess ExecutionSegmentSequence) *Execution normalizedNumerator := ess[i].length.Num().Int64() * (lcd / ess[i].length.Denom().Int64()) sortedNormalizedIndexes[i].normNumerator = normalizedNumerator sortedNormalizedIndexes[i].originalIndex = i - offsets[i] = make([]int64, 0, normalizedNumerator+1) + jumps[i] = make([]int64, 0, normalizedNumerator) } sort.SliceStable(sortedNormalizedIndexes, func(i, j int) bool { @@ -561,28 +561,21 @@ func NewExecutionSegmentSequenceWrapper(ess ExecutionSegmentSequence) *Execution // sorting of the segments from biggest to smallest helps with the fact that // the biggest elements will need to take the most elements, and for them it // will be the hardest to not get sequential elements. 
- prev := make([]int64, sequenceLength) chosenCounts := make([]int64, sequenceLength) - saveIndex := func(iteration int64, index int, numerator int64) { - offsets[index] = append(offsets[index], iteration-prev[index]) - prev[index] = iteration - if int64(len(offsets[index])) == numerator { - offsets[index] = append(offsets[index], offsets[index][0]+lcd-iteration) - } - } for i := int64(0); i < lcd; i++ { for sortedIndex, chosenCount := range chosenCounts { num := chosenCount * lcd denom := sortedNormalizedIndexes[sortedIndex].normNumerator if i > num/denom || (i == num/denom && num%denom == 0) { chosenCounts[sortedIndex]++ - saveIndex(i, sortedNormalizedIndexes[sortedIndex].originalIndex, denom) + index := sortedNormalizedIndexes[sortedIndex].originalIndex + jumps[index] = append(jumps[index], i) break } } } - return &ExecutionSegmentSequenceWrapper{ExecutionSegmentSequence: ess, lcd: lcd, offsets: offsets} + return &ExecutionSegmentSequenceWrapper{ExecutionSegmentSequence: ess, lcd: lcd, jumps: jumps} } // LCD returns the (cached) least common denominator of the sequence - no need @@ -593,13 +586,23 @@ func (essw *ExecutionSegmentSequenceWrapper) LCD() int64 { // ScaleInt64 scales the provided value for the given segment. 
func (essw *ExecutionSegmentSequenceWrapper) ScaleInt64(segmentIndex int, value int64) int64 { - start := essw.offsets[segmentIndex][0] - offsets := essw.offsets[segmentIndex][1:] - result := (value / essw.lcd) * int64(len(offsets)) - for gi, i := 0, start; i < value%essw.lcd; gi, i = gi+1, i+offsets[gi] { - result++ + jumps := essw.jumps[segmentIndex] + endValue := (value / essw.lcd) * int64(len(jumps)) + remaining := value % essw.lcd + if jumps[0] <= remaining { + i, j := 0, len(jumps) + for i < j { + h := int(uint(i+j) >> 1) // avoid overflow when computing h + // i ≤ h < j + if jumps[h] < remaining { + i = h + 1 // preserves f(i-1) == false + } else { + j = h // preserves f(j) == true + } + } + endValue += int64(i) } - return result + return endValue } // GetStripedOffsets returns the stripped offsets for the given segment @@ -611,8 +614,24 @@ func (essw *ExecutionSegmentSequenceWrapper) ScaleInt64(segmentIndex int, value // - lcd: the LCD of the lengths of all segments in the sequence. This is also the number of // elements after which the algorithm starts to loop and give the same values func (essw *ExecutionSegmentSequenceWrapper) GetStripedOffsets(segmentIndex int) (int64, []int64, int64) { - offsets := essw.offsets[segmentIndex] - return offsets[0], offsets[1:], essw.lcd + jumps := essw.jumps[segmentIndex] + offsets := make([]int64, len(jumps)) + for i := 1; i < len(jumps); i++ { + offsets[i-1] = jumps[i] - jumps[i-1] + } + offsets[len(offsets)-1] = essw.lcd - jumps[len(jumps)-1] + jumps[0] + return jumps[0], offsets, essw.lcd +} + +// GetStripedJumps returns the stripped jumps for the given segment +// the returned values are as follows in order: +// - jumps: a list of jumps from the beginning value for the segment. This are only the jumps +// to from the start to the next start if we chunk the elements we are going to strip +// into lcd sized chunks +// - lcd: the LCD of the lengths of all segments in the sequence. 
This is also the number of +// elements after which the algorithm starts to loop and give the same values +func (essw *ExecutionSegmentSequenceWrapper) GetStripedJumps(segmentIndex int) ([]int64, int64) { + return essw.jumps[segmentIndex], essw.lcd } // GetTuple returns an ExecutionTuple for the specified segment index. @@ -758,6 +777,11 @@ func (et *ExecutionTuple) GetStripedOffsets() (int64, []int64, int64) { return et.Sequence.GetStripedOffsets(et.SegmentIndex) } +// GetStripedJumps returns the striped jumps for our execution segment. +func (et *ExecutionTuple) GetStripedJumps() ([]int64, int64) { + return et.Sequence.GetStripedJumps(et.SegmentIndex) +} + // GetNewExecutionTupleFromValue re-segments the sequence, based on the given // value (see GetNewExecutionSegmentSequenceFromValue() above), and either // returns the new tuple, or an error if the current segment isn't present in @@ -783,6 +807,7 @@ func (et *ExecutionTuple) GetNewExecutionTupleFromValue(value int64) (*Execution type SegmentedIndex struct { start, lcd int64 offsets []int64 + jumps []int64 scaled, unscaled int64 // for both the first element(vu) is 1 not 0 } @@ -790,7 +815,8 @@ type SegmentedIndex struct { // given an ExecutionTuple. func NewSegmentedIndex(et *ExecutionTuple) *SegmentedIndex { start, offsets, lcd := et.GetStripedOffsets() - return &SegmentedIndex{start: start, lcd: lcd, offsets: offsets} + jumps, _ := et.GetStripedJumps() + return &SegmentedIndex{start: start, lcd: lcd, offsets: offsets, jumps: jumps} } // Next goes to the next scaled index and moves the unscaled one accordingly. @@ -821,7 +847,6 @@ func (s *SegmentedIndex) Prev() (int64, int64) { // GoTo sets the scaled index to its biggest value for which the corresponding // unscaled index is smaller or equal to value. 
func (s *SegmentedIndex) GoTo(value int64) (int64, int64) { // TODO optimize - var gi int64 // Because of the cyclical nature of the striping algorithm (with a cycle // length of LCD, the least common denominator), when scaling large values // (i.e. many multiples of the LCD), we can quickly calculate how many times @@ -829,28 +854,34 @@ func (s *SegmentedIndex) GoTo(value int64) (int64, int64) { // TODO optimize wholeCycles := (value / s.lcd) // So we can set some approximate initial values quickly, since we also know // precisely how many scaled values there are per cycle length. - s.scaled = wholeCycles * int64(len(s.offsets)) - s.unscaled = wholeCycles*s.lcd + s.start + 1 // our indexes are from 1 the start is from 0 + s.scaled = wholeCycles * int64(len(s.jumps)) + s.unscaled = wholeCycles * s.lcd // our indexes are from 1 the start is from 0 // Approach the final value using the slow algorithm with the step by step loop // TODO: this can be optimized by another array with size offsets that instead of the offsets // from the previous is the offset from either 0 or start - i := s.start - for ; i < value%s.lcd; gi, i = gi+1, i+s.offsets[gi] { - s.scaled++ - s.unscaled += s.offsets[gi] - } - if gi > 0 { // there were more values after the wholecycles - // the last offset actually shouldn't have been added - s.unscaled -= s.offsets[gi-1] - } else if s.scaled > 0 { // we didn't actually have more values after the wholecycles but we still had some + remaining := value % s.lcd + switch { + case s.jumps[0]+1 > remaining: + // we didn't actually have more values after the wholecycles but we still had some // in this case the unscaled value needs to move back by the last offset as it would've been // the one to get it from the value it needs to be to it's current one - s.unscaled -= s.offsets[len(s.offsets)-1] - } - - if s.scaled == 0 { - s.unscaled = 0 // we would've added the start and 1 + if wholeCycles > 0 { + s.unscaled -= s.lcd - s.jumps[len(s.jumps)-1] - 1 + } + 
default: + i, j := 0, len(s.jumps) + for i < j { + h := int(uint(i+j) >> 1) // avoid overflow when computing h + // i ≤ h < j + if s.jumps[h] < remaining { + i = h + 1 // preserves f(i-1) == false + } else { + j = h // preserves f(j) == true + } + } + s.scaled += int64(i) + s.unscaled += s.jumps[i-1] + 1 } return s.scaled, s.unscaled diff --git a/lib/execution_segment_test.go b/lib/execution_segment_test.go index 07f490841e8..87145aa4876 100644 --- a/lib/execution_segment_test.go +++ b/lib/execution_segment_test.go @@ -1049,7 +1049,7 @@ func TestSegmentedIndex(t *testing.T) { t.Run("strange", func(t *testing.T) { t.Parallel() - s := SegmentedIndex{start: 1, lcd: 7, offsets: []int64{4, 3}} + s := SegmentedIndex{start: 1, lcd: 7, offsets: []int64{4, 3}, jumps: []int64{1, 5}} s.Next() assert.EqualValues(t, 2, s.unscaled) diff --git a/lib/executor/ramping_vus_test.go b/lib/executor/ramping_vus_test.go index f129b1c448c..63f363ff9f9 100644 --- a/lib/executor/ramping_vus_test.go +++ b/lib/executor/ramping_vus_test.go @@ -1036,6 +1036,26 @@ func BenchmarkRampingVUsGetRawExecutionSteps(b *testing.B) { { name: "normal", stages: `[{"duration":"5m", "target":5000},{"duration":"5m", "target":5000},{"duration":"5m", "target":10000},{"duration":"5m", "target":10000}]`, + }, { + name: "jumpy", + stages: `[{"duration":"5m", "target":5000},{"duration":"0s", "target":0}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":5432}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":0}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":5432}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":0}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":5432}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":0}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":5432}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":0}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":5432}, + 
{"duration":"5m", "target":5000},{"duration":"0s", "target":0}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":5432}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":0}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":5432}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":0}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":5432}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":0}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":5432}]`, }, { name: "rollercoaster", stages: `[{"duration":"5m", "target":5000},{"duration":"5m", "target":0},