From 6f01129b7e3051ce4e8f6df1018ae3f697c4e4b8 Mon Sep 17 00:00:00 2001 From: Mihail Stoykov Date: Sat, 25 Apr 2020 18:14:02 +0300 Subject: [PATCH] implementation of scale using jumps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This implementation saves the offset from the start of the cycle instead of from the previous step, called jumps later. This favors big numerator segments as can be seen by the benchmarks. I decided to drop the offsets and just use the jumps to get the offsets, as the most CPU intensive part of this is calculating the jumps. Obviously this means that if most use cases require the offsets this is not better performing. I would expect that in the cases where this gets worse, it gets worse insignificantly compared to the cases where the new "jumps" give much better performance. See the absolute values as well as the difference. The sort.Search is inlined because this gives something like a 30-70% boost, which I would argue is worth the few extra lines. name old time/op new time/op delta pkg:go.k6.io/k6/lib goos:linux goarch:amd64 GetStripedOffsets/length10,seed777-8 36.0µs ±29% 34.1µs ±36% ~ (p=0.796 n=10+10) GetStripedOffsets/length100,seed777-8 1.57ms ± 9% 1.36ms ±16% -13.47% (p=0.001 n=9+10) GetStripedOffsetsEven/length10-8 5.74µs ± 5% 5.01µs ± 6% -12.78% (p=0.000 n=10+10) GetStripedOffsetsEven/length100-8 68.9µs ±10% 57.7µs ± 6% -16.28% (p=0.000 n=10+10) GetStripedOffsetsEven/length1000-8 3.16ms ±12% 3.03ms ± 7% ~ (p=0.089 n=10+10) ExecutionSegmentScale/seq:;segment:/segment.Scale(5)-8 2.52ns ± 5% 2.56ns ± 4% ~ (p=0.184 n=10+10) ExecutionSegmentScale/seq:;segment:/et.Scale(5)-8 3.50µs ± 5% 3.14µs ±14% -10.47% (p=0.001 n=9+10) ExecutionSegmentScale/seq:;segment:/et.Scale(5)_prefilled-8 0.65ns ± 8% 2.53ns ± 1% +289.75% (p=0.000 n=10+9) ExecutionSegmentScale/seq:;segment:/segment.Scale(5523)-8 2.48ns ± 5% 2.58ns ± 5% +4.30% (p=0.009 n=10+10) ExecutionSegmentScale/seq:;segment:/et.Scale(5523)-8 
3.71µs ± 9% 3.14µs ± 8% -15.54% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:/et.Scale(5523)_prefilled-8 0.61ns ± 7% 2.49ns ± 2% +304.56% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:/segment.Scale(5000000)-8 2.33ns ± 6% 2.56ns ± 4% +9.78% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:/et.Scale(5000000)-8 3.84µs ± 5% 3.13µs ± 3% -18.57% (p=0.000 n=9+9) ExecutionSegmentScale/seq:;segment:/et.Scale(5000000)_prefilled-8 0.63ns ± 7% 2.52ns ± 3% +297.39% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:/segment.Scale(67280421310721)-8 2.36ns ± 5% 2.55ns ± 4% +8.09% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:/et.Scale(67280421310721)-8 3.78µs ± 6% 3.18µs ± 6% -15.98% (p=0.000 n=9+10) ExecutionSegmentScale/seq:;segment:/et.Scale(67280421310721)_prefilled-8 0.62ns ± 6% 2.51ns ± 4% +302.29% (p=0.000 n=10+9) ExecutionSegmentScale/seq:;segment:0:1/segment.Scale(5)-8 2.22µs ± 7% 1.94µs ± 8% -12.42% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:0:1/et.Scale(5)-8 4.03µs ± 7% 3.35µs ± 5% -17.03% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:0:1/et.Scale(5)_prefilled-8 0.65ns ± 7% 2.49ns ± 3% +283.93% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:0:1/segment.Scale(5523)-8 2.24µs ± 7% 1.94µs ± 8% -13.50% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:0:1/et.Scale(5523)-8 3.94µs ± 5% 3.45µs ± 7% -12.47% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:0:1/et.Scale(5523)_prefilled-8 0.63ns ± 8% 2.49ns ± 5% +297.91% (p=0.000 n=10+9) ExecutionSegmentScale/seq:;segment:0:1/segment.Scale(5000000)-8 2.31µs ± 8% 1.95µs ± 9% -15.43% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:0:1/et.Scale(5000000)-8 3.89µs ± 8% 3.32µs ± 6% -14.62% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:0:1/et.Scale(5000000)_prefilled-8 0.62ns ± 5% 2.53ns ± 4% +309.71% (p=0.000 n=9+10) ExecutionSegmentScale/seq:;segment:0:1/segment.Scale(67280421310721)-8 2.26µs ± 2% 1.87µs ± 5% -17.13% (p=0.000 n=10+8) 
ExecutionSegmentScale/seq:;segment:0:1/et.Scale(67280421310721)-8 3.88µs ± 7% 3.48µs ± 7% -10.17% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:0:1/et.Scale(67280421310721)_prefilled-8 0.61ns ± 6% 2.51ns ± 3% +309.80% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/segment.Scale(5)-8 2.93µs ± 6% 2.55µs ± 4% -13.11% (p=0.000 n=10+9) ExecutionSegmentScale/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/et.Scale(5)-8 4.65µs ± 5% 4.03µs ± 6% -13.50% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/et.Scale(5)_prefilled-8 12.1ns ± 3% 6.3ns ± 3% -47.73% (p=0.000 n=10+9) ExecutionSegmentScale/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/segment.Scale(5523)-8 2.80µs ± 5% 2.35µs ± 5% -16.12% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/et.Scale(5523)-8 4.66µs ± 5% 4.04µs ± 8% -13.21% (p=0.000 n=9+10) ExecutionSegmentScale/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/et.Scale(5523)_prefilled-8 10.3ns ± 3% 6.3ns ± 3% -39.20% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/segment.Scale(5000000)-8 2.43µs ± 8% 2.05µs ± 8% -15.65% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/et.Scale(5000000)-8 4.80µs ± 6% 4.04µs ± 9% -15.74% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/et.Scale(5000000)_prefilled-8 6.61ns ± 5% 7.67ns ± 7% +15.93% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/segment.Scale(67280421310721)-8 2.63µs ± 6% 2.25µs ± 8% -14.41% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/et.Scale(67280421310721)-8 4.70µs ± 7% 3.98µs ±14% -15.29% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/et.Scale(67280421310721)_prefilled-8 20.2ns ± 3% 10.6ns ± 5% -47.60% (p=0.000 n=10+10) 
ExecutionSegmentScale/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/segment.Scale(5)-8 2.79µs ± 6% 2.35µs ± 4% -15.79% (p=0.000 n=10+9) ExecutionSegmentScale/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/et.Scale(5)-8 5.49µs ± 7% 4.65µs ± 8% -15.32% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/et.Scale(5)_prefilled-8 8.07ns ± 3% 4.96ns ± 5% -38.60% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/segment.Scale(5523)-8 2.58µs ± 8% 2.23µs ± 4% -13.49% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/et.Scale(5523)-8 5.42µs ± 4% 4.51µs ± 4% -16.76% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/et.Scale(5523)_prefilled-8 10.3ns ± 3% 5.7ns ± 4% -45.04% (p=0.000 n=10+9) ExecutionSegmentScale/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/segment.Scale(5000000)-8 2.41µs ± 9% 2.04µs ± 9% -15.37% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/et.Scale(5000000)-8 5.48µs ± 8% 4.55µs ±11% -17.08% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/et.Scale(5000000)_prefilled-8 6.70ns ± 1% 7.03ns ± 2% +4.88% (p=0.000 n=9+9) ExecutionSegmentScale/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/segment.Scale(67280421310721)-8 2.60µs ± 7% 2.20µs ±10% -15.28% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/et.Scale(67280421310721)-8 5.49µs ± 5% 4.51µs ± 6% -17.94% (p=0.000 n=10+10) ExecutionSegmentScale/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/et.Scale(67280421310721)_prefilled-8 20.2ns ± 4% 10.5ns ± 4% -47.85% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2/5:4/5/segment.Scale(5)-8 3.41µs ±11% 2.94µs ± 7% -13.93% (p=0.000 n=10+10) 
ExecutionSegmentScale/seq:;segment:2/5:4/5/et.Scale(5)-8 6.62µs ± 5% 5.93µs ± 7% -10.44% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2/5:4/5/et.Scale(5)_prefilled-8 4.03ns ± 3% 4.40ns ± 2% +9.16% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2/5:4/5/segment.Scale(5523)-8 3.60µs ± 8% 3.09µs ± 8% -14.02% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2/5:4/5/et.Scale(5523)-8 6.79µs ± 6% 6.13µs ± 6% -9.77% (p=0.000 n=10+9) ExecutionSegmentScale/seq:;segment:2/5:4/5/et.Scale(5523)_prefilled-8 11.0ns ± 3% 6.4ns ± 7% -41.82% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2/5:4/5/segment.Scale(5000000)-8 3.37µs ± 6% 2.89µs ±10% -14.11% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2/5:4/5/et.Scale(5000000)-8 6.94µs ± 2% 5.93µs ± 3% -14.59% (p=0.000 n=9+8) ExecutionSegmentScale/seq:;segment:2/5:4/5/et.Scale(5000000)_prefilled-8 6.98ns ± 3% 7.38ns ± 3% +5.86% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2/5:4/5/segment.Scale(67280421310721)-8 3.86µs ± 3% 3.29µs ± 5% -14.81% (p=0.000 n=8+9) ExecutionSegmentScale/seq:;segment:2/5:4/5/et.Scale(67280421310721)-8 6.75µs ± 6% 6.07µs ± 7% -10.05% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2/5:4/5/et.Scale(67280421310721)_prefilled-8 10.0ns ± 3% 10.5ns ± 2% +5.63% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2235/5213:4/5/segment.Scale(5)-8 3.51µs ± 8% 3.03µs ± 8% -13.52% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2235/5213:4/5/et.Scale(5)-8 809µs ± 4% 635µs ± 7% -21.45% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2235/5213:4/5/et.Scale(5)_prefilled-8 12.0ns ± 2% 18.8ns ± 5% +55.93% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2235/5213:4/5/segment.Scale(5523)-8 3.71µs ± 7% 3.20µs ± 4% -13.69% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2235/5213:4/5/et.Scale(5523)-8 828µs ± 5% 635µs ± 6% -23.35% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2235/5213:4/5/et.Scale(5523)_prefilled-8 8.11µs ± 3% 0.02µs ± 5% -99.78% (p=0.000 
n=10+10) ExecutionSegmentScale/seq:;segment:2235/5213:4/5/segment.Scale(5000000)-8 3.63µs ± 3% 3.04µs ± 7% -16.06% (p=0.000 n=8+10) ExecutionSegmentScale/seq:;segment:2235/5213:4/5/et.Scale(5000000)-8 929µs ± 6% 642µs ± 5% -30.91% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2235/5213:4/5/et.Scale(5000000)_prefilled-8 41.2µs ± 2% 0.0µs ± 4% -99.96% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2235/5213:4/5/segment.Scale(67280421310721)-8 3.94µs ± 4% 3.40µs ± 5% -13.77% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2235/5213:4/5/et.Scale(67280421310721)-8 908µs ± 9% 630µs ± 8% -30.63% (p=0.000 n=10+10) ExecutionSegmentScale/seq:;segment:2235/5213:4/5/et.Scale(67280421310721)_prefilled-8 41.4µs ± 2% 0.0µs ± 6% -99.95% (p=0.000 n=9+10) pkg:go.k6.io/k6/lib/executor goos:linux goarch:amd64 Cal/1s-8 4.25µs ±14% 5.24µs ±13% +23.21% (p=0.000 n=10+10) Cal/1m0s-8 273µs ± 3% 309µs ± 8% +13.09% (p=0.000 n=9+9) CalRat/1s-8 12.1ms ± 2% 14.4ms ± 2% +18.80% (p=0.000 n=8+8) CalRat/1m0s-8 8.12s ± 2% 8.06s ± 1% ~ (p=0.408 n=10+8) RampingVUsGetRawExecutionSteps/seq:;segment:/normal-8 390µs ± 5% 312µs ± 6% -19.97% (p=0.000 n=10+10) RampingVUsGetRawExecutionSteps/seq:;segment:/rollercoaster-8 3.99ms ± 7% 3.21ms ± 9% -19.58% (p=0.000 n=10+10) RampingVUsGetRawExecutionSteps/seq:;segment:0:1/normal-8 386µs ± 5% 304µs ± 5% -21.39% (p=0.000 n=9+10) RampingVUsGetRawExecutionSteps/seq:;segment:0:1/rollercoaster-8 3.95ms ± 5% 3.21ms ± 6% -18.81% (p=0.000 n=10+10) RampingVUsGetRawExecutionSteps/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/normal-8 114µs ± 5% 91µs ± 9% -19.58% (p=0.000 n=10+10) RampingVUsGetRawExecutionSteps/seq:0,0.3,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.3/rollercoaster-8 1.25ms ± 6% 1.00ms ± 9% -19.82% (p=0.000 n=10+10) RampingVUsGetRawExecutionSteps/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/normal-8 38.5µs ± 5% 32.8µs ± 5% -14.80% (p=0.000 n=10+9) 
RampingVUsGetRawExecutionSteps/seq:0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1;segment:0:0.1/rollercoaster-8 425µs ± 8% 324µs ± 5% -23.90% (p=0.000 n=10+10) RampingVUsGetRawExecutionSteps/seq:;segment:2/5:4/5/normal-8 152µs ±10% 121µs ±10% -20.07% (p=0.000 n=10+10) RampingVUsGetRawExecutionSteps/seq:;segment:2/5:4/5/rollercoaster-8 1.60ms ± 6% 1.28ms ± 5% -20.02% (p=0.000 n=10+9) RampingVUsGetRawExecutionSteps/seq:;segment:2235/5213:4/5/normal-8 148µs ± 5% 138µs ± 7% -6.56% (p=0.000 n=10+10) RampingVUsGetRawExecutionSteps/seq:;segment:2235/5213:4/5/rollercoaster-8 1.42ms ±11% 1.20ms ± 6% -15.34% (p=0.000 n=10+10) VUHandleIterations-8 1.00s ± 0% 1.00s ± 0% ~ (p=0.529 n=10+10) name old iterations/s new iterations/s delta pkg:go.k6.io/k6/lib/executor goos:linux goarch:amd64 RampingArrivalRateRun/VUs10-8 251k ± 5% 262k ± 6% +4.33% (p=0.043 n=10+10) RampingArrivalRateRun/VUs100-8 315k ± 2% 321k ± 2% +2.07% (p=0.002 n=10+10) RampingArrivalRateRun/VUs1000-8 291k ± 2% 306k ± 1% +5.02% (p=0.000 n=10+10) RampingArrivalRateRun/VUs10000-8 266k ± 2% 286k ± 2% +7.84% (p=0.000 n=10+10) VUHandleIterations-8 0.09 ± 6% 0.08 ± 6% -14.06% (p=0.000 n=10+10) --- lib/execution_segment.go | 111 ++++++++++++++++++++----------- lib/execution_segment_test.go | 2 +- lib/executor/ramping_vus_test.go | 20 ++++++ 3 files changed, 92 insertions(+), 41 deletions(-) diff --git a/lib/execution_segment.go b/lib/execution_segment.go index 9dff1c0358b..0036540dccd 100644 --- a/lib/execution_segment.go +++ b/lib/execution_segment.go @@ -494,9 +494,9 @@ type ExecutionSegmentSequenceWrapper struct { ExecutionSegmentSequence // a filled-out segment sequence lcd int64 // pre-calculated least common denominator - // The striped offsets, i.e. the repeating indexes that "belong" to each + // The striped jumps, i.e. the repeating indexes that "belong" to each // execution segment in the sequence. 
- offsets [][]int64 + jumps [][]int64 } // NewExecutionSegmentSequenceWrapper expects a filled-out execution segment @@ -508,7 +508,7 @@ func NewExecutionSegmentSequenceWrapper(ess ExecutionSegmentSequence) *Execution } sequenceLength := len(ess) - offsets := make([][]int64, sequenceLength) + jumps := make([][]int64, sequenceLength) lcd := ess.LCD() // This will contain the normalized numerator values (i.e. what they would have @@ -524,7 +524,7 @@ func NewExecutionSegmentSequenceWrapper(ess ExecutionSegmentSequence) *Execution normalizedNumerator := ess[i].length.Num().Int64() * (lcd / ess[i].length.Denom().Int64()) sortedNormalizedIndexes[i].normNumerator = normalizedNumerator sortedNormalizedIndexes[i].originalIndex = i - offsets[i] = make([]int64, 0, normalizedNumerator+1) + jumps[i] = make([]int64, 0, normalizedNumerator) } sort.SliceStable(sortedNormalizedIndexes, func(i, j int) bool { @@ -561,28 +561,21 @@ func NewExecutionSegmentSequenceWrapper(ess ExecutionSegmentSequence) *Execution // sorting of the segments from biggest to smallest helps with the fact that // the biggest elements will need to take the most elements, and for them it // will be the hardest to not get sequential elements. 
- prev := make([]int64, sequenceLength) chosenCounts := make([]int64, sequenceLength) - saveIndex := func(iteration int64, index int, numerator int64) { - offsets[index] = append(offsets[index], iteration-prev[index]) - prev[index] = iteration - if int64(len(offsets[index])) == numerator { - offsets[index] = append(offsets[index], offsets[index][0]+lcd-iteration) - } - } for i := int64(0); i < lcd; i++ { for sortedIndex, chosenCount := range chosenCounts { num := chosenCount * lcd denom := sortedNormalizedIndexes[sortedIndex].normNumerator if i > num/denom || (i == num/denom && num%denom == 0) { chosenCounts[sortedIndex]++ - saveIndex(i, sortedNormalizedIndexes[sortedIndex].originalIndex, denom) + index := sortedNormalizedIndexes[sortedIndex].originalIndex + jumps[index] = append(jumps[index], i) break } } } - return &ExecutionSegmentSequenceWrapper{ExecutionSegmentSequence: ess, lcd: lcd, offsets: offsets} + return &ExecutionSegmentSequenceWrapper{ExecutionSegmentSequence: ess, lcd: lcd, jumps: jumps} } // LCD returns the (cached) least common denominator of the sequence - no need @@ -593,13 +586,23 @@ func (essw *ExecutionSegmentSequenceWrapper) LCD() int64 { // ScaleInt64 scales the provided value for the given segment. 
func (essw *ExecutionSegmentSequenceWrapper) ScaleInt64(segmentIndex int, value int64) int64 { - start := essw.offsets[segmentIndex][0] - offsets := essw.offsets[segmentIndex][1:] - result := (value / essw.lcd) * int64(len(offsets)) - for gi, i := 0, start; i < value%essw.lcd; gi, i = gi+1, i+offsets[gi] { - result++ + jumps := essw.jumps[segmentIndex] + endValue := (value / essw.lcd) * int64(len(jumps)) + remaining := value % essw.lcd + if jumps[0] <= remaining { + i, j := 0, len(jumps) + for i < j { + h := int(uint(i+j) >> 1) // avoid overflow when computing h + // i ≤ h < j + if jumps[h] < remaining { + i = h + 1 // preserves f(i-1) == false + } else { + j = h // preserves f(j) == true + } + } + endValue += int64(i) } - return result + return endValue } // GetStripedOffsets returns the stripped offsets for the given segment @@ -611,8 +614,24 @@ func (essw *ExecutionSegmentSequenceWrapper) ScaleInt64(segmentIndex int, value // - lcd: the LCD of the lengths of all segments in the sequence. This is also the number of // elements after which the algorithm starts to loop and give the same values func (essw *ExecutionSegmentSequenceWrapper) GetStripedOffsets(segmentIndex int) (int64, []int64, int64) { - offsets := essw.offsets[segmentIndex] - return offsets[0], offsets[1:], essw.lcd + jumps := essw.jumps[segmentIndex] + offsets := make([]int64, len(jumps)) + for i := 1; i < len(jumps); i++ { + offsets[i-1] = jumps[i] - jumps[i-1] + } + offsets[len(offsets)-1] = essw.lcd - jumps[len(jumps)-1] + jumps[0] + return jumps[0], offsets, essw.lcd +} + +// GetStripedJumps returns the stripped jumps for the given segment +// the returned values are as follows in order: +// - jumps: a list of jumps from the beginning value for the segment. This are only the jumps +// to from the start to the next start if we chunk the elements we are going to strip +// into lcd sized chunks +// - lcd: the LCD of the lengths of all segments in the sequence. 
This is also the number of +// elements after which the algorithm starts to loop and give the same values +func (essw *ExecutionSegmentSequenceWrapper) GetStripedJumps(segmentIndex int) ([]int64, int64) { + return essw.jumps[segmentIndex], essw.lcd } // GetTuple returns an ExecutionTuple for the specified segment index. @@ -758,6 +777,11 @@ func (et *ExecutionTuple) GetStripedOffsets() (int64, []int64, int64) { return et.Sequence.GetStripedOffsets(et.SegmentIndex) } +// GetStripedJumps returns the striped jumps for our execution segment. +func (et *ExecutionTuple) GetStripedJumps() ([]int64, int64) { + return et.Sequence.GetStripedJumps(et.SegmentIndex) +} + // GetNewExecutionTupleFromValue re-segments the sequence, based on the given // value (see GetNewExecutionSegmentSequenceFromValue() above), and either // returns the new tuple, or an error if the current segment isn't present in @@ -783,6 +807,7 @@ func (et *ExecutionTuple) GetNewExecutionTupleFromValue(value int64) (*Execution type SegmentedIndex struct { start, lcd int64 offsets []int64 + jumps []int64 scaled, unscaled int64 // for both the first element(vu) is 1 not 0 } @@ -790,7 +815,8 @@ type SegmentedIndex struct { // given an ExecutionTuple. func NewSegmentedIndex(et *ExecutionTuple) *SegmentedIndex { start, offsets, lcd := et.GetStripedOffsets() - return &SegmentedIndex{start: start, lcd: lcd, offsets: offsets} + jumps, _ := et.GetStripedJumps() + return &SegmentedIndex{start: start, lcd: lcd, offsets: offsets, jumps: jumps} } // Next goes to the next scaled index and moves the unscaled one accordingly. @@ -821,7 +847,6 @@ func (s *SegmentedIndex) Prev() (int64, int64) { // GoTo sets the scaled index to its biggest value for which the corresponding // unscaled index is smaller or equal to value. 
func (s *SegmentedIndex) GoTo(value int64) (int64, int64) { // TODO optimize - var gi int64 // Because of the cyclical nature of the striping algorithm (with a cycle // length of LCD, the least common denominator), when scaling large values // (i.e. many multiples of the LCD), we can quickly calculate how many times @@ -829,28 +854,34 @@ func (s *SegmentedIndex) GoTo(value int64) (int64, int64) { // TODO optimize wholeCycles := (value / s.lcd) // So we can set some approximate initial values quickly, since we also know // precisely how many scaled values there are per cycle length. - s.scaled = wholeCycles * int64(len(s.offsets)) - s.unscaled = wholeCycles*s.lcd + s.start + 1 // our indexes are from 1 the start is from 0 + s.scaled = wholeCycles * int64(len(s.jumps)) + s.unscaled = wholeCycles * s.lcd // our indexes are from 1 the start is from 0 // Approach the final value using the slow algorithm with the step by step loop // TODO: this can be optimized by another array with size offsets that instead of the offsets // from the previous is the offset from either 0 or start - i := s.start - for ; i < value%s.lcd; gi, i = gi+1, i+s.offsets[gi] { - s.scaled++ - s.unscaled += s.offsets[gi] - } - if gi > 0 { // there were more values after the wholecycles - // the last offset actually shouldn't have been added - s.unscaled -= s.offsets[gi-1] - } else if s.scaled > 0 { // we didn't actually have more values after the wholecycles but we still had some + remaining := value % s.lcd + switch { + case s.jumps[0]+1 > remaining: + // we didn't actually have more values after the wholecycles but we still had some // in this case the unscaled value needs to move back by the last offset as it would've been // the one to get it from the value it needs to be to it's current one - s.unscaled -= s.offsets[len(s.offsets)-1] - } - - if s.scaled == 0 { - s.unscaled = 0 // we would've added the start and 1 + if wholeCycles > 0 { + s.unscaled -= s.lcd - s.jumps[len(s.jumps)-1] - 1 + } + 
default: + i, j := 0, len(s.jumps) + for i < j { + h := int(uint(i+j) >> 1) // avoid overflow when computing h + // i ≤ h < j + if s.jumps[h] < remaining { + i = h + 1 // preserves f(i-1) == false + } else { + j = h // preserves f(j) == true + } + } + s.scaled += int64(i) + s.unscaled += s.jumps[i-1] + 1 } return s.scaled, s.unscaled diff --git a/lib/execution_segment_test.go b/lib/execution_segment_test.go index 07f490841e8..87145aa4876 100644 --- a/lib/execution_segment_test.go +++ b/lib/execution_segment_test.go @@ -1049,7 +1049,7 @@ func TestSegmentedIndex(t *testing.T) { t.Run("strange", func(t *testing.T) { t.Parallel() - s := SegmentedIndex{start: 1, lcd: 7, offsets: []int64{4, 3}} + s := SegmentedIndex{start: 1, lcd: 7, offsets: []int64{4, 3}, jumps: []int64{1, 5}} s.Next() assert.EqualValues(t, 2, s.unscaled) diff --git a/lib/executor/ramping_vus_test.go b/lib/executor/ramping_vus_test.go index f129b1c448c..63f363ff9f9 100644 --- a/lib/executor/ramping_vus_test.go +++ b/lib/executor/ramping_vus_test.go @@ -1036,6 +1036,26 @@ func BenchmarkRampingVUsGetRawExecutionSteps(b *testing.B) { { name: "normal", stages: `[{"duration":"5m", "target":5000},{"duration":"5m", "target":5000},{"duration":"5m", "target":10000},{"duration":"5m", "target":10000}]`, + }, { + name: "jumpy", + stages: `[{"duration":"5m", "target":5000},{"duration":"0s", "target":0}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":5432}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":0}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":5432}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":0}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":5432}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":0}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":5432}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":0}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":5432}, + 
{"duration":"5m", "target":5000},{"duration":"0s", "target":0}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":5432}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":0}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":5432}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":0}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":5432}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":0}, + {"duration":"5m", "target":5000},{"duration":"0s", "target":5432}]`, }, { name: "rollercoaster", stages: `[{"duration":"5m", "target":5000},{"duration":"5m", "target":0},