Skip to content

Commit

Permalink
Optimise read planning.
Browse files Browse the repository at this point in the history
Previously we had to do a linear search through the list of
segment references to find which file to read a given entry from.

This commit introduced a new module `ra_lol` which is a
"list of list" type of ordered data structure that replaces
the use of a plain list.

ra_log keeps data in a list of 64 entry sized "rows" which
substantially reduces the amount of work needed to find a
given segment ref, as we first scan the rows to find the
row containing the segref, then we scan that fixed size row.

Even a server with 4096 segments only needs to scan at most
128 items (64 rows, then 64 entries within a row) to find any given entry.

This commit also fixes a few issues in segref compaction
and changes the type of the segment ref to a {ra_range:range(), filename()}
to work better with the ra_range module.
  • Loading branch information
kjnilsson committed Dec 11, 2024
1 parent c8dbe23 commit 79a1d39
Show file tree
Hide file tree
Showing 8 changed files with 503 additions and 192 deletions.
24 changes: 15 additions & 9 deletions src/ra_log.erl
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,7 @@
-define(WAL_RESEND_TIMEOUT, 5000).

-type ra_meta_key() :: atom().
-type segment_ref() :: {From :: ra_index(), To :: ra_index(),
File :: file:filename_all()}.
-type segment_ref() :: {ra_range:range(), File :: file:filename_all()}.
-type event_body() :: {written, ra_term(), ra:range()} |
{segments, [{ets:tid(), ra:range()}], [segment_ref()]} |
{resend_write, ra_index()} |
Expand Down Expand Up @@ -279,7 +278,7 @@ init(#{uid := UId,
LastSegRefIdx = case SegRefs of
[] ->
-1;
[{_, L, _} | _] ->
[{{_, L}, _} | _] ->
L
end,
LastWrittenIdx = case ra_log_wal:last_writer_seq(Wal, UId) of
Expand Down Expand Up @@ -969,8 +968,14 @@ should_snapshot(snapshot, Idx,
% We should take a snapshot if the new snapshot index would allow us
% to discard any segments or if we've handled enough commands
% since the last snapshot.
CanFreeSegments = lists:any(fun({_, To, _}) -> To =< Idx end,
ra_log_reader:segment_refs(Reader)),
CanFreeSegments = case ra_log_reader:range(Reader) of
undefined ->
false;
{Start, _End} ->
%% this isn't 100% guaranteed to free a segment
%% but there is a good chance
Idx > Start
end,
CanFreeSegments orelse Idx > SnapLimit;
should_snapshot(checkpoint, Idx,
#?MODULE{cfg = #cfg{min_checkpoint_interval = CheckpointInter},
Expand Down Expand Up @@ -1029,7 +1034,8 @@ overview(#?MODULE{last_index = LastIndex,
last_term => LastTerm,
first_index => FirstIndex,
last_written_index_term => LWIT,
num_segments => length(ra_log_reader:segment_refs(Reader)),
num_segments => ra_log_reader:segment_ref_count(Reader),
segments_range => ra_log_reader:range(Reader),
open_segments => ra_log_reader:num_open_segments(Reader),
snapshot_index => case CurrSnap of
undefined -> undefined;
Expand Down Expand Up @@ -1166,9 +1172,9 @@ delete_segments(SnapIdx, #?MODULE{cfg = #cfg{log_id = LogId,
ok = ra_log_segment_writer:truncate_segments(SegWriter,
UId, Pivot)
end),
Active = ra_log_reader:segment_refs(Reader),
NumActive = ra_log_reader:segment_ref_count(Reader),
?DEBUG("~ts: ~b obsolete segments at ~b - remaining: ~b, pivot ~0p",
[LogId, length(Obsolete), SnapIdx, length(Active), Pivot]),
[LogId, length(Obsolete), SnapIdx, NumActive, Pivot]),
State = State0#?MODULE{reader = Reader},
{State, log_update_effects(Readers, Pid, State)}
end.
Expand Down Expand Up @@ -1331,7 +1337,7 @@ recover_ranges(UId, MtRange, SegWriter) ->
[SegRef | Acc]
end
end, [], SegFiles),
SegRanges = [{F, L} || {F, L, _} <- SegRefs],
SegRanges = [Range || {Range, _} <- SegRefs],
Ranges = [MtRange | SegRanges],
{pick_range(Ranges, undefined), SegRefs}.

Expand Down
Loading

0 comments on commit 79a1d39

Please sign in to comment.