-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgit-care.sh
executable file
·545 lines (447 loc) · 14.7 KB
/
git-care.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
#!/usr/bin/env bash
# Abort on any unhandled error, on use of an unset variable,
# and when any stage of a pipeline fails.
set -euo pipefail
# Top-level directory of the working tree, as reported by git.
# The functions below build paths of the form ${PROJECT_DIR}/.git/...
PROJECT_DIR=$(git rev-parse --show-toplevel)
# Default delay between two runs of each background job.
# Each value is handed to `sleep`, and is written to the matching
# `git-care.*` config key by start_git_care; the *_loop functions
# re-read that key on every iteration.
INTERVAL_PREFETCH=90
INTERVAL_COMMIT_GRAPH=60
INTERVAL_MIDX=60
INTERVAL_PACK_LOOSE=60
INTERVAL_PACK_REFS=300
INTERVAL_EXPIRE_REFLOG=120
INTERVAL_WORKTREE_PRUNE=120
INTERVAL_RERERE_GC=120
INTERVAL_REFRESH_INDEX=300
# Source side of the refspec used by prefetch; the destination side
# (refs/prefetch/<remote>/*) is appended there.
PREFETCH_REF_SPEC='+refs/heads/*'
# 1 when refresh_index should enable the untracked cache.
# start_git_care resets it to 0 when
# `git update-index --test-untracked-cache` fails.
SUPPORT_UNTRACKED_CACHE=1
# prefetch fetches commits from every configured remote into refs under
# the `refs/prefetch/<remote>/` namespace. This way the object database
# (odb) is populated in the background and the user only needs to update
# refs and checkout during git-fetch / git-pull.
#
# Globals:
#   PREFETCH_REF_SPEC (read) - source side of the fetch refspec
# Returns:
#   status of the pipeline that lists the remotes and fetches them
prefetch() {
  # `git remote --verbose` prints "<name>\t<url> (fetch)" and a matching
  # "(push)" line; only the fetch lines are of interest.
  git remote --verbose |
    grep -F '(fetch)' |
    uniq |
    while read -r remote url _; do
      # `--refmap` requires a value. Written bare, it consumed the next
      # word (`--prune`) as its value, so pruning was silently disabled.
      # An empty value makes git rely only on the refspec given here.
      git fetch \
        --refmap= \
        --prune \
        --prune-tags \
        --quiet \
        "${url}" \
        "${PREFETCH_REF_SPEC}:refs/prefetch/${remote}/*"
    done
}
# prefetch_loop runs prefetch over and over. Before each round the delay
# is re-read from the `git-care.prefetch` config key into
# INTERVAL_PREFETCH; a value that compares <= 0 terminates the process.
prefetch_loop() {
  while :; do
    INTERVAL_PREFETCH="$(git config --get 'git-care.prefetch')"
    [[ "${INTERVAL_PREFETCH}" -le 0 ]] && exit 0
    # A failed round is ignored so the loop keeps going.
    prefetch || true
    sleep ${INTERVAL_PREFETCH}
  done
}
# verify_bloom_filter checks that every file of the split commit-graph
# chain carries both Bloom filter chunks (BIDX and BDAT). As soon as one
# file is corrupted or lacks them, the commit-graph-chain file is removed
# so that the chain gets rebuilt, and the scan stops.
#
# Globals:
#   PROJECT_DIR (read) - top-level directory of the repository
# Outputs:
#   a two-line explanation on stdout when the chain is removed
verify_bloom_filter() {
  local graphs_dir="${PROJECT_DIR}/.git/objects/info/commit-graphs"
  local chain_file="${graphs_dir}/commit-graph-chain"
  local f chunk_count chunk_id bytes_offset i
  local has_bloom_index has_bloom_data

  for f in "${graphs_dir}"/*.graph; do
    # Byte 6 of the header holds the number of chunks. Read it as one
    # unsigned byte; an unreadable file counts as zero chunks.
    chunk_count=$(od -j6 -N1 -An -t u1 "${f}" 2>/dev/null | tr -d '[:space:]') ||
      chunk_count=''
    [[ "${chunk_count}" =~ ^[0-9]+$ ]] || chunk_count=0

    # A commit graph always has 3 default chunks
    # If there were less than 3 chunks, the file is corrupted
    if ((chunk_count < 3)); then
      echo "Corrupted commit-graph in $f"
      echo 'Rebuilding commit-graph chain'
      rm -f -- "${chain_file}"
      break
    fi

    # Find Bloom filter chunks among non-default chunks
    # Always skip the first 3 default chunks
    # (with exactly 3 chunks the loop body never runs).
    has_bloom_index=0
    has_bloom_data=0
    for ((i = 3; i < chunk_count; i++)); do
      # 8 bytes for commit-graph header
      # 12 bytes for each chunk skipped
      bytes_offset=$((8 + i * 12))
      # read first 4 bytes of next chunk to get 4 characters chunk id
      chunk_id=$(od -j"${bytes_offset}" -N4 -An -c "${f}" 2>/dev/null | tr -d '[:space:]') ||
        chunk_id=''
      case "${chunk_id}" in
        'BIDX') has_bloom_index=1 ;;
        'BDAT') has_bloom_data=1 ;;
        *) ;;
      esac
    done

    # If either of the Bloom filter chunks is missing
    if ((has_bloom_index + has_bloom_data != 2)); then
      echo "Missing Bloom filter in $f"
      echo 'Rebuilding commit-graph chain with bloom filter (slow)'
      rm -f -- "${chain_file}"
      break
    fi
  done
}
# commit_graph refreshes the commit-graph in a non-disruptive manner
# thus speed up git operations over the commit history i.e. git-log
#
# Globals:
#   PROJECT_DIR (read) - top-level directory of the repository
# Returns:
#   status of the last git command that ran
commit_graph() {
  local info_dir="${PROJECT_DIR}/.git/objects/info"
  local chain_file="${info_dir}/commit-graphs/commit-graph-chain"
  local -a write_args=(
    --reachable --split --changed-paths --size-multiple=4 --no-progress
  )

  # Split commit-graph does not work when the full commit-graph file
  # is present. So we should remove it first.
  #
  # Reference:
  # - https://github.com/git/git/blob/cb99a34e23e32ca8e94bafaa9699cfd133a17fd3/t/t5324-split-commit-graph.sh#L336
  if [[ -f "${info_dir}/commit-graph" ]]; then
    # TODO: perhaps we should write commit-graph with '--split --append' here instead?
    rm -f -- "${info_dir}/commit-graph"
  fi

  # If a commit-graph chain already exists,
  # check that every file in it was built with Bloom filters.
  if [[ -f "${chain_file}" ]]; then
    verify_bloom_filter
  fi

  git commit-graph write "${write_args[@]}"

  if ! git commit-graph verify --shallow --no-progress; then
    # Somebody might have broken the commit-graph by force pushing.
    # This means we need to remove the old graph and rebuild
    # the entire graph.
    rm -f -- "${chain_file}"
    git commit-graph write "${write_args[@]}"
  fi
}
# commit_graph_loop runs commit_graph over and over. Before each round
# the delay is re-read from the `git-care.commit-graph` config key into
# INTERVAL_COMMIT_GRAPH; a value that compares <= 0 terminates the process.
commit_graph_loop() {
  while :; do
    INTERVAL_COMMIT_GRAPH="$(git config --get 'git-care.commit-graph')"
    [[ "${INTERVAL_COMMIT_GRAPH}" -le 0 ]] && exit 0
    # A failed round is ignored so the loop keeps going.
    commit_graph || true
    sleep ${INTERVAL_COMMIT_GRAPH}
  done
}
# _midx_verify_or_rewrite verifies the multi-pack-index and, when the
# verification fails, deletes the index file and writes a fresh one.
#
# Globals:
#   PROJECT_DIR (read) - top-level directory of the repository
_midx_verify_or_rewrite() {
  if ! git multi-pack-index verify --no-progress; then
    # Quoted so that a PROJECT_DIR containing whitespace cannot make
    # rm operate on the wrong paths.
    rm -f -- "${PROJECT_DIR}/.git/objects/pack/multi-pack-index"
    git multi-pack-index write --no-progress
  fi
}
# _midx_auto_size calculates the --batch-size dynamically.
# It uses the size of the 2nd biggest pack to achieve a more
# consistent result than Scalar.
#
# Globals:
#   PROJECT_DIR (read)  - top-level directory of the repository
#   BATCH_SIZE (written) - size of the 2nd biggest pack plus one, in bytes
_midx_auto_size() {
  local second_biggest_pack
  # With several files `wc -c` appends a "total" line, which sorts last.
  # The last three lines are therefore: 2nd biggest, biggest, total.
  second_biggest_pack=$(
    wc -c "${PROJECT_DIR}"/.git/objects/pack/*pack |
      sort -n |
      awk '{print $1}' |
      tail -3 |
      head -1
  )
  # Result unit is bytes
  BATCH_SIZE=$((second_biggest_pack + 1))
}
# multi_pack_index writes a multi-pack-index file
# which is used to incrementally repack packs into a bigger packfile.
# Pack files which were repacked get cleaned up with `expire`.
# Consolidating packfiles helps speed up operations such as git-log
#
# Globals:
#   PROJECT_DIR (read)   - top-level directory of the repository
#   BATCH_SIZE (written) - value passed to `repack --batch-size`
multi_pack_index() {
  local pack_count

  git multi-pack-index write --no-progress
  _midx_verify_or_rewrite
  git multi-pack-index expire --no-progress
  _midx_verify_or_rewrite

  # Autosizing the --batch-size option
  # With 2 packs or fewer, do a full repack (batch size 0),
  # otherwise skip the biggest packfile.
  # A failing find (no pack at all) still yields a count of 0.
  pack_count=$(
    find "${PROJECT_DIR}"/.git/objects/pack/*.pack -type f | wc -l
  ) || :
  if ((pack_count <= 2)); then
    BATCH_SIZE=0
  else
    _midx_auto_size
  fi

  git multi-pack-index repack --batch-size="${BATCH_SIZE}" --no-progress
  _midx_verify_or_rewrite
}
# multi_pack_index_loop runs multi_pack_index over and over. Before each
# round the delay is re-read from the `git-care.multi-pack-index` config
# key into INTERVAL_MIDX; a value that compares <= 0 terminates the process.
multi_pack_index_loop() {
  while :; do
    INTERVAL_MIDX="$(git config --get 'git-care.multi-pack-index')"
    [[ "${INTERVAL_MIDX}" -le 0 ]] && exit 0
    # A failed round is ignored so the loop keeps going.
    multi_pack_index || true
    sleep ${INTERVAL_MIDX}
  done
}
# pack_loose_objects packs all loose objects into a pack file with prefix `loose`
# then cleans up all the loose objects which were packed.
#
# This is meant to couple with `multi_pack_index` so that the `loose` packfile is
# eventually re-packed into a bigger file and cleaned up.
#
# Note that with this approach, unreachable objects are stored and never pruned from
# pack files. This is a tradeoff deliberately made for a smoother client-side experience
#
# If size is a concern, one can always run `git repack -A -d && git prune`. But I dont see
# this is very useful right now.
#
# Globals:
#   PROJECT_DIR (read) - top-level directory of the repository
pack_loose_objects() {
  local objects_dir="${PROJECT_DIR}/.git/objects"
  local obj_dir_count

  git prune-packed --quiet

  # Count directories under objects/ other than the well-known ones.
  # grep exits non-zero when nothing is left; that is not an error here.
  obj_dir_count=$(
    find "${objects_dir}" -type d |
      head -1000 |
      grep -Ev '(pack|info|objects|commit-graphs)$' |
      wc -l
  ) || :

  if [[ "${obj_dir_count}" -ne 0 ]]; then
    # Turn ".../objects/ab/cdef..." into the object id "abcdef...".
    # The last two path components are joined directly, so PROJECT_DIR
    # is never interpreted as part of a regular expression.
    # stderr is duplicated onto the caller's stdout before stdout
    # itself is discarded.
    find "${objects_dir}"/?? -type f |
      awk -F/ '{print $(NF-1) $NF}' |
      git pack-objects -q "${objects_dir}/pack/loose" 2>&1 >/dev/null
    git prune-packed --quiet
  fi
}
# pack_loose_objects_loop runs pack_loose_objects over and over. Before
# each round the delay is re-read from the `git-care.pack-loose-objects`
# config key into INTERVAL_PACK_LOOSE; a value that compares <= 0
# terminates the process.
pack_loose_objects_loop() {
  while :; do
    INTERVAL_PACK_LOOSE="$(git config --get 'git-care.pack-loose-objects')"
    [[ "${INTERVAL_PACK_LOOSE}" -le 0 ]] && exit 0
    # A failed round is ignored so the loop keeps going.
    pack_loose_objects || true
    sleep ${INTERVAL_PACK_LOOSE}
  done
}
# refresh_index runs a full `git status` so that the untracked cache
# stays up-to-date and the watchman process stays awake.
# Modeled after https://github.com/microsoft/scalar/pull/365
#
# Globals:
#   SUPPORT_UNTRACKED_CACHE (read) - 1 to enable and force the
#                                    untracked cache for this run
# Note: in every command below stderr is duplicated onto the caller's
# stdout before stdout itself is sent to /dev/null.
refresh_index() {
  if ! [[ ${SUPPORT_UNTRACKED_CACHE} -eq 1 ]]; then
    # Plain status, no untracked cache involved.
    git --no-optional-locks status \
      --untracked-files=all --porcelain=v1 2>&1 >/dev/null
    return
  fi

  git update-index --untracked-cache 2>&1 >/dev/null
  GIT_FORCE_UNTRACKED_CACHE=1 git --no-optional-locks status \
    --untracked-files=all --porcelain=v1 2>&1 >/dev/null
}
# refresh_index_loop runs refresh_index over and over. Before each round
# the delay is re-read from the `git-care.refresh-index` config key into
# INTERVAL_REFRESH_INDEX; a value that compares <= 0 terminates the process.
refresh_index_loop() {
  while :; do
    INTERVAL_REFRESH_INDEX="$(git config --get 'git-care.refresh-index')"
    [[ "${INTERVAL_REFRESH_INDEX}" -le 0 ]] && exit 0
    # A failed round is ignored so the loop keeps going.
    refresh_index || true
    sleep ${INTERVAL_REFRESH_INDEX}
  done
}
# pack_refs gathers all refs into a single packed file,
# which makes for-each-ref a bit faster.
pack_refs() {
  # --all: note that this will auto prune the packed refs
  local -a pack_refs_args=(--all)
  git pack-refs "${pack_refs_args[@]}"
}
# pack_refs_loop runs pack_refs over and over. Before each round the
# delay is re-read from the `git-care.pack-refs` config key into
# INTERVAL_PACK_REFS; a value that compares <= 0 terminates the process.
pack_refs_loop() {
  while :; do
    INTERVAL_PACK_REFS="$(git config --get 'git-care.pack-refs')"
    [[ "${INTERVAL_PACK_REFS}" -le 0 ]] && exit 0
    # A failed round is ignored so the loop keeps going.
    pack_refs || true
    sleep ${INTERVAL_PACK_REFS}
  done
}
# expire_reflog asks git to expire the reflog entries
# of all references in the repo.
expire_reflog() {
  local -a expire_args=(--all)
  git reflog expire "${expire_args[@]}"
}
# expire_reflog_loop runs expire_reflog over and over. Before each round
# the delay is re-read from the `git-care.expire-reflog` config key into
# INTERVAL_EXPIRE_REFLOG; a value that compares <= 0 terminates the process.
expire_reflog_loop() {
  while :; do
    INTERVAL_EXPIRE_REFLOG="$(git config --get 'git-care.expire-reflog')"
    [[ "${INTERVAL_EXPIRE_REFLOG}" -le 0 ]] && exit 0
    # A failed round is ignored so the loop keeps going.
    expire_reflog || true
    sleep ${INTERVAL_EXPIRE_REFLOG}
  done
}
# worktree_prune removes the administrative files
# that stay behind once a worktree has been removed.
worktree_prune() {
  local -a prune_cmd=(worktree prune)
  git "${prune_cmd[@]}"
}
# worktree_prune_loop runs worktree_prune over and over. Before each
# round the delay is re-read from the `git-care.worktree-prune` config
# key into INTERVAL_WORKTREE_PRUNE; a value that compares <= 0
# terminates the process.
worktree_prune_loop() {
  while :; do
    INTERVAL_WORKTREE_PRUNE="$(git config --get 'git-care.worktree-prune')"
    [[ "${INTERVAL_WORKTREE_PRUNE}" -le 0 ]] && exit 0
    # A failed round is ignored so the loop keeps going.
    worktree_prune || true
    sleep ${INTERVAL_WORKTREE_PRUNE}
  done
}
# rerere_gc lets git drop the expired records of merge conflicts.
rerere_gc() {
  local -a gc_cmd=(rerere gc)
  git "${gc_cmd[@]}"
}
# rerere_gc_loop runs rerere_gc over and over. Before each round the
# delay is re-read from the `git-care.rerere-gc` config key into
# INTERVAL_RERERE_GC; a value that compares <= 0 terminates the process.
rerere_gc_loop() {
  while :; do
    INTERVAL_RERERE_GC="$(git config --get 'git-care.rerere-gc')"
    [[ "${INTERVAL_RERERE_GC}" -le 0 ]] && exit 0
    # A failed round is ignored so the loop keeps going.
    rerere_gc || true
    sleep ${INTERVAL_RERERE_GC}
  done
}
# turn_on_watchman checks if the watchman executable is available
# and, if so, installs the fsmonitor hook (copied from git's sample hook)
# to increase git-status speed.
#
# Globals:
#   PROJECT_DIR (read) - top-level directory of the repository
# Outputs:
#   a confirmation line on stdout once the hook is installed
# NOTE(review): only the hook file is copied here; whether
# `core.fsmonitor` must also point at it should be confirmed.
turn_on_watchman() {
  local hooks_dir="${PROJECT_DIR}/.git/hooks"

  if [[ -x "$(command -v watchman)" ]]; then
    # Quoted so that a PROJECT_DIR containing whitespace is copied
    # from/to the intended paths.
    cp -f -- "${hooks_dir}/fsmonitor-watchman.sample" \
      "${hooks_dir}/fsmonitor-watchman"
    echo 'Installed fsmonitor-watchman hook'
  fi
}
# turn_off_watchman removes the fsmonitor-watchman hook
# if it is present in the repository's hooks directory.
#
# Globals:
#   PROJECT_DIR (read) - top-level directory of the repository
# Outputs:
#   a confirmation line on stdout once the hook is removed
turn_off_watchman() {
  local hook="${PROJECT_DIR}/.git/hooks/fsmonitor-watchman"

  if [[ -f "${hook}" ]]; then
    # Quoted so that a PROJECT_DIR containing whitespace cannot make
    # rm operate on the wrong paths.
    rm -f -- "${hook}"
    echo 'Removed fsmonitor-watchman hook'
  fi
}
#############################
# #
# XXX XXX XXX XXX XXX XXX #
# XXX XXX XXX XXX XXX XXX #
# #
#############################
# start_git_care starts the background processes.
#
# It first runs every maintenance job once in the foreground, then
# updates the repository's git config, installs the watchman hook and
# finally launches one background loop per job.
#
# Globals:
#   INTERVAL_* (read)                 - stored under the git-care.* keys
#   SUPPORT_UNTRACKED_CACHE (written) - set to 0 when the untracked
#                                       cache test fails
start_git_care() {
  echo '
Git-Care.sh requirements:
- git version >2.27.0
- watchman
'
  # Run a set of tests to ensure background jobs could
  # run without disruption
  echo 'Running some tests before updating git configs'
  echo '[1/9] Testing prefetch'
  prefetch
  echo '[2/9] Testing commit_graph (slow)'
  commit_graph
  echo '[3/9] Testing pack_loose_objects'
  pack_loose_objects
  echo '[4/9] Testing multi_pack_index (slow)'
  multi_pack_index
  echo '[5/9] Testing pack refs'
  pack_refs
  echo '[6/9] Testing expire reflog'
  expire_reflog
  echo '[7/9] Testing prune worktree'
  worktree_prune
  echo '[8/9] Testing expire rerere'
  rerere_gc
  echo '[9/9] Testing untracked-cache'
  if ! git update-index --test-untracked-cache; then
    SUPPORT_UNTRACKED_CACHE=0
  fi
  refresh_index
  echo '
All tests succeed! Updating git configs.
'
  # This is to ensure that multiPackIndex is always used
  # during pack processes.
  git config core.multiPackIndex true
  # This should improve fetch speed since you dont have to
  # unpack objects. But without the multi-pack-index job above,
  # it can flood your repo with many packs. Should be unset
  # at all times unless multi_pack_index is running.
  git config transfer.unpackLimit 1
  # Disable gc because we are handling it by ourselves
  # also a lot of fetches from prefetch could make gc slow
  echo 'Disabling auto-gc'
  git config gc.auto 0
  # Disable auto commit-graph creation
  # Commit-graph are managed by a dedicated process so we should
  # not need to rely on git process to chain call the write ops
  git config fetch.writeCommitGraph false
  # The key is `feature.experimental` (the one stop_git_care unsets);
  # `fetch.experimental` is not the key the surrounding comments and
  # stop_git_care refer to.
  git config feature.experimental false
  # Improve fetch negotiation time
  # This is often set by 'feature.experimental' but since we turned that
  # config off above, we should set it here again
  git config fetch.negotiationAlgorithm skipping
  turn_on_watchman
  # Set flags for git-care executions
  git config git-care.enable 1
  git config git-care.prefetch "${INTERVAL_PREFETCH}"
  git config git-care.commit-graph "${INTERVAL_COMMIT_GRAPH}"
  git config git-care.multi-pack-index "${INTERVAL_MIDX}"
  git config git-care.pack-loose-objects "${INTERVAL_PACK_LOOSE}"
  git config git-care.refresh-index "${INTERVAL_REFRESH_INDEX}"
  git config git-care.pack-refs "${INTERVAL_PACK_REFS}"
  git config git-care.worktree-prune "${INTERVAL_WORKTREE_PRUNE}"
  git config git-care.rerere-gc "${INTERVAL_RERERE_GC}"
  git config git-care.expire-reflog "${INTERVAL_EXPIRE_REFLOG}"
  # One background loop per maintenance job.
  prefetch_loop &
  commit_graph_loop &
  multi_pack_index_loop &
  pack_loose_objects_loop &
  pack_refs_loop &
  worktree_prune_loop &
  expire_reflog_loop &
  rerere_gc_loop &
  refresh_index_loop &
}
# stop_git_care reverts the git config changes made for git-care and
# removes the watchman hook. Every config removal is best-effort:
# a key that cannot be unset is ignored.
stop_git_care() {
  local config_key

  echo 'Enabling auto-gc'
  # In order:
  #   gc.auto                    - re-enable gc
  #   core.multiPackIndex        - the index could be stale overtime
  #   transfer.unpackLimit       - see note in start_git_care
  #   feature.experimental,
  #   fetch.writeCommitGraph     - commit-graph related config
  #   fetch.negotiationAlgorithm - follow global's feature.experimental
  for config_key in \
    gc.auto \
    core.multiPackIndex \
    transfer.unpackLimit \
    feature.experimental \
    fetch.writeCommitGraph \
    fetch.negotiationAlgorithm; do
    git config --unset "${config_key}" || :
  done

  # Drop every git-care.* setting at once.
  git config --remove-section git-care || :
  turn_off_watchman
}
# Entry point: running the script toggles git-care.
# When `git-care.enable` compares equal to 0 (an empty value, e.g. when
# the key is not set, also compares equal to 0) git-care is started;
# otherwise it is stopped.
if [[ $(git config --get 'git-care.enable') -eq 0 ]]; then
echo 'Starting git-care';
start_git_care;
echo 'Started git-care';
# Usage hints. The awk program inside the message is wrapped in '\''
# so that literal single quotes are printed around it.
echo '
To monitor operations, you can run
> watch git count-objects -v -H
To toggle git-care, just run it again.
> ./git-care.sh
To tune the interval, adjust the value in git config
> vim .git/config
To force-kill all git-care processes and ignore git config changes. (not recommended)
> kill $(ps aux | grep 'git-care' | grep -v grep | grep -v vim | awk '\''{print $2}'\'')
'
else
# stop_git_care removes the git-care config section; the message below
# states that the background jobs stop on their own afterwards.
echo 'Stopping git-care';
stop_git_care;
echo '
Unset all configs, all background jobs will auto-stop.
To force-kill all git-care processes and ignore git config changes. (not recommended)
> kill $(ps aux | grep 'git-care' | grep -v grep | grep -v vim | awk '\''{print $2}'\'')
';
fi