Skip to content

Commit 3adc5aa

Browse files
committed
box: don't delete inprogress files during normal GC
*.inprogress files shouldn't be deleted during normal GC, because they may be in use. E.g. GC may happen to run while WAL is rotating xlog; if GC removes the transient xlog.inprogress file created by WAL (see xdir_create_xlog), WAL will fail to rename it and abort the current transaction. Initially, inprogress files were removed only after recovery. For this reason, xdir_collect_inprogress, which is used for deleting inprogress files, accesses FS directly, without COIO. This was changed by commit 5aa243d ("recovery: build secondary index in hot standby mode") which moved xdir_collect_inprogress from memtx_engine_end_recovery to memtx_engine_collect_garbage so that a hot standby instance doesn't delete inprogress files of the master instance by mistake. To fix this issue, let's move xdir_collect_inprogress back where it belongs, to engine_end_recovery, and introduce a new callback for memtx to build its secondary keys before entering the hot standby mode - engine_begin_hot_standby. Let's also remove engine_collect_garbage from gc_init, which was added there by the aforementioned commit, probably to make tests pass. The bug was reported by the vinyl/deferred_delete test (#5089). Closes #6554 (cherry picked from commit f9bbfff)
1 parent cab3e23 commit 3adc5aa

16 files changed

+250
-8
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
## bugfix/core
2+
3+
* Fixed a bug because of which the garbage collector could remove an xlog file
4+
that is still in use (gh-6554).

src/box/blackhole.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,7 @@ static const struct engine_vtab blackhole_engine_vtab = {
188188
/* .bootstrap = */ generic_engine_bootstrap,
189189
/* .begin_initial_recovery = */ generic_engine_begin_initial_recovery,
190190
/* .begin_final_recovery = */ generic_engine_begin_final_recovery,
191+
/* .begin_hot_standby = */ generic_engine_begin_hot_standby,
191192
/* .end_recovery = */ generic_engine_end_recovery,
192193
/* .begin_checkpoint = */ generic_engine_begin_checkpoint,
193194
/* .wait_checkpoint = */ generic_engine_wait_checkpoint,

src/box/box.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3362,14 +3362,14 @@ local_recovery(const struct tt_uuid *instance_uuid,
33623362
diag_raise();
33633363
diag_log();
33643364
}
3365-
engine_end_recovery_xc();
33663365
/*
33673366
* Leave hot standby mode, if any, only after
33683367
* acquiring the lock.
33693368
*/
33703369
if (wal_dir_lock < 0) {
33713370
title("hot_standby");
33723371
say_info("Entering hot standby mode");
3372+
engine_begin_hot_standby_xc();
33733373
recovery_follow_local(recovery, &wal_stream.base, "hot_standby",
33743374
cfg_getd("wal_dir_rescan_delay"));
33753375
while (true) {
@@ -3410,6 +3410,8 @@ local_recovery(const struct tt_uuid *instance_uuid,
34103410
if (wal_enable() != 0)
34113411
diag_raise();
34123412

3413+
engine_end_recovery_xc();
3414+
34133415
/* Check replica set UUID. */
34143416
if (!tt_uuid_is_nil(replicaset_uuid) &&
34153417
!tt_uuid_is_equal(replicaset_uuid, &REPLICASET_UUID)) {

src/box/engine.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,17 @@ engine_begin_final_recovery(void)
113113
return 0;
114114
}
115115

116+
int
117+
engine_begin_hot_standby(void)
118+
{
119+
struct engine *engine;
120+
engine_foreach(engine) {
121+
if (engine->vtab->begin_hot_standby(engine) != 0)
122+
return -1;
123+
}
124+
return 0;
125+
}
126+
116127
int
117128
engine_end_recovery(void)
118129
{
@@ -348,6 +359,13 @@ generic_engine_begin_final_recovery(struct engine *engine)
348359
return 0;
349360
}
350361

362+
int
363+
generic_engine_begin_hot_standby(struct engine *engine)
364+
{
365+
(void)engine;
366+
return 0;
367+
}
368+
351369
int
352370
generic_engine_end_recovery(struct engine *engine)
353371
{

src/box/engine.h

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,11 @@ struct engine_vtab {
147147
* of WAL catch up durin join on slave side
148148
*/
149149
int (*begin_final_recovery)(struct engine *);
150+
/**
151+
* Notify the engine that the instance is about to enter
152+
* the hot standby mode to complete recovery from WALs.
153+
*/
154+
int (*begin_hot_standby)(struct engine *);
150155
/**
151156
* Inform the engine about the end of recovery from the
152157
* binary log.
@@ -337,9 +342,14 @@ engine_begin_initial_recovery(const struct vclock *recovery_vclock);
337342
int
338343
engine_begin_final_recovery(void);
339344

345+
/**
346+
* Called before entering the hot standby mode.
347+
*/
348+
int
349+
engine_begin_hot_standby(void);
350+
340351
/**
341352
* Called at the end of recovery.
342-
* Build secondary keys in all spaces.
343353
*/
344354
int
345355
engine_end_recovery(void);
@@ -395,6 +405,7 @@ int generic_engine_bootstrap(struct engine *);
395405
int generic_engine_begin_initial_recovery(struct engine *,
396406
const struct vclock *);
397407
int generic_engine_begin_final_recovery(struct engine *);
408+
int generic_engine_begin_hot_standby(struct engine *);
398409
int generic_engine_end_recovery(struct engine *);
399410
int generic_engine_begin_checkpoint(struct engine *, bool);
400411
int generic_engine_wait_checkpoint(struct engine *, const struct vclock *);
@@ -478,6 +489,13 @@ engine_begin_final_recovery_xc(void)
478489
diag_raise();
479490
}
480491

492+
static inline void
493+
engine_begin_hot_standby_xc(void)
494+
{
495+
if (engine_begin_hot_standby() != 0)
496+
diag_raise();
497+
}
498+
481499
static inline void
482500
engine_end_recovery_xc(void)
483501
{

src/box/gc.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,6 @@ gc_init(void)
117117
gc_tree_new(&gc.consumers);
118118
fiber_cond_create(&gc.cleanup_cond);
119119
checkpoint_schedule_cfg(&gc.checkpoint_schedule, 0, 0);
120-
engine_collect_garbage(&gc.vclock);
121120

122121
gc.cleanup_fiber = fiber_new("gc", gc_cleanup_fiber_f);
123122
if (gc.cleanup_fiber == NULL)

src/box/memtx_engine.c

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -356,22 +356,41 @@ memtx_engine_begin_final_recovery(struct engine *engine)
356356
return 0;
357357
}
358358

359+
static int
360+
memtx_engine_begin_hot_standby(struct engine *engine)
361+
{
362+
struct memtx_engine *memtx = (struct memtx_engine *)engine;
363+
/*
364+
* Build secondary indexes before entering the hot standby mode
365+
* to quickly switch to the hot standby instance after the master
366+
* instance exits.
367+
*/
368+
if (memtx->state != MEMTX_OK) {
369+
assert(memtx->state == MEMTX_FINAL_RECOVERY);
370+
memtx->state = MEMTX_OK;
371+
if (space_foreach(memtx_build_secondary_keys, memtx) != 0)
372+
return -1;
373+
}
374+
return 0;
375+
}
376+
359377
static int
360378
memtx_engine_end_recovery(struct engine *engine)
361379
{
362380
struct memtx_engine *memtx = (struct memtx_engine *)engine;
363381
/*
364-
* Recovery is started with enabled keys when:
365-
* - either of force_recovery
366-
* is false
382+
* Secondary keys have already been built in the following cases:
383+
* - force_recovery is set
367384
* - it's a replication join
385+
* - instance was in the hot standby mode
368386
*/
369387
if (memtx->state != MEMTX_OK) {
370388
assert(memtx->state == MEMTX_FINAL_RECOVERY);
371389
memtx->state = MEMTX_OK;
372390
if (space_foreach(memtx_build_secondary_keys, memtx) != 0)
373391
return -1;
374392
}
393+
xdir_collect_inprogress(&memtx->snap_dir);
375394
return 0;
376395
}
377396

@@ -863,7 +882,6 @@ memtx_engine_collect_garbage(struct engine *engine, const struct vclock *vclock)
863882
struct memtx_engine *memtx = (struct memtx_engine *)engine;
864883
xdir_collect_garbage(&memtx->snap_dir, vclock_sum(vclock),
865884
XDIR_GC_ASYNC);
866-
xdir_collect_inprogress(&memtx->snap_dir);
867885
}
868886

869887
static int
@@ -1047,6 +1065,7 @@ static const struct engine_vtab memtx_engine_vtab = {
10471065
/* .bootstrap = */ memtx_engine_bootstrap,
10481066
/* .begin_initial_recovery = */ memtx_engine_begin_initial_recovery,
10491067
/* .begin_final_recovery = */ memtx_engine_begin_final_recovery,
1068+
/* .begin_hot_standby = */ memtx_engine_begin_hot_standby,
10501069
/* .end_recovery = */ memtx_engine_end_recovery,
10511070
/* .begin_checkpoint = */ memtx_engine_begin_checkpoint,
10521071
/* .wait_checkpoint = */ memtx_engine_wait_checkpoint,

src/box/service_engine.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ static const struct engine_vtab service_engine_vtab = {
106106
/* .bootstrap = */ generic_engine_bootstrap,
107107
/* .begin_initial_recovery = */ generic_engine_begin_initial_recovery,
108108
/* .begin_final_recovery = */ generic_engine_begin_final_recovery,
109+
/* .begin_hot_standby = */ generic_engine_begin_hot_standby,
109110
/* .end_recovery = */ generic_engine_end_recovery,
110111
/* .begin_checkpoint = */ generic_engine_begin_checkpoint,
111112
/* .wait_checkpoint = */ generic_engine_wait_checkpoint,

src/box/sysview.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,7 @@ static const struct engine_vtab sysview_engine_vtab = {
578578
/* .bootstrap = */ generic_engine_bootstrap,
579579
/* .begin_initial_recovery = */ generic_engine_begin_initial_recovery,
580580
/* .begin_final_recovery = */ generic_engine_begin_final_recovery,
581+
/* .begin_hot_standby = */ generic_engine_begin_hot_standby,
581582
/* .end_recovery = */ generic_engine_end_recovery,
582583
/* .begin_checkpoint = */ generic_engine_begin_checkpoint,
583584
/* .wait_checkpoint = */ generic_engine_wait_checkpoint,

src/box/vinyl.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4521,6 +4521,7 @@ static const struct engine_vtab vinyl_engine_vtab = {
45214521
/* .bootstrap = */ vinyl_engine_bootstrap,
45224522
/* .begin_initial_recovery = */ vinyl_engine_begin_initial_recovery,
45234523
/* .begin_final_recovery = */ vinyl_engine_begin_final_recovery,
4524+
/* .begin_hot_standby = */ generic_engine_begin_hot_standby,
45244525
/* .end_recovery = */ vinyl_engine_end_recovery,
45254526
/* .begin_checkpoint = */ vinyl_engine_begin_checkpoint,
45264527
/* .wait_checkpoint = */ vinyl_engine_wait_checkpoint,

src/box/xlog.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -771,6 +771,8 @@ xlog_rename(struct xlog *l)
771771
memcpy(new_filename, filename, suffix - filename);
772772
new_filename[suffix - filename] = '\0';
773773

774+
ERROR_INJECT_SLEEP(ERRINJ_XLOG_RENAME_DELAY);
775+
774776
if (rename(filename, new_filename) != 0) {
775777
say_syserror("can't rename %s to %s", filename, new_filename);
776778
diag_set(SystemError, "failed to rename '%s' file",

src/lib/core/errinj.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ struct errinj {
161161
_(ERRINJ_XLOG_GARBAGE, ERRINJ_BOOL, {.bparam = false}) \
162162
_(ERRINJ_XLOG_META, ERRINJ_BOOL, {.bparam = false}) \
163163
_(ERRINJ_XLOG_READ, ERRINJ_INT, {.iparam = -1}) \
164+
_(ERRINJ_XLOG_RENAME_DELAY, ERRINJ_BOOL, {.bparam = false}) \
164165

165166
ENUM0(errinj_id, ERRINJ_LIST);
166167
extern struct errinj errinjs[];

test/box/errinj.result

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ evals
131131
- ERRINJ_XLOG_GARBAGE: false
132132
- ERRINJ_XLOG_META: false
133133
- ERRINJ_XLOG_READ: -1
134+
- ERRINJ_XLOG_RENAME_DELAY: false
134135
...
135136
errinj.set("some-injection", true)
136137
---
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
-- test-run result file version 2
2+
test_run = require('test_run').new()
3+
| ---
4+
| ...
5+
fiber = require('fiber')
6+
| ---
7+
| ...
8+
fio = require('fio')
9+
| ---
10+
| ...
11+
12+
--
13+
-- gh-6554: GC removes xlog.inprogress files.
14+
--
15+
assert(box.cfg.wal_dir == box.cfg.memtx_dir)
16+
| ---
17+
| - true
18+
| ...
19+
20+
function count_inprogress() \
21+
return #fio.glob(box.cfg.wal_dir .. '/*.xlog.inprogress') \
22+
end
23+
| ---
24+
| ...
25+
26+
-- Run GC after each checkpoint.
27+
checkpoint_count = box.cfg.checkpoint_count
28+
| ---
29+
| ...
30+
box.cfg{checkpoint_count = 1}
31+
| ---
32+
| ...
33+
34+
s = box.schema.create_space('test')
35+
| ---
36+
| ...
37+
_ = s:create_index('pk')
38+
| ---
39+
| ...
40+
box.snapshot()
41+
| ---
42+
| - ok
43+
| ...
44+
45+
-- Suspend GC.
46+
files = box.backup.start()
47+
| ---
48+
| ...
49+
50+
-- Create a checkpoint.
51+
_ = s:replace{1}
52+
| ---
53+
| ...
54+
box.snapshot()
55+
| ---
56+
| - ok
57+
| ...
58+
59+
-- Block a writer on xlog.inprogress -> xlog rename.
60+
box.error.injection.set('ERRINJ_XLOG_RENAME_DELAY', true)
61+
| ---
62+
| - ok
63+
| ...
64+
c = fiber.channel()
65+
| ---
66+
| ...
67+
_ = fiber.create(function() local r = pcall(s.replace, s, {1}) c:put(r) end)
68+
| ---
69+
| ...
70+
_ = test_run:wait_cond(function() return count_inprogress() > 0 end)
71+
| ---
72+
| ...
73+
assert(count_inprogress() == 1)
74+
| ---
75+
| - true
76+
| ...
77+
78+
-- Resume GC and wait for it to delete old files.
79+
box.backup.stop()
80+
| ---
81+
| ...
82+
for _, f in ipairs(files) do \
83+
test_run:wait_cond(function() \
84+
return not fio.path.exists(f) \
85+
end) \
86+
end
87+
| ---
88+
| ...
89+
90+
-- The xlog.inprogress file shouldn't be deleted by GC.
91+
assert(count_inprogress() == 1)
92+
| ---
93+
| - true
94+
| ...
95+
96+
-- Resume the blocked writer and check that it succeeds.
97+
box.error.injection.set('ERRINJ_XLOG_RENAME_DELAY', false)
98+
| ---
99+
| - ok
100+
| ...
101+
assert(c:get() == true)
102+
| ---
103+
| - true
104+
| ...
105+
106+
-- The xlog.inprogress file was renamed.
107+
assert(count_inprogress() == 0)
108+
| ---
109+
| - true
110+
| ...
111+
112+
-- Cleanup.
113+
s:drop()
114+
| ---
115+
| ...
116+
box.cfg{checkpoint_count = checkpoint_count}
117+
| ---
118+
| ...

0 commit comments

Comments
 (0)