Skip to content

Commit 0cfb990

Browse files
committed Dec 10, 2013
Release LevelDB 1.15
- switched from mmap based writing to simpler stdio based writing. Has a minor impact (0.5 microseconds) on microbenchmarks for asynchronous writes. Synchronous writes speed up from 30ms to 10ms on linux/ext4. Should be much more reliable on diverse platforms.
- compaction errors now immediately put the database into a read-only mode (until it is re-opened). As a downside, a disk going out of space and then space being created will require a re-open to recover from, whereas previously that would happen automatically. On the plus side, many corruption possibilities go away.
- force the DB to enter an error-state so that all future writes fail when a synchronous log write succeeds but the sync fails.
- repair now regenerates sstables that exhibit problems
- fix issue 218 - Use native memory barriers on OSX
- fix issue 212 - QNX build is broken
- fix build on iOS with xcode 5
- make tests compile and pass on windows
1 parent 0b9a89f commit 0cfb990

22 files changed

+320
-361
lines changed
 

‎Makefile

+1-1
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,7 @@ SHARED = $(SHARED1)
7272
else
7373
# Update db.h if you change these.
7474
SHARED_MAJOR = 1
75-
SHARED_MINOR = 14
75+
SHARED_MINOR = 15
7676
SHARED1 = libleveldb.$(PLATFORM_SHARED_EXT)
7777
SHARED2 = $(SHARED1).$(SHARED_MAJOR)
7878
SHARED3 = $(SHARED1).$(SHARED_MAJOR).$(SHARED_MINOR)

‎build_detect_platform

+10
Original file line number | Diff line number | Diff line change
@@ -131,6 +131,16 @@ case "$TARGET_OS" in
131131
# man ld: +h internal_name
132132
PLATFORM_SHARED_LDFLAGS="-shared -Wl,+h -Wl,"
133133
;;
134+
IOS)
135+
PLATFORM=IOS
136+
COMMON_FLAGS="$MEMCMP_FLAG -DOS_MACOSX"
137+
[ -z "$INSTALL_PATH" ] && INSTALL_PATH=`pwd`
138+
PORT_FILE=port/port_posix.cc
139+
PLATFORM_SHARED_EXT=
140+
PLATFORM_SHARED_LDFLAGS=
141+
PLATFORM_SHARED_CFLAGS=
142+
PLATFORM_SHARED_VERSIONED=
143+
;;
134144
*)
135145
echo "Unknown platform!" >&2
136146
exit 1

‎db/corruption_test.cc

+24-2
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,13 @@ class CorruptionTest {
7575
Slice key = Key(i, &key_space);
7676
batch.Clear();
7777
batch.Put(key, Value(i, &value_space));
78-
ASSERT_OK(db_->Write(WriteOptions(), &batch));
78+
WriteOptions options;
79+
// Corrupt() doesn't work without this sync on windows; stat reports 0 for
80+
// the file size.
81+
if (i == n - 1) {
82+
options.sync = true;
83+
}
84+
ASSERT_OK(db_->Write(options, &batch));
7985
}
8086
}
8187

@@ -125,7 +131,7 @@ class CorruptionTest {
125131
FileType type;
126132
std::string fname;
127133
int picked_number = -1;
128-
for (int i = 0; i < filenames.size(); i++) {
134+
for (size_t i = 0; i < filenames.size(); i++) {
129135
if (ParseFileName(filenames[i], &number, &type) &&
130136
type == filetype &&
131137
int(number) > picked_number) { // Pick latest file
@@ -238,6 +244,22 @@ TEST(CorruptionTest, TableFile) {
238244
Check(90, 99);
239245
}
240246

247+
TEST(CorruptionTest, TableFileRepair) {
248+
options_.block_size = 2 * kValueSize; // Limit scope of corruption
249+
options_.paranoid_checks = true;
250+
Reopen();
251+
Build(100);
252+
DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
253+
dbi->TEST_CompactMemTable();
254+
dbi->TEST_CompactRange(0, NULL, NULL);
255+
dbi->TEST_CompactRange(1, NULL, NULL);
256+
257+
Corrupt(kTableFile, 100, 1);
258+
RepairDB();
259+
Reopen();
260+
Check(95, 99);
261+
}
262+
241263
TEST(CorruptionTest, TableFileIndexData) {
242264
Build(10000); // Enough to build multiple Tables
243265
DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);

‎db/db_bench.cc

+4-4
Original file line number | Diff line number | Diff line change
@@ -128,7 +128,7 @@ class RandomGenerator {
128128
pos_ = 0;
129129
}
130130

131-
Slice Generate(int len) {
131+
Slice Generate(size_t len) {
132132
if (pos_ + len > data_.size()) {
133133
pos_ = 0;
134134
assert(len < data_.size());
@@ -139,11 +139,11 @@ class RandomGenerator {
139139
};
140140

141141
static Slice TrimSpace(Slice s) {
142-
int start = 0;
142+
size_t start = 0;
143143
while (start < s.size() && isspace(s[start])) {
144144
start++;
145145
}
146-
int limit = s.size();
146+
size_t limit = s.size();
147147
while (limit > start && isspace(s[limit-1])) {
148148
limit--;
149149
}
@@ -399,7 +399,7 @@ class Benchmark {
399399
heap_counter_(0) {
400400
std::vector<std::string> files;
401401
Env::Default()->GetChildren(FLAGS_db, &files);
402-
for (int i = 0; i < files.size(); i++) {
402+
for (size_t i = 0; i < files.size(); i++) {
403403
if (Slice(files[i]).starts_with("heap-")) {
404404
Env::Default()->DeleteFile(std::string(FLAGS_db) + "/" + files[i]);
405405
}

‎db/db_impl.cc

+57-42
Original file line number | Diff line number | Diff line change
@@ -133,8 +133,7 @@ DBImpl::DBImpl(const Options& raw_options, const std::string& dbname)
133133
seed_(0),
134134
tmp_batch_(new WriteBatch),
135135
bg_compaction_scheduled_(false),
136-
manual_compaction_(NULL),
137-
consecutive_compaction_errors_(0) {
136+
manual_compaction_(NULL) {
138137
mem_->Ref();
139138
has_imm_.Release_Store(NULL);
140139

@@ -217,6 +216,12 @@ void DBImpl::MaybeIgnoreError(Status* s) const {
217216
}
218217

219218
void DBImpl::DeleteObsoleteFiles() {
219+
if (!bg_error_.ok()) {
220+
// After a background error, we don't know whether a new version may
221+
// or may not have been committed, so we cannot safely garbage collect.
222+
return;
223+
}
224+
220225
// Make a set of all of the live files
221226
std::set<uint64_t> live = pending_outputs_;
222227
versions_->AddLiveFiles(&live);
@@ -495,7 +500,7 @@ Status DBImpl::WriteLevel0Table(MemTable* mem, VersionEdit* edit,
495500
return s;
496501
}
497502

498-
Status DBImpl::CompactMemTable() {
503+
void DBImpl::CompactMemTable() {
499504
mutex_.AssertHeld();
500505
assert(imm_ != NULL);
501506

@@ -523,9 +528,9 @@ Status DBImpl::CompactMemTable() {
523528
imm_ = NULL;
524529
has_imm_.Release_Store(NULL);
525530
DeleteObsoleteFiles();
531+
} else {
532+
RecordBackgroundError(s);
526533
}
527-
528-
return s;
529534
}
530535

531536
void DBImpl::CompactRange(const Slice* begin, const Slice* end) {
@@ -568,16 +573,18 @@ void DBImpl::TEST_CompactRange(int level, const Slice* begin,const Slice* end) {
568573
}
569574

570575
MutexLock l(&mutex_);
571-
while (!manual.done) {
572-
while (manual_compaction_ != NULL) {
573-
bg_cv_.Wait();
574-
}
575-
manual_compaction_ = &manual;
576-
MaybeScheduleCompaction();
577-
while (manual_compaction_ == &manual) {
576+
while (!manual.done && !shutting_down_.Acquire_Load() && bg_error_.ok()) {
577+
if (manual_compaction_ == NULL) { // Idle
578+
manual_compaction_ = &manual;
579+
MaybeScheduleCompaction();
580+
} else { // Running either my compaction or another compaction.
578581
bg_cv_.Wait();
579582
}
580583
}
584+
if (manual_compaction_ == &manual) {
585+
// Cancel my manual compaction since we aborted early for some reason.
586+
manual_compaction_ = NULL;
587+
}
581588
}
582589

583590
Status DBImpl::TEST_CompactMemTable() {
@@ -596,12 +603,22 @@ Status DBImpl::TEST_CompactMemTable() {
596603
return s;
597604
}
598605

606+
void DBImpl::RecordBackgroundError(const Status& s) {
607+
mutex_.AssertHeld();
608+
if (bg_error_.ok()) {
609+
bg_error_ = s;
610+
bg_cv_.SignalAll();
611+
}
612+
}
613+
599614
void DBImpl::MaybeScheduleCompaction() {
600615
mutex_.AssertHeld();
601616
if (bg_compaction_scheduled_) {
602617
// Already scheduled
603618
} else if (shutting_down_.Acquire_Load()) {
604619
// DB is being deleted; no more background compactions
620+
} else if (!bg_error_.ok()) {
621+
// Already got an error; no more changes
605622
} else if (imm_ == NULL &&
606623
manual_compaction_ == NULL &&
607624
!versions_->NeedsCompaction()) {
@@ -619,30 +636,12 @@ void DBImpl::BGWork(void* db) {
619636
void DBImpl::BackgroundCall() {
620637
MutexLock l(&mutex_);
621638
assert(bg_compaction_scheduled_);
622-
if (!shutting_down_.Acquire_Load()) {
623-
Status s = BackgroundCompaction();
624-
if (s.ok()) {
625-
// Success
626-
consecutive_compaction_errors_ = 0;
627-
} else if (shutting_down_.Acquire_Load()) {
628-
// Error most likely due to shutdown; do not wait
629-
} else {
630-
// Wait a little bit before retrying background compaction in
631-
// case this is an environmental problem and we do not want to
632-
// chew up resources for failed compactions for the duration of
633-
// the problem.
634-
bg_cv_.SignalAll(); // In case a waiter can proceed despite the error
635-
Log(options_.info_log, "Waiting after background compaction error: %s",
636-
s.ToString().c_str());
637-
mutex_.Unlock();
638-
++consecutive_compaction_errors_;
639-
int seconds_to_sleep = 1;
640-
for (int i = 0; i < 3 && i < consecutive_compaction_errors_ - 1; ++i) {
641-
seconds_to_sleep *= 2;
642-
}
643-
env_->SleepForMicroseconds(seconds_to_sleep * 1000000);
644-
mutex_.Lock();
645-
}
639+
if (shutting_down_.Acquire_Load()) {
640+
// No more background work when shutting down.
641+
} else if (!bg_error_.ok()) {
642+
// No more background work after a background error.
643+
} else {
644+
BackgroundCompaction();
646645
}
647646

648647
bg_compaction_scheduled_ = false;
@@ -653,11 +652,12 @@ void DBImpl::BackgroundCall() {
653652
bg_cv_.SignalAll();
654653
}
655654

656-
Status DBImpl::BackgroundCompaction() {
655+
void DBImpl::BackgroundCompaction() {
657656
mutex_.AssertHeld();
658657

659658
if (imm_ != NULL) {
660-
return CompactMemTable();
659+
CompactMemTable();
660+
return;
661661
}
662662

663663
Compaction* c;
@@ -691,6 +691,9 @@ Status DBImpl::BackgroundCompaction() {
691691
c->edit()->AddFile(c->level() + 1, f->number, f->file_size,
692692
f->smallest, f->largest);
693693
status = versions_->LogAndApply(c->edit(), &mutex_);
694+
if (!status.ok()) {
695+
RecordBackgroundError(status);
696+
}
694697
VersionSet::LevelSummaryStorage tmp;
695698
Log(options_.info_log, "Moved #%lld to level-%d %lld bytes %s: %s\n",
696699
static_cast<unsigned long long>(f->number),
@@ -701,6 +704,9 @@ Status DBImpl::BackgroundCompaction() {
701704
} else {
702705
CompactionState* compact = new CompactionState(c);
703706
status = DoCompactionWork(compact);
707+
if (!status.ok()) {
708+
RecordBackgroundError(status);
709+
}
704710
CleanupCompaction(compact);
705711
c->ReleaseInputs();
706712
DeleteObsoleteFiles();
@@ -714,9 +720,6 @@ Status DBImpl::BackgroundCompaction() {
714720
} else {
715721
Log(options_.info_log,
716722
"Compaction error: %s", status.ToString().c_str());
717-
if (options_.paranoid_checks && bg_error_.ok()) {
718-
bg_error_ = status;
719-
}
720723
}
721724

722725
if (is_manual) {
@@ -732,7 +735,6 @@ Status DBImpl::BackgroundCompaction() {
732735
}
733736
manual_compaction_ = NULL;
734737
}
735-
return status;
736738
}
737739

738740
void DBImpl::CleanupCompaction(CompactionState* compact) {
@@ -1002,6 +1004,9 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) {
10021004
if (status.ok()) {
10031005
status = InstallCompactionResults(compact);
10041006
}
1007+
if (!status.ok()) {
1008+
RecordBackgroundError(status);
1009+
}
10051010
VersionSet::LevelSummaryStorage tmp;
10061011
Log(options_.info_log,
10071012
"compacted to: %s", versions_->LevelSummary(&tmp));
@@ -1185,13 +1190,23 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
11851190
{
11861191
mutex_.Unlock();
11871192
status = log_->AddRecord(WriteBatchInternal::Contents(updates));
1193+
bool sync_error = false;
11881194
if (status.ok() && options.sync) {
11891195
status = logfile_->Sync();
1196+
if (!status.ok()) {
1197+
sync_error = true;
1198+
}
11901199
}
11911200
if (status.ok()) {
11921201
status = WriteBatchInternal::InsertInto(updates, mem_);
11931202
}
11941203
mutex_.Lock();
1204+
if (sync_error) {
1205+
// The state of the log file is indeterminate: the log record we
1206+
// just added may or may not show up when the DB is re-opened.
1207+
// So we force the DB into a mode where all future writes fail.
1208+
RecordBackgroundError(status);
1209+
}
11951210
}
11961211
if (updates == tmp_batch_) tmp_batch_->Clear();
11971212

‎db/db_impl.h

+5-4
Original file line number | Diff line number | Diff line change
@@ -87,8 +87,8 @@ class DBImpl : public DB {
8787

8888
// Compact the in-memory write buffer to disk. Switches to a new
8989
// log-file/memtable and writes a new descriptor iff successful.
90-
Status CompactMemTable()
91-
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
90+
// Errors are recorded in bg_error_.
91+
void CompactMemTable() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
9292

9393
Status RecoverLogFile(uint64_t log_number,
9494
VersionEdit* edit,
@@ -102,10 +102,12 @@ class DBImpl : public DB {
102102
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
103103
WriteBatch* BuildBatchGroup(Writer** last_writer);
104104

105+
void RecordBackgroundError(const Status& s);
106+
105107
void MaybeScheduleCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
106108
static void BGWork(void* db);
107109
void BackgroundCall();
108-
Status BackgroundCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
110+
void BackgroundCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
109111
void CleanupCompaction(CompactionState* compact)
110112
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
111113
Status DoCompactionWork(CompactionState* compact)
@@ -170,7 +172,6 @@ class DBImpl : public DB {
170172

171173
// Have we encountered a background error in paranoid mode?
172174
Status bg_error_;
173-
int consecutive_compaction_errors_;
174175

175176
// Per level compaction stats. stats_[level] stores the stats for
176177
// compactions that produced data for the specified "level".

0 commit comments

Comments
 (0)