Skip to content

Commit 834dd0d

Browse files
Compaction by tombstone ratio in simpledb (#46)
This commit introduces a new compaction kind that will compact sstables when they have more than 20% of their keys tombstoned. This should save space when preserving them, as a later compaction can clean them up again. This trades off available disk bandwidth and CPU for disk space. Co-authored-by: Thomas Jungblut <tjungblu@redhat.com>
1 parent 5113b2e commit 834dd0d

5 files changed

+69
-15
lines changed

simpledb/compaction.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ func backgroundCompaction(db *DB) {
5555
}
5656

5757
func executeCompaction(db *DB) (compactionMetadata *proto.CompactionMetadata, err error) {
58-
compactionAction := db.sstableManager.candidateTablesForCompaction(db.compactedMaxSizeBytes)
58+
compactionAction := db.sstableManager.candidateTablesForCompaction(db.compactedMaxSizeBytes, db.compactionRatio)
5959
paths := compactionAction.pathsToCompact
6060
numRecords := compactionAction.totalRecords
6161
if len(paths) <= db.compactionFileThreshold {

simpledb/compaction_test.go

+3-5
Original file line numberDiff line numberDiff line change
@@ -164,9 +164,8 @@ func TestCompactionWithTombstonesBeyondMaxSize(t *testing.T) {
164164

165165
compactionMeta, err := executeCompaction(db)
166166
assert.Nil(t, err)
167-
// TODO(thomas): this should also compact the 42 table, as it wastes a ton of space in tombstones
168-
assert.Equal(t, "sstable_000000000000043", compactionMeta.ReplacementPath)
169-
assert.Equal(t, []string{"sstable_000000000000043"}, compactionMeta.SstablePaths)
167+
assert.Equal(t, "sstable_000000000000042", compactionMeta.ReplacementPath)
168+
assert.Equal(t, []string{"sstable_000000000000042", "sstable_000000000000043"}, compactionMeta.SstablePaths)
170169

171170
err = db.sstableManager.reflectCompactionResult(compactionMeta)
172171
assert.NoError(t, err)
@@ -175,8 +174,7 @@ func TestCompactionWithTombstonesBeyondMaxSize(t *testing.T) {
175174
assert.Equal(t, "512", v)
176175
// for cleanups
177176
assert.Nil(t, db.sstableManager.currentReader.Close())
178-
// TODO(thomas): ideally that table should only be 10
179-
assert.Equal(t, 310, int(db.sstableManager.currentSSTable().MetaData().NumRecords))
177+
assert.Equal(t, 10, int(db.sstableManager.currentSSTable().MetaData().NumRecords))
180178
}
181179

182180
func writeSSTableWithDataInDatabaseFolder(t *testing.T, db *DB, p string) {

simpledb/db.go

+21
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package simpledb
22

33
import (
44
"errors"
5+
"fmt"
56
"os"
67
"sync"
78
"time"
@@ -25,6 +26,7 @@ const MemStoreMaxSizeBytes uint64 = 1024 * 1024 * 1024 // 1gb
2526
const NumSSTablesToTriggerCompaction int = 10
2627
const DefaultCompactionMaxSizeBytes uint64 = 5 * 1024 * 1024 * 1024 // 5gb
2728
const DefaultCompactionInterval = 5 * time.Second
29+
const DefaultCompactionRatio = float32(0.2)
2830
const DefaultWriteBufferSizeBytes uint64 = 4 * 1024 * 1024 // 4Mb
2931
const DefaultReadBufferSizeBytes uint64 = 4 * 1024 * 1024 // 4Mb
3032

@@ -73,6 +75,7 @@ type DB struct {
7375
memstoreMaxSize uint64
7476
compactionFileThreshold int
7577
compactionInterval time.Duration
78+
compactionRatio float32
7679
compactedMaxSizeBytes uint64
7780
enableCompactions bool
7881
enableAsyncWAL bool
@@ -331,6 +334,7 @@ func NewSimpleDB(basePath string, extraOptions ...ExtraOption) (*DB, error) {
331334
NumSSTablesToTriggerCompaction,
332335
DefaultCompactionMaxSizeBytes,
333336
DefaultCompactionInterval,
337+
DefaultCompactionRatio,
334338
DefaultWriteBufferSizeBytes,
335339
DefaultReadBufferSizeBytes,
336340
}
@@ -361,6 +365,7 @@ func NewSimpleDB(basePath string, extraOptions ...ExtraOption) (*DB, error) {
361365
enableAsyncWAL: extraOpts.enableAsyncWAL,
362366
enableDirectIOWAL: extraOpts.enableDirectIOWAL,
363367
compactionInterval: extraOpts.compactionRunInterval,
368+
compactionRatio: extraOpts.compactionRatio,
364369
closed: false,
365370
rwLock: rwLock,
366371
wal: nil,
@@ -385,6 +390,7 @@ type ExtraOptions struct {
385390
compactionFileThreshold int
386391
compactionMaxSizeBytes uint64
387392
compactionRunInterval time.Duration
393+
compactionRatio float32
388394
writeBufferSizeBytes uint64
389395
readBufferSizeBytes uint64
390396
}
@@ -428,6 +434,21 @@ func CompactionRunInterval(interval time.Duration) ExtraOption {
428434
}
429435
}
430436

437+
// CompactionRatio configures when an sstable is eligible for compaction through a ratio threshold, which can be used to save disk space.
438+
// The ratio is measured as the amount of tombstoned keys divided by the overall record number in the sstable.
439+
// This threshold must be between 0.0 and 1.0 as float32 and by default is DefaultCompactionRatio.
440+
// So with the default, when an sstable has at least 20% of its records flagged as tombstones, it will be automatically compacted.
441+
// A value of 1.0 turns this feature off and resorts to the max size calculation; a value of 0.0 will always compact
442+
// all files regardless of how many tombstones are in there.
443+
func CompactionRatio(ratio float32) ExtraOption {
444+
if ratio < 0.0 || ratio > 1.0 {
445+
panic(fmt.Sprintf("invalid compaction ratio: %f, must be between 0 and 1", ratio))
446+
}
447+
return func(args *ExtraOptions) {
448+
args.compactionRatio = ratio
449+
}
450+
}
451+
431452
// CompactionFileThreshold tells how often SSTables are being compacted, this is measured in the number of SSTables.
432453
// The default is 10, which in turn will compact into a single SSTable.
433454
func CompactionFileThreshold(n int) ExtraOption {

simpledb/sstable_manager.go

+7-2
Original file line numberDiff line numberDiff line change
@@ -112,13 +112,18 @@ func (s *SSTableManager) currentSSTable() sstables.SSTableReaderI {
112112
return s.currentReader
113113
}
114114

115-
func (s *SSTableManager) candidateTablesForCompaction(compactionMaxSizeBytes uint64) compactionAction {
115+
func (s *SSTableManager) candidateTablesForCompaction(compactionMaxSizeBytes uint64, compactionRatio float32) compactionAction {
116116
s.managerLock.RLock()
117117
defer s.managerLock.RUnlock()
118118

119119
var selectedForCompaction []bool
120120
for i := 0; i < len(s.allSSTableReaders); i++ {
121-
selectedForCompaction = append(selectedForCompaction, s.allSSTableReaders[i].MetaData().TotalBytes < compactionMaxSizeBytes)
121+
selected := s.allSSTableReaders[i].MetaData().TotalBytes < compactionMaxSizeBytes
122+
if s.allSSTableReaders[i].MetaData().NumRecords > 0 {
123+
tombstoneRatio := float32(s.allSSTableReaders[i].MetaData().NullValues) / float32(s.allSSTableReaders[i].MetaData().NumRecords)
124+
selected = selected || tombstoneRatio >= compactionRatio
125+
}
126+
selectedForCompaction = append(selectedForCompaction, selected)
122127
}
123128

124129
/*

simpledb/sstable_manager_test.go

+37-7
Original file line numberDiff line numberDiff line change
@@ -95,10 +95,40 @@ func TestSSTableManagerSelectCompactionCandidates(t *testing.T) {
9595
path: "4",
9696
})
9797

98-
assertCompactionAction(t, 0, []string{"4"}, manager.candidateTablesForCompaction(25))
99-
assertCompactionAction(t, 105, []string{"2", "3", "4"}, manager.candidateTablesForCompaction(51))
100-
assertCompactionAction(t, 115, []string{"1", "2", "3", "4"}, manager.candidateTablesForCompaction(101))
101-
assertCompactionAction(t, 115, []string{"1", "2", "3", "4"}, manager.candidateTablesForCompaction(1500))
98+
assertCompactionAction(t, 0, []string{"4"}, manager.candidateTablesForCompaction(25, 1))
99+
assertCompactionAction(t, 105, []string{"2", "3", "4"}, manager.candidateTablesForCompaction(51, 1))
100+
assertCompactionAction(t, 115, []string{"1", "2", "3", "4"}, manager.candidateTablesForCompaction(101, 1))
101+
assertCompactionAction(t, 115, []string{"1", "2", "3", "4"}, manager.candidateTablesForCompaction(1500, 1))
102+
}
103+
104+
func TestSSTableManagerSelectCompactionCandidatesTombstoneRatios(t *testing.T) {
105+
manager := NewSSTableManager(skiplist.BytesComparator{}, &sync.RWMutex{}, "")
106+
107+
manager.addReader(&MockSSTableReader{
108+
metadata: &proto.MetaData{NumRecords: 10, NullValues: 8, TotalBytes: 1000},
109+
path: "1",
110+
})
111+
112+
manager.addReader(&MockSSTableReader{
113+
metadata: &proto.MetaData{NumRecords: 5, NullValues: 0, TotalBytes: 1000},
114+
path: "2",
115+
})
116+
117+
manager.addReader(&MockSSTableReader{
118+
metadata: &proto.MetaData{NumRecords: 100, NullValues: 10, TotalBytes: 1000},
119+
path: "3",
120+
})
121+
122+
manager.addReader(&MockSSTableReader{
123+
metadata: &proto.MetaData{NumRecords: 0, NullValues: 0, TotalBytes: 1000},
124+
path: "4",
125+
})
126+
127+
assertCompactionAction(t, 10, []string{"1"}, manager.candidateTablesForCompaction(999, 0.2))
128+
// 1 and 3 should be selected by ratio; 2 is along for the ride because of flood filling
129+
assertCompactionAction(t, 115, []string{"1", "2", "3"}, manager.candidateTablesForCompaction(999, 0.1))
130+
assertCompactionAction(t, 115, []string{"1", "2", "3"}, manager.candidateTablesForCompaction(999, 0))
131+
assertCompactionAction(t, 0, nil, manager.candidateTablesForCompaction(999, 1))
102132
}
103133

104134
func TestSSTableManagerSelectCompactionCandidatesEmptyStart(t *testing.T) {
@@ -124,8 +154,8 @@ func TestSSTableManagerSelectCompactionCandidatesEmptyStart(t *testing.T) {
124154
path: "4",
125155
})
126156

127-
assertCompactionAction(t, 5, []string{"1", "2", "3"}, manager.candidateTablesForCompaction(100))
128-
assertCompactionAction(t, 30, []string{"1", "2", "3", "4"}, manager.candidateTablesForCompaction(200))
157+
assertCompactionAction(t, 5, []string{"1", "2", "3"}, manager.candidateTablesForCompaction(100, 1))
158+
assertCompactionAction(t, 30, []string{"1", "2", "3", "4"}, manager.candidateTablesForCompaction(200, 1))
129159
}
130160

131161
func TestSSTableManagerSelectCompactionCandidatesTombstonedHoles(t *testing.T) {
@@ -151,7 +181,7 @@ func TestSSTableManagerSelectCompactionCandidatesTombstonedHoles(t *testing.T) {
151181
path: "4",
152182
})
153183

154-
assertCompactionAction(t, 3010, []string{"2", "3", "4"}, manager.candidateTablesForCompaction(2000))
184+
assertCompactionAction(t, 3010, []string{"2", "3", "4"}, manager.candidateTablesForCompaction(2000, 1))
155185
}
156186

157187
func assertCompactionAction(t *testing.T, numRecords int, paths []string, actualAction compactionAction) {

0 commit comments

Comments
 (0)