Skip to content

Commit 18c2f29

Browse files
committed
fix garbage collection logic and add tests
Signed-off-by: Sanskar Jaiswal <jaiswalsanskar078@gmail.com>
1 parent 2a62747 commit 18c2f29

File tree

4 files changed

+302
-16
lines changed

4 files changed

+302
-16
lines changed

controllers/gitrepository_controller_test.go

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1102,6 +1102,154 @@ func TestGitRepositoryReconciler_reconcileInclude(t *testing.T) {
11021102
}
11031103
}
11041104

1105+
func TestGitRepositoryReconciler_reconcileStorage(t *testing.T) {
1106+
tests := []struct {
1107+
name string
1108+
beforeFunc func(obj *sourcev1.GitRepository, storage *Storage) error
1109+
want sreconcile.Result
1110+
wantErr bool
1111+
assertArtifact *sourcev1.Artifact
1112+
assertConditions []metav1.Condition
1113+
assertPaths []string
1114+
}{
1115+
{
1116+
name: "garbage collects",
1117+
beforeFunc: func(obj *sourcev1.GitRepository, storage *Storage) error {
1118+
revisions := []string{"a", "b", "c", "d"}
1119+
for n := range revisions {
1120+
v := revisions[n]
1121+
obj.Status.Artifact = &sourcev1.Artifact{
1122+
Path: fmt.Sprintf("/reconcile-storage/%s.txt", v),
1123+
Revision: v,
1124+
}
1125+
if err := testStorage.MkdirAll(*obj.Status.Artifact); err != nil {
1126+
return err
1127+
}
1128+
if err := testStorage.AtomicWriteFile(obj.Status.Artifact, strings.NewReader(v), 0644); err != nil {
1129+
return err
1130+
}
1131+
if n != len(revisions)-1 {
1132+
time.Sleep(time.Second * 1)
1133+
}
1134+
}
1135+
testStorage.SetArtifactURL(obj.Status.Artifact)
1136+
return nil
1137+
},
1138+
assertArtifact: &sourcev1.Artifact{
1139+
Path: "/reconcile-storage/d.txt",
1140+
Revision: "d",
1141+
Checksum: "18ac3e7343f016890c510e93f935261169d9e3f565436429830faf0934f4f8e4",
1142+
URL: testStorage.Hostname + "/reconcile-storage/d.txt",
1143+
Size: int64p(int64(len("d"))),
1144+
},
1145+
assertPaths: []string{
1146+
"/reconcile-storage/d.txt",
1147+
"/reconcile-storage/c.txt",
1148+
"!/reconcile-storage/b.txt",
1149+
"!/reconcile-storage/a.txt",
1150+
},
1151+
want: sreconcile.ResultSuccess,
1152+
},
1153+
{
1154+
name: "notices missing artifact in storage",
1155+
beforeFunc: func(obj *sourcev1.GitRepository, storage *Storage) error {
1156+
obj.Status.Artifact = &sourcev1.Artifact{
1157+
Path: "/reconcile-storage/invalid.txt",
1158+
Revision: "e",
1159+
}
1160+
testStorage.SetArtifactURL(obj.Status.Artifact)
1161+
return nil
1162+
},
1163+
want: sreconcile.ResultSuccess,
1164+
assertPaths: []string{
1165+
"!/reconcile-storage/invalid.txt",
1166+
},
1167+
assertConditions: []metav1.Condition{
1168+
*conditions.TrueCondition(meta.ReconcilingCondition, "NoArtifact", "no artifact for resource in storage"),
1169+
},
1170+
},
1171+
{
1172+
name: "updates hostname on diff from current",
1173+
beforeFunc: func(obj *sourcev1.GitRepository, storage *Storage) error {
1174+
obj.Status.Artifact = &sourcev1.Artifact{
1175+
Path: "/reconcile-storage/hostname.txt",
1176+
Revision: "f",
1177+
Checksum: "3b9c358f36f0a31b6ad3e14f309c7cf198ac9246e8316f9ce543d5b19ac02b80",
1178+
URL: "http://outdated.com/reconcile-storage/hostname.txt",
1179+
}
1180+
if err := testStorage.MkdirAll(*obj.Status.Artifact); err != nil {
1181+
return err
1182+
}
1183+
if err := testStorage.AtomicWriteFile(obj.Status.Artifact, strings.NewReader("file"), 0644); err != nil {
1184+
return err
1185+
}
1186+
return nil
1187+
},
1188+
want: sreconcile.ResultSuccess,
1189+
assertPaths: []string{
1190+
"/reconcile-storage/hostname.txt",
1191+
},
1192+
assertArtifact: &sourcev1.Artifact{
1193+
Path: "/reconcile-storage/hostname.txt",
1194+
Revision: "f",
1195+
Checksum: "3b9c358f36f0a31b6ad3e14f309c7cf198ac9246e8316f9ce543d5b19ac02b80",
1196+
URL: testStorage.Hostname + "/reconcile-storage/hostname.txt",
1197+
Size: int64p(int64(len("file"))),
1198+
},
1199+
},
1200+
}
1201+
for _, tt := range tests {
1202+
t.Run(tt.name, func(t *testing.T) {
1203+
g := NewWithT(t)
1204+
1205+
defer func() {
1206+
for _, p := range tt.assertPaths {
1207+
if !strings.HasPrefix(p, "!") {
1208+
g.Expect(os.Remove(filepath.Join(testStorage.BasePath, p))).To(Succeed())
1209+
}
1210+
}
1211+
}()
1212+
1213+
r := &GitRepositoryReconciler{
1214+
EventRecorder: record.NewFakeRecorder(32),
1215+
Storage: testStorage,
1216+
artifactRetentionRecords: 2,
1217+
artifactRetentionTTL: 2 * time.Second,
1218+
}
1219+
1220+
obj := &sourcev1.GitRepository{
1221+
ObjectMeta: metav1.ObjectMeta{
1222+
GenerateName: "test-",
1223+
},
1224+
}
1225+
if tt.beforeFunc != nil {
1226+
g.Expect(tt.beforeFunc(obj, testStorage)).To(Succeed())
1227+
}
1228+
1229+
var c *git.Commit
1230+
var as artifactSet
1231+
got, err := r.reconcileStorage(context.TODO(), obj, c, &as, "")
1232+
g.Expect(err != nil).To(Equal(tt.wantErr))
1233+
g.Expect(got).To(Equal(tt.want))
1234+
1235+
g.Expect(obj.Status.Artifact).To(MatchArtifact(tt.assertArtifact))
1236+
if tt.assertArtifact != nil && tt.assertArtifact.URL != "" {
1237+
g.Expect(obj.Status.Artifact.URL).To(Equal(tt.assertArtifact.URL))
1238+
}
1239+
g.Expect(obj.Status.Conditions).To(conditions.MatchConditions(tt.assertConditions))
1240+
1241+
for _, p := range tt.assertPaths {
1242+
absoluteP := filepath.Join(testStorage.BasePath, p)
1243+
if !strings.HasPrefix(p, "!") {
1244+
g.Expect(absoluteP).To(BeAnExistingFile())
1245+
continue
1246+
}
1247+
g.Expect(absoluteP).NotTo(BeAnExistingFile())
1248+
}
1249+
})
1250+
}
1251+
}
1252+
11051253
func TestGitRepositoryReconciler_reconcileDelete(t *testing.T) {
11061254
g := NewWithT(t)
11071255

controllers/storage.go

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -146,28 +146,41 @@ func (s *Storage) RemoveAllButCurrent(artifact sourcev1.Artifact) ([]string, err
146146
return deletedFiles, nil
147147
}
148148

149-
func (s *Storage) getGarbageFiles(artifact sourcev1.Artifact, n int, ttl time.Duration) []string {
149+
// getGarbageFiles returns all files that need to be garbage collected for the given artifact.
150+
// Garbage files are determined based on the below flow:
151+
// 1. collect all files with an expired ttl
152+
// 2. if we satisfy maxItemsToBeRetained, then return
153+
// 3. else, remove all files till the latest n files remain, where n=maxItemsToBeRetained
154+
func (s *Storage) getGarbageFiles(artifact sourcev1.Artifact, maxItemsToBeRetained int, ttl time.Duration) []string {
150155
localPath := s.LocalPath(artifact)
151156
dir := filepath.Dir(localPath)
152157
garbageFiles := []string{}
153158
filesWithCreatedTs := make(map[time.Time]string)
154159
// sortedPaths contain all files sorted according to their created ts.
155160
sortedPaths := []string{}
156161
now := time.Now().UTC()
162+
totalFiles := 0
157163
_ = filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
158164
createdAt := info.ModTime().UTC()
159165
diff := now.Sub(createdAt)
160166
// compare the time difference between now and the time at which the file was created
161167
// with the provided ttl. delete if difference is greater than the ttl.
162168
expired := diff > ttl
163-
if path != localPath && !info.IsDir() && info.Mode()&os.ModeSymlink != os.ModeSymlink && expired {
164-
garbageFiles = append(garbageFiles, path)
165-
}
166169
if !info.IsDir() && info.Mode()&os.ModeSymlink != os.ModeSymlink {
170+
if path != localPath && expired {
171+
garbageFiles = append(garbageFiles, path)
172+
}
173+
totalFiles += 1
167174
filesWithCreatedTs[info.ModTime().UTC()] = path
168175
}
169176
return nil
170177
})
178+
// We already collected enough garbage files to satisfy the no of max
179+
// items that are supposed to be retained, so exit early.
180+
if totalFiles-len(garbageFiles) < maxItemsToBeRetained {
181+
return garbageFiles
182+
}
183+
171184
creationTimestamps := []time.Time{}
172185
for ts := range filesWithCreatedTs {
173186
creationTimestamps = append(creationTimestamps, ts)
@@ -182,11 +195,25 @@ func (s *Storage) getGarbageFiles(artifact sourcev1.Artifact, n int, ttl time.Du
182195
sortedPaths = append(sortedPaths, path)
183196
}
184197

185-
for i, path := range sortedPaths {
186-
if path != localPath && !stringInSlice(path, garbageFiles) && len(sortedPaths) <= n {
187-
// append path to garbageFiles and remove it from sortedPaths
188-
garbageFiles = append(garbageFiles, path)
189-
sortedPaths = append(sortedPaths[:i], sortedPaths[i+1:]...)
198+
var collected int
199+
noOfGarbageFiles := len(garbageFiles)
200+
for _, path := range sortedPaths {
201+
if path != localPath && !stringInSlice(path, garbageFiles) {
202+
// If we previously collected a few garbage files with an expired ttl, then take that into account
203+
// when checking whether we need to remove more files to satisfy the max no of items allowed
204+
// in the filesystem, along with the no of files already removed in this loop.
205+
if noOfGarbageFiles > 0 {
206+
if (len(sortedPaths) - collected - len(garbageFiles)) > maxItemsToBeRetained {
207+
// append path to garbageFiles and remove it from sortedPaths
208+
garbageFiles = append(garbageFiles, path)
209+
collected += 1
210+
}
211+
} else {
212+
if len(sortedPaths)-collected > maxItemsToBeRetained {
213+
garbageFiles = append(garbageFiles, path)
214+
collected += 1
215+
}
216+
}
190217
}
191218
}
192219

@@ -195,8 +222,8 @@ func (s *Storage) getGarbageFiles(artifact sourcev1.Artifact, n int, ttl time.Du
195222

196223
// RemoveGarbageFiles removes all garabge files in the artifact dir according to the provided
197224
// retention options.
198-
func (s *Storage) RemoveGarbageFiles(artifact sourcev1.Artifact, n int, ttl time.Duration) ([]string, error) {
199-
garbageFiles := s.getGarbageFiles(artifact, n, ttl)
225+
func (s *Storage) RemoveGarbageFiles(artifact sourcev1.Artifact, maxItemsToBeRetained int, ttl time.Duration) ([]string, error) {
226+
garbageFiles := s.getGarbageFiles(artifact, maxItemsToBeRetained, ttl)
200227
var errors []string
201228
var deleted []string
202229
if len(garbageFiles) > 0 {

controllers/storage_test.go

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"os"
2525
"path"
2626
"path/filepath"
27+
"strings"
2728
"testing"
2829
"time"
2930

@@ -486,3 +487,107 @@ func TestStorageCopyFromPath(t *testing.T) {
486487
})
487488
}
488489
}
490+
491+
func TestStorage_getGarbageFiles(t *testing.T) {
492+
artifactFolder := path.Join("foo", "bar")
493+
tests := []struct {
494+
name string
495+
artifactPaths []string
496+
createPause time.Duration
497+
ttl time.Duration
498+
maxItemsToBeRetained int
499+
wantDeleted []string
500+
}{
501+
{
502+
name: "delete files based on maxItemsToBeRetained",
503+
artifactPaths: []string{
504+
path.Join(artifactFolder, "artifact1.tar.gz"),
505+
path.Join(artifactFolder, "artifact2.tar.gz"),
506+
path.Join(artifactFolder, "artifact3.tar.gz"),
507+
path.Join(artifactFolder, "artifact4.tar.gz"),
508+
path.Join(artifactFolder, "artifact5.tar.gz"),
509+
},
510+
createPause: time.Nanosecond,
511+
ttl: time.Minute * 2,
512+
maxItemsToBeRetained: 2,
513+
wantDeleted: []string{
514+
path.Join(artifactFolder, "artifact1.tar.gz"),
515+
path.Join(artifactFolder, "artifact2.tar.gz"),
516+
path.Join(artifactFolder, "artifact3.tar.gz"),
517+
},
518+
},
519+
{
520+
name: "delete files based on ttl",
521+
artifactPaths: []string{
522+
path.Join(artifactFolder, "artifact1.tar.gz"),
523+
path.Join(artifactFolder, "artifact2.tar.gz"),
524+
path.Join(artifactFolder, "artifact3.tar.gz"),
525+
path.Join(artifactFolder, "artifact4.tar.gz"),
526+
path.Join(artifactFolder, "artifact5.tar.gz"),
527+
},
528+
createPause: time.Second * 1,
529+
ttl: time.Second*3 + time.Millisecond*500,
530+
maxItemsToBeRetained: 4,
531+
wantDeleted: []string{
532+
path.Join(artifactFolder, "artifact1.tar.gz"),
533+
path.Join(artifactFolder, "artifact2.tar.gz"),
534+
},
535+
},
536+
{
537+
name: "delete files based on ttl and maxItemsToBeRetained",
538+
artifactPaths: []string{
539+
path.Join(artifactFolder, "artifact1.tar.gz"),
540+
path.Join(artifactFolder, "artifact2.tar.gz"),
541+
path.Join(artifactFolder, "artifact3.tar.gz"),
542+
path.Join(artifactFolder, "artifact4.tar.gz"),
543+
path.Join(artifactFolder, "artifact5.tar.gz"),
544+
path.Join(artifactFolder, "artifact6.tar.gz"),
545+
},
546+
createPause: time.Second * 1,
547+
ttl: time.Second*5 + time.Millisecond*500,
548+
maxItemsToBeRetained: 4,
549+
wantDeleted: []string{
550+
path.Join(artifactFolder, "artifact1.tar.gz"),
551+
path.Join(artifactFolder, "artifact2.tar.gz"),
552+
},
553+
},
554+
}
555+
556+
for _, tt := range tests {
557+
t.Run(tt.name, func(t *testing.T) {
558+
g := NewWithT(t)
559+
dir, err := os.MkdirTemp("", "")
560+
g.Expect(err).ToNot(HaveOccurred())
561+
t.Cleanup(func() { os.RemoveAll(dir) })
562+
563+
s, err := NewStorage(dir, "hostname", time.Minute)
564+
g.Expect(err).ToNot(HaveOccurred(), "failed to create new storage")
565+
566+
artifact := sourcev1.Artifact{
567+
Path: tt.artifactPaths[len(tt.artifactPaths)-1],
568+
}
569+
g.Expect(os.MkdirAll(path.Join(dir, artifactFolder), 0755)).ToNot(HaveOccurred())
570+
for _, artifactPath := range tt.artifactPaths {
571+
f, err := os.Create(path.Join(dir, artifactPath))
572+
g.Expect(err).ToNot(HaveOccurred())
573+
g.Expect(f.Close()).ToNot(HaveOccurred())
574+
time.Sleep(tt.createPause)
575+
}
576+
577+
deletedPaths := s.getGarbageFiles(artifact, tt.maxItemsToBeRetained, tt.ttl)
578+
g.Expect(len(tt.wantDeleted)).To(Equal(len(deletedPaths)))
579+
for _, wantDeletedPath := range tt.wantDeleted {
580+
present := false
581+
for _, deletedPath := range deletedPaths {
582+
if strings.Contains(deletedPath, wantDeletedPath) {
583+
present = true
584+
break
585+
}
586+
}
587+
if present == false {
588+
g.Fail(fmt.Sprintf("expected file to be deleted, still exists: %s", wantDeletedPath))
589+
}
590+
}
591+
})
592+
}
593+
}

0 commit comments

Comments
 (0)