22
22
import io .streamthoughts .kafka .connect .filepulse .source .FileObject ;
23
23
import io .streamthoughts .kafka .connect .filepulse .storage .StateBackingStore ;
24
24
import io .streamthoughts .kafka .connect .filepulse .storage .StateSnapshot ;
25
+ import org .apache .kafka .common .config .AbstractConfig ;
26
+ import org .apache .kafka .common .config .ConfigDef ;
25
27
import org .slf4j .Logger ;
26
28
import org .slf4j .LoggerFactory ;
27
29
30
32
import java .util .Map ;
31
33
import java .util .concurrent .TimeUnit ;
32
34
import java .util .concurrent .atomic .AtomicBoolean ;
35
+ import java .util .function .Consumer ;
33
36
34
37
/**
35
38
* An in-memory {@link StateBackingStore} implementation that uses an LRU cache based on HashMap.
@@ -40,22 +43,45 @@ public class InMemoryFileObjectStateBackingStore implements FileObjectStateBacki
40
43
41
44
private static final int DEFAULT_MAX_SIZE_CAPACITY = 10_000 ;
42
45
43
- private final Map <String , FileObject > objects ;
46
+ private volatile Map <String , FileObject > objects ;
44
47
45
48
private StateBackingStore .UpdateListener <FileObject > listener ;
46
49
47
50
private final AtomicBoolean started = new AtomicBoolean (false );
48
51
49
- public InMemoryFileObjectStateBackingStore () {
50
- this .objects = Collections .synchronizedMap (createLRUCache (DEFAULT_MAX_SIZE_CAPACITY ));
51
- }
52
+ /**
53
+ * Creates a new {@link InMemoryFileObjectStateBackingStore} instance.
54
+ */
55
+ public InMemoryFileObjectStateBackingStore () { }
52
56
53
57
@ VisibleForTesting
54
58
public InMemoryFileObjectStateBackingStore (final Map <String , FileObject > objects ) {
55
- this ( );
59
+ configure ( Collections . emptyMap () );
56
60
this .objects .putAll (objects );
57
61
}
58
62
63
+ /**
64
+ * {@inheritDoc}
65
+ */
66
+ @ Override
67
+ public void configure (final Map <String , ?> configs ) {
68
+ FileObjectStateBackingStore .super .configure (configs );
69
+ int cacheMaxCapacity = new Config (configs ).getCacheMaxCapacity ();
70
+ this .objects = Collections .synchronizedMap (createLRUCache (cacheMaxCapacity , objectEntry -> {
71
+ if (!objectEntry .getValue ().status ().isDone ()) {
72
+ LOG .warn (
73
+ "Evicting a file-object state '{}' from in-memory state with a non terminal"
74
+ + " status (i.e. 'CLEANED'). This may happen if you are processing more files than the"
75
+ + " max-capacity of the InMemoryFileObjectStateBackingStore before committing offsets"
76
+ + " for tasks successfully. Please consider increasing the value of"
77
+ + " 'tasks.file.status.storage.cache.max.size.capacity' through"
78
+ + " the connector's configuration." ,
79
+ objectEntry .getValue ().metadata ().stringURI ()
80
+ );
81
+ }
82
+ }));
83
+ }
84
+
59
85
/**
60
86
* {@inheritDoc}
61
87
*/
@@ -156,12 +182,56 @@ public UpdateListener<FileObject> getListener() {
156
182
return listener ;
157
183
}
158
184
159
- private static <K , V > Map <K , V > createLRUCache (final int maxCacheSize ) {
185
+ private static <K , V > Map <K , V > createLRUCache (final int maxCacheSize ,
186
+ final Consumer <Map .Entry <K , V >> callbackOnRemoveEldest ) {
160
187
return new LinkedHashMap <>(maxCacheSize + 1 , 1.01f , true ) {
161
188
@ Override
162
189
protected boolean removeEldestEntry (final Map .Entry <K , V > eldest ) {
163
- return size () > maxCacheSize ;
190
+ boolean remove = size () > maxCacheSize ;
191
+ if (remove ) {
192
+ callbackOnRemoveEldest .accept (eldest );
193
+ }
194
+ return remove ;
164
195
}
165
196
};
166
197
}
198
+
199
+ private static final class Config extends AbstractConfig {
200
+
201
+ private static final String GROUP = "InMemoryFileObjectStateBackingStore" ;
202
+
203
+ public static final String TASKS_FILE_STATUS_STORAGE_CACHE_MAX_SIZE_CAPACITY_CONFIG
204
+ = "tasks.file.status.storage.cache.max.size.capacity" ;
205
+ private static final String TASKS_FILE_STATUS_STORAGE_CACHE_MAX_SIZE_CAPACITY_DOC
206
+ = "The max size capacity of the LRU in-memory cache (default: 10_000)." ;
207
+
208
+ /**
209
+ * Creates a new {@link Config} instance.
210
+ *
211
+ * @param originals the configuration properties.
212
+ */
213
+ public Config (final Map <?, ?> originals ) {
214
+ super (configDef (), originals , false );
215
+ }
216
+
217
+ public int getCacheMaxCapacity () {
218
+ return this .getInt (TASKS_FILE_STATUS_STORAGE_CACHE_MAX_SIZE_CAPACITY_CONFIG );
219
+ }
220
+
221
+ private static ConfigDef configDef () {
222
+ int groupCounter = 0 ;
223
+ return new ConfigDef ()
224
+ .define (
225
+ TASKS_FILE_STATUS_STORAGE_CACHE_MAX_SIZE_CAPACITY_CONFIG ,
226
+ ConfigDef .Type .INT ,
227
+ DEFAULT_MAX_SIZE_CAPACITY ,
228
+ ConfigDef .Importance .LOW ,
229
+ TASKS_FILE_STATUS_STORAGE_CACHE_MAX_SIZE_CAPACITY_DOC ,
230
+ GROUP ,
231
+ groupCounter ++,
232
+ ConfigDef .Width .NONE ,
233
+ TASKS_FILE_STATUS_STORAGE_CACHE_MAX_SIZE_CAPACITY_CONFIG
234
+ );
235
+ }
236
+ }
167
237
}
0 commit comments