Skip to content

Commit b961d9b

Browse files
rluvaton authored and ronag committed
stream: add highWaterMark for the map operator
This is done so we don't wait for the first items to finish before starting new ones.

Fixes: #46132
Co-authored-by: Robert Nagy <ronagy@icloud.com>
PR-URL: #49249
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
Reviewed-By: Benjamin Gruenbaum <benjamingr@gmail.com>
Reviewed-By: Robert Nagy <ronagy@icloud.com>
1 parent fe34d63 commit b961d9b

File tree

4 files changed

+226
-16
lines changed

4 files changed

+226
-16
lines changed

doc/api/stream.md

+12
Original file line number | Diff line number | Diff line change
@@ -2012,6 +2012,10 @@ showBoth();
20122012
added:
20132013
- v17.4.0
20142014
- v16.14.0
2015+
changes:
2016+
- version: REPLACEME
2017+
pr-url: https://github.com/nodejs/node/pull/49249
2018+
description: added `highWaterMark` in options.
20152019
-->
20162020

20172021
> Stability: 1 - Experimental
@@ -2025,6 +2029,8 @@ added:
20252029
* `options` {Object}
20262030
* `concurrency` {number} the maximum number of concurrent invocations of `fn`
20272031
on the stream at once. **Default:** `1`.
2032+
* `highWaterMark` {number} how many items to buffer while waiting for user
2033+
consumption of the mapped items. **Default:** `concurrency * 2 - 1`.
20282034
* `signal` {AbortSignal} allows destroying the stream if the signal is
20292035
aborted.
20302036
* Returns: {Readable} a stream mapped with the function `fn`.
@@ -2059,6 +2065,10 @@ for await (const result of dnsResults) {
20592065
added:
20602066
- v17.4.0
20612067
- v16.14.0
2068+
changes:
2069+
- version: REPLACEME
2070+
pr-url: https://github.com/nodejs/node/pull/49249
2071+
description: added `highWaterMark` in options.
20622072
-->
20632073

20642074
> Stability: 1 - Experimental
@@ -2071,6 +2081,8 @@ added:
20712081
* `options` {Object}
20722082
* `concurrency` {number} the maximum number of concurrent invocations of `fn`
20732083
on the stream at once. **Default:** `1`.
2084+
* `highWaterMark` {number} how many items to buffer while waiting for user
2085+
consumption of the filtered items. **Default:** `concurrency * 2 - 1`.
20742086
* `signal` {AbortSignal} allows destroying the stream if the signal is
20752087
aborted.
20762088
* Returns: {Readable} a stream filtered with the predicate `fn`.

lib/internal/streams/operators.js

+41-14
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@ const {
3333
NumberIsNaN,
3434
Promise,
3535
PromiseReject,
36+
PromiseResolve,
3637
PromisePrototypeThen,
3738
Symbol,
3839
} = primordials;
@@ -82,7 +83,15 @@ function map(fn, options) {
8283
concurrency = MathFloor(options.concurrency);
8384
}
8485

85-
validateInteger(concurrency, 'concurrency', 1);
86+
let highWaterMark = concurrency - 1;
87+
if (options?.highWaterMark != null) {
88+
highWaterMark = MathFloor(options.highWaterMark);
89+
}
90+
91+
validateInteger(concurrency, 'options.concurrency', 1);
92+
validateInteger(highWaterMark, 'options.highWaterMark', 0);
93+
94+
highWaterMark += concurrency;
8695

8796
return async function* map() {
8897
const signal = AbortSignal.any([options?.signal].filter(Boolean));
@@ -93,9 +102,28 @@ function map(fn, options) {
93102
let next;
94103
let resume;
95104
let done = false;
105+
let cnt = 0;
96106

97-
function onDone() {
107+
function onCatch() {
98108
done = true;
109+
afterItemProcessed();
110+
}
111+
112+
function afterItemProcessed() {
113+
cnt -= 1;
114+
maybeResume();
115+
}
116+
117+
function maybeResume() {
118+
if (
119+
resume &&
120+
!done &&
121+
cnt < concurrency &&
122+
queue.length < highWaterMark
123+
) {
124+
resume();
125+
resume = null;
126+
}
99127
}
100128

101129
async function pump() {
@@ -111,25 +139,27 @@ function map(fn, options) {
111139

112140
try {
113141
val = fn(val, signalOpt);
142+
143+
if (val === kEmpty) {
144+
continue;
145+
}
146+
147+
val = PromiseResolve(val);
114148
} catch (err) {
115149
val = PromiseReject(err);
116150
}
117151

118-
if (val === kEmpty) {
119-
continue;
120-
}
152+
cnt += 1;
121153

122-
if (typeof val?.catch === 'function') {
123-
val.catch(onDone);
124-
}
154+
PromisePrototypeThen(val, afterItemProcessed, onCatch);
125155

126156
queue.push(val);
127157
if (next) {
128158
next();
129159
next = null;
130160
}
131161

132-
if (!done && queue.length && queue.length >= concurrency) {
162+
if (!done && (queue.length >= highWaterMark || cnt >= concurrency)) {
133163
await new Promise((resolve) => {
134164
resume = resolve;
135165
});
@@ -138,7 +168,7 @@ function map(fn, options) {
138168
queue.push(kEof);
139169
} catch (err) {
140170
const val = PromiseReject(err);
141-
PromisePrototypeThen(val, undefined, onDone);
171+
PromisePrototypeThen(val, afterItemProcessed, onCatch);
142172
queue.push(val);
143173
} finally {
144174
done = true;
@@ -169,10 +199,7 @@ function map(fn, options) {
169199
}
170200

171201
queue.shift();
172-
if (resume) {
173-
resume();
174-
resume = null;
175-
}
202+
maybeResume();
176203
}
177204

178205
await new Promise((resolve) => {

test/parallel/test-stream-forEach.js

+1-1
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,7 @@ const { once } = require('events');
9696
Readable.from([1, 2, 3, 4]).forEach(async (_, { signal }) => {
9797
calls++;
9898
await once(signal, 'abort');
99-
}, { signal: ac.signal, concurrency: 2 });
99+
}, { signal: ac.signal, concurrency: 2, highWaterMark: 0 });
100100
// pump
101101
assert.rejects(async () => {
102102
await forEachPromise;

test/parallel/test-stream-map.js

+172-1
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,25 @@ const assert = require('assert');
88
const { once } = require('events');
99
const { setTimeout } = require('timers/promises');
1010

11+
function createDependentPromises(n) {
12+
const promiseAndResolveArray = [];
13+
14+
for (let i = 0; i < n; i++) {
15+
let res;
16+
const promise = new Promise((resolve) => {
17+
if (i === 0) {
18+
res = resolve;
19+
return;
20+
}
21+
res = () => promiseAndResolveArray[i - 1][0].then(resolve);
22+
});
23+
24+
promiseAndResolveArray.push([promise, res]);
25+
}
26+
27+
return promiseAndResolveArray;
28+
}
29+
1130
{
1231
// Map works on synchronous streams with a synchronous mapper
1332
const stream = Readable.from([1, 2, 3, 4, 5]).map((x) => x + x);
@@ -143,7 +162,7 @@ const { setTimeout } = require('timers/promises');
143162
const stream = range.map(common.mustCall(async (_, { signal }) => {
144163
await once(signal, 'abort');
145164
throw signal.reason;
146-
}, 2), { signal: ac.signal, concurrency: 2 });
165+
}, 2), { signal: ac.signal, concurrency: 2, highWaterMark: 0 });
147166
// pump
148167
assert.rejects(async () => {
149168
for await (const item of stream) {
@@ -173,12 +192,164 @@ const { setTimeout } = require('timers/promises');
173192
})().then(common.mustCall());
174193
}
175194

195+
196+
{
197+
// highWaterMark with small concurrency
198+
const finishOrder = [];
199+
200+
const promises = createDependentPromises(4);
201+
202+
const raw = Readable.from([2, 0, 1, 3]);
203+
const stream = raw.map(async (item) => {
204+
const [promise, resolve] = promises[item];
205+
resolve();
206+
207+
await promise;
208+
finishOrder.push(item);
209+
return item;
210+
}, { concurrency: 2 });
211+
212+
(async () => {
213+
await stream.toArray();
214+
215+
assert.deepStrictEqual(finishOrder, [0, 1, 2, 3]);
216+
})().then(common.mustCall(), common.mustNotCall());
217+
}
218+
219+
{
220+
// highWaterMark with a lot of items and large concurrency
221+
const finishOrder = [];
222+
223+
const promises = createDependentPromises(20);
224+
225+
const input = [10, 1, 0, 3, 4, 2, 5, 7, 8, 9, 6, 11, 12, 13, 18, 15, 16, 17, 14, 19];
226+
const raw = Readable.from(input);
227+
// Should be
228+
// 10, 1, 0, 3, 4, 2 | next: 0
229+
// 10, 1, 3, 4, 2, 5 | next: 1
230+
// 10, 3, 4, 2, 5, 7 | next: 2
231+
// 10, 3, 4, 5, 7, 8 | next: 3
232+
// 10, 4, 5, 7, 8, 9 | next: 4
233+
// 10, 5, 7, 8, 9, 6 | next: 5
234+
// 10, 7, 8, 9, 6, 11 | next: 6
235+
// 10, 7, 8, 9, 11, 12 | next: 7
236+
// 10, 8, 9, 11, 12, 13 | next: 8
237+
// 10, 9, 11, 12, 13, 18 | next: 9
238+
// 10, 11, 12, 13, 18, 15 | next: 10
239+
// 11, 12, 13, 18, 15, 16 | next: 11
240+
// 12, 13, 18, 15, 16, 17 | next: 12
241+
// 13, 18, 15, 16, 17, 14 | next: 13
242+
// 18, 15, 16, 17, 14, 19 | next: 14
243+
// 18, 15, 16, 17, 19 | next: 15
244+
// 18, 16, 17, 19 | next: 16
245+
// 18, 17, 19 | next: 17
246+
// 18, 19 | next: 18
247+
// 19 | next: 19
248+
//
249+
250+
const stream = raw.map(async (item) => {
251+
const [promise, resolve] = promises[item];
252+
resolve();
253+
254+
await promise;
255+
finishOrder.push(item);
256+
return item;
257+
}, { concurrency: 6 });
258+
259+
(async () => {
260+
const outputOrder = await stream.toArray();
261+
262+
assert.deepStrictEqual(outputOrder, input);
263+
assert.deepStrictEqual(finishOrder, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]);
264+
})().then(common.mustCall(), common.mustNotCall());
265+
}
266+
267+
{
268+
// Custom highWaterMark with a lot of items and large concurrency
269+
const finishOrder = [];
270+
271+
const promises = createDependentPromises(20);
272+
273+
const input = [11, 1, 0, 3, 4, 2, 5, 7, 8, 9, 6, 10, 12, 13, 18, 15, 16, 17, 14, 19];
274+
const raw = Readable.from(input);
275+
// Should be
276+
// 11, 1, 0, 3, 4 | next: 0, buffer: []
277+
// 11, 1, 3, 4, 2 | next: 1, buffer: [0]
278+
// 11, 3, 4, 2, 5 | next: 2, buffer: [0, 1]
279+
// 11, 3, 4, 5, 7 | next: 3, buffer: [0, 1, 2]
280+
// 11, 4, 5, 7, 8 | next: 4, buffer: [0, 1, 2, 3]
281+
// 11, 5, 7, 8, 9 | next: 5, buffer: [0, 1, 2, 3, 4]
282+
// 11, 7, 8, 9, 6 | next: 6, buffer: [0, 1, 2, 3, 4, 5]
283+
// 11, 7, 8, 9, 10 | next: 7, buffer: [0, 1, 2, 3, 4, 5, 6] -- buffer full
284+
// 11, 8, 9, 10, 12 | next: 8, buffer: [0, 1, 2, 3, 4, 5, 6]
285+
// 11, 9, 10, 12, 13 | next: 9, buffer: [0, 1, 2, 3, 4, 5, 6]
286+
// 11, 10, 12, 13, 18 | next: 10, buffer: [0, 1, 2, 3, 4, 5, 6]
287+
// 11, 12, 13, 18, 15 | next: 11, buffer: [0, 1, 2, 3, 4, 5, 6]
288+
// 12, 13, 18, 15, 16 | next: 12, buffer: [] -- all items flushed as 11 is consumed and all the items wait for it
289+
// 13, 18, 15, 16, 17 | next: 13, buffer: []
290+
// 18, 15, 16, 17, 14 | next: 14, buffer: []
291+
// 18, 15, 16, 17, 19 | next: 15, buffer: [14]
292+
// 18, 16, 17, 19 | next: 16, buffer: [14, 15]
293+
// 18, 17, 19 | next: 17, buffer: [14, 15, 16]
294+
// 18, 19 | next: 18, buffer: [14, 15, 16, 17]
295+
// 19 | next: 19, buffer: [] -- all items flushed
296+
//
297+
298+
const stream = raw.map(async (item) => {
299+
const [promise, resolve] = promises[item];
300+
resolve();
301+
302+
await promise;
303+
finishOrder.push(item);
304+
return item;
305+
}, { concurrency: 5, highWaterMark: 7 });
306+
307+
(async () => {
308+
const outputOrder = await stream.toArray();
309+
310+
assert.deepStrictEqual(outputOrder, input);
311+
assert.deepStrictEqual(finishOrder, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]);
312+
})().then(common.mustCall(), common.mustNotCall());
313+
}
314+
315+
{
316+
// When there is a delay between the first and the next item, it should not wait for the queue to fill
317+
// before yielding to the user
318+
const promises = createDependentPromises(3);
319+
320+
const raw = Readable.from([0, 1, 2]);
321+
322+
const stream = raw
323+
.map(async (item) => {
324+
if (item !== 0) {
325+
await promises[item][0];
326+
}
327+
328+
return item;
329+
}, { concurrency: 2 })
330+
.map((item) => {
331+
// eslint-disable-next-line no-unused-vars
332+
for (const [_, resolve] of promises) {
333+
resolve();
334+
}
335+
336+
return item;
337+
});
338+
339+
(async () => {
340+
await stream.toArray();
341+
})().then(common.mustCall(), common.mustNotCall());
342+
}
343+
176344
{
177345
// Error cases
178346
assert.throws(() => Readable.from([1]).map(1), /ERR_INVALID_ARG_TYPE/);
179347
assert.throws(() => Readable.from([1]).map((x) => x, {
180348
concurrency: 'Foo'
181349
}), /ERR_OUT_OF_RANGE/);
350+
assert.throws(() => Readable.from([1]).map((x) => x, {
351+
concurrency: -1
352+
}), /ERR_OUT_OF_RANGE/);
182353
assert.throws(() => Readable.from([1]).map((x) => x, 1), /ERR_INVALID_ARG_TYPE/);
183354
assert.throws(() => Readable.from([1]).map((x) => x, { signal: true }), /ERR_INVALID_ARG_TYPE/);
184355
}

0 commit comments

Comments
 (0)