Skip to content

Commit 1c263e3

Browse files
authored
bpo-37029: keep usable_arenas in sorted order without searching (#13612)
This adds a vector of "search fingers" so that usable_arenas can be kept sorted by number of free pools via constant-time operations instead of linear search. This reduces the worst-case time for reclaiming a great many objects from O(A**2) to O(A), where A is the number of arenas. See bpo-37029.
1 parent 549e55a commit 1c263e3

File tree

2 files changed

+78
-32
lines changed

2 files changed

+78
-32
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Freeing a great many small objects could take time quadratic in the number of arenas, due to using linear search to keep ``obmalloc.c``'s list of usable arenas sorted by number of free memory pools. This is now accomplished without any search, reducing the worst-case time from quadratic to linear in the number of arenas. For programs where this visibly matters (typically those with more than 100,000 small objects alive simultaneously), this can greatly reduce the time needed to release their memory.

Objects/obmalloc.c

Lines changed: 77 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -918,6 +918,11 @@ static int running_on_valgrind = -1;
918918
#define POOL_SIZE SYSTEM_PAGE_SIZE /* must be 2^N */
919919
#define POOL_SIZE_MASK SYSTEM_PAGE_SIZE_MASK
920920

921+
#define MAX_POOLS_IN_ARENA (ARENA_SIZE / POOL_SIZE)
922+
#if MAX_POOLS_IN_ARENA * POOL_SIZE != ARENA_SIZE
923+
# error "arena size not an exact multiple of pool size"
924+
#endif
925+
921926
/*
922927
* -- End of tunable settings section --
923928
*/
@@ -1155,6 +1160,18 @@ usable_arenas
11551160
11561161
Note that an arena_object associated with an arena all of whose pools are
11571162
currently in use isn't on either list.
1163+
1164+
Changed in Python 3.8: keeping usable_arenas sorted by number of free pools
1165+
used to be done by one-at-a-time linear search when an arena's number of
1166+
free pools changed. That could, overall, consume time quadratic in the
1167+
number of arenas. That didn't really matter when there were only a few
1168+
hundred arenas (typical!), but could be a timing disaster when there were
1169+
hundreds of thousands. See bpo-37029.
1170+
1171+
Now we have a vector of "search fingers" to eliminate the need to search:
1172+
nfp2lasta[nfp] returns the last ("rightmost") arena in usable_arenas
1173+
with nfp free pools. This is NULL if and only if there is no arena with
1174+
nfp free pools in usable_arenas.
11581175
*/
11591176

11601177
/* Array of objects used to track chunks of memory (arenas). */
@@ -1172,6 +1189,9 @@ static struct arena_object* unused_arena_objects = NULL;
11721189
*/
11731190
static struct arena_object* usable_arenas = NULL;
11741191

1192+
/* nfp2lasta[nfp] is the last arena in usable_arenas with nfp free pools */
1193+
static struct arena_object* nfp2lasta[MAX_POOLS_IN_ARENA + 1] = { NULL };
1194+
11751195
/* How many arena_objects do we initially allocate?
11761196
* 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
11771197
* `arenas` vector.
@@ -1281,8 +1301,7 @@ new_arena(void)
12811301
/* pool_address <- first pool-aligned address in the arena
12821302
nfreepools <- number of whole pools that fit after alignment */
12831303
arenaobj->pool_address = (block*)arenaobj->address;
1284-
arenaobj->nfreepools = ARENA_SIZE / POOL_SIZE;
1285-
assert(POOL_SIZE * arenaobj->nfreepools == ARENA_SIZE);
1304+
arenaobj->nfreepools = MAX_POOLS_IN_ARENA;
12861305
excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
12871306
if (excess != 0) {
12881307
--arenaobj->nfreepools;
@@ -1478,30 +1497,39 @@ pymalloc_alloc(void *ctx, void **ptr_p, size_t nbytes)
14781497
}
14791498
usable_arenas->nextarena =
14801499
usable_arenas->prevarena = NULL;
1500+
assert(nfp2lasta[usable_arenas->nfreepools] == NULL);
1501+
nfp2lasta[usable_arenas->nfreepools] = usable_arenas;
14811502
}
14821503
assert(usable_arenas->address != 0);
14831504

1505+
/* This arena already had the smallest nfreepools value, so decreasing
1506+
* nfreepools doesn't change that, and we don't need to rearrange the
1507+
* usable_arenas list. However, if the arena becomes wholly allocated,
1508+
* we need to remove its arena_object from usable_arenas.
1509+
*/
1510+
assert(usable_arenas->nfreepools > 0);
1511+
if (nfp2lasta[usable_arenas->nfreepools] == usable_arenas) {
1512+
/* It's the last of this size, so there won't be any. */
1513+
nfp2lasta[usable_arenas->nfreepools] = NULL;
1514+
}
1515+
/* If any free pools will remain, it will be the new smallest. */
1516+
if (usable_arenas->nfreepools > 1) {
1517+
assert(nfp2lasta[usable_arenas->nfreepools - 1] == NULL);
1518+
nfp2lasta[usable_arenas->nfreepools - 1] = usable_arenas;
1519+
}
1520+
14841521
/* Try to get a cached free pool. */
14851522
pool = usable_arenas->freepools;
14861523
if (pool != NULL) {
14871524
/* Unlink from cached pools. */
14881525
usable_arenas->freepools = pool->nextpool;
1489-
1490-
/* This arena already had the smallest nfreepools
1491-
* value, so decreasing nfreepools doesn't change
1492-
* that, and we don't need to rearrange the
1493-
* usable_arenas list. However, if the arena has
1494-
* become wholly allocated, we need to remove its
1495-
* arena_object from usable_arenas.
1496-
*/
14971526
--usable_arenas->nfreepools;
14981527
if (usable_arenas->nfreepools == 0) {
14991528
/* Wholly allocated: remove. */
15001529
assert(usable_arenas->freepools == NULL);
15011530
assert(usable_arenas->nextarena == NULL ||
15021531
usable_arenas->nextarena->prevarena ==
15031532
usable_arenas);
1504-
15051533
usable_arenas = usable_arenas->nextarena;
15061534
if (usable_arenas != NULL) {
15071535
usable_arenas->prevarena = NULL;
@@ -1709,7 +1737,23 @@ pymalloc_free(void *ctx, void *p)
17091737
ao = &arenas[pool->arenaindex];
17101738
pool->nextpool = ao->freepools;
17111739
ao->freepools = pool;
1712-
nf = ++ao->nfreepools;
1740+
nf = ao->nfreepools;
1741+
/* If this is the rightmost arena with this number of free pools,
1742+
* nfp2lasta[nf] needs to change. Caution: if nf is 0, there
1743+
* are no arenas in usable_arenas with that value.
1744+
*/
1745+
struct arena_object* lastnf = nfp2lasta[nf];
1746+
assert((nf == 0 && lastnf == NULL) ||
1747+
(nf > 0 &&
1748+
lastnf != NULL &&
1749+
lastnf->nfreepools == nf &&
1750+
(lastnf->nextarena == NULL ||
1751+
nf < lastnf->nextarena->nfreepools)));
1752+
if (lastnf == ao) { /* it is the rightmost */
1753+
struct arena_object* p = ao->prevarena;
1754+
nfp2lasta[nf] = (p != NULL && p->nfreepools == nf) ? p : NULL;
1755+
}
1756+
ao->nfreepools = ++nf;
17131757

17141758
/* All the rest is arena management. We just freed
17151759
* a pool, and there are 4 cases for arena mgmt:
@@ -1777,6 +1821,9 @@ pymalloc_free(void *ctx, void *p)
17771821
usable_arenas->prevarena = ao;
17781822
usable_arenas = ao;
17791823
assert(usable_arenas->address != 0);
1824+
if (nfp2lasta[1] == NULL) {
1825+
nfp2lasta[1] = ao;
1826+
}
17801827

17811828
goto success;
17821829
}
@@ -1788,14 +1835,23 @@ pymalloc_free(void *ctx, void *p)
17881835
* a few un-scientific tests, it seems like this
17891836
* approach allowed a lot more memory to be freed.
17901837
*/
1791-
if (ao->nextarena == NULL ||
1792-
nf <= ao->nextarena->nfreepools) {
1838+
/* If this is the only arena with nf, record that. */
1839+
if (nfp2lasta[nf] == NULL) {
1840+
nfp2lasta[nf] = ao;
1841+
} /* else the rightmost with nf doesn't change */
1842+
/* If this was the rightmost of the old size, it remains in place. */
1843+
if (ao == lastnf) {
17931844
/* Case 4. Nothing to do. */
17941845
goto success;
17951846
}
1796-
/* Case 3: We have to move the arena towards the end
1797-
* of the list, because it has more free pools than
1798-
* the arena to its right.
1847+
/* If ao were the only arena in the list, the last block would have
1848+
* gotten us out.
1849+
*/
1850+
assert(ao->nextarena != NULL);
1851+
1852+
/* Case 3: We have to move the arena towards the end of the list,
1853+
* because it has more free pools than the arena to its right. It needs
1854+
* to move to follow lastnf.
17991855
* First unlink ao from usable_arenas.
18001856
*/
18011857
if (ao->prevarena != NULL) {
@@ -1809,24 +1865,13 @@ pymalloc_free(void *ctx, void *p)
18091865
usable_arenas = ao->nextarena;
18101866
}
18111867
ao->nextarena->prevarena = ao->prevarena;
1812-
1813-
/* Locate the new insertion point by iterating over
1814-
* the list, using our nextarena pointer.
1815-
*/
1816-
while (ao->nextarena != NULL && nf > ao->nextarena->nfreepools) {
1817-
ao->prevarena = ao->nextarena;
1818-
ao->nextarena = ao->nextarena->nextarena;
1819-
}
1820-
1821-
/* Insert ao at this point. */
1822-
assert(ao->nextarena == NULL || ao->prevarena == ao->nextarena->prevarena);
1823-
assert(ao->prevarena->nextarena == ao->nextarena);
1824-
1825-
ao->prevarena->nextarena = ao;
1868+
/* And insert after lastnf. */
1869+
ao->prevarena = lastnf;
1870+
ao->nextarena = lastnf->nextarena;
18261871
if (ao->nextarena != NULL) {
18271872
ao->nextarena->prevarena = ao;
18281873
}
1829-
1874+
lastnf->nextarena = ao;
18301875
/* Verify that the swaps worked. */
18311876
assert(ao->nextarena == NULL || nf <= ao->nextarena->nfreepools);
18321877
assert(ao->prevarena == NULL || nf > ao->prevarena->nfreepools);

0 commit comments

Comments
 (0)