diff --git a/.appveyor.yml b/.appveyor.yml new file mode 100644 index 0000000000..510815dce8 --- /dev/null +++ b/.appveyor.yml @@ -0,0 +1,42 @@ +version: '{build}' + +environment: + matrix: + - MSYSTEM: MINGW64 + CPU: x86_64 + MSVC: amd64 + - MSYSTEM: MINGW32 + CPU: i686 + MSVC: x86 + - MSYSTEM: MINGW64 + CPU: x86_64 + - MSYSTEM: MINGW32 + CPU: i686 + - MSYSTEM: MINGW64 + CPU: x86_64 + MSVC: amd64 + CONFIG_FLAGS: --enable-debug + - MSYSTEM: MINGW32 + CPU: i686 + MSVC: x86 + CONFIG_FLAGS: --enable-debug + - MSYSTEM: MINGW64 + CPU: x86_64 + CONFIG_FLAGS: --enable-debug + - MSYSTEM: MINGW32 + CPU: i686 + CONFIG_FLAGS: --enable-debug + +install: + - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% + - if defined MSVC call "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %MSVC% + - if defined MSVC pacman --noconfirm -Rsc mingw-w64-%CPU%-gcc gcc + - pacman --noconfirm -Suy mingw-w64-%CPU%-make + +build_script: + - bash -c "autoconf" + - bash -c "./configure $CONFIG_FLAGS" + - mingw32-make -j3 + - file lib/jemalloc.dll + - mingw32-make -j3 tests + - mingw32-make -k check diff --git a/.autom4te.cfg b/.autom4te.cfg new file mode 100644 index 0000000000..fe2424db5e --- /dev/null +++ b/.autom4te.cfg @@ -0,0 +1,3 @@ +begin-language: "Autoconf-without-aclocal-m4" +args: --no-cache +end-language: "Autoconf-without-aclocal-m4" diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000..6313b56c57 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +* text=auto eol=lf diff --git a/.gitignore b/.gitignore index 4c408ec2c2..08278d087b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,8 @@ /*.gcov.* -/autom4te.cache/ - +/bin/jemalloc-config /bin/jemalloc.sh +/bin/jeprof /config.stamp /config.log @@ -15,6 +15,8 @@ /doc/jemalloc.html /doc/jemalloc.3 +/jemalloc.pc + /lib/ /Makefile @@ -35,6 +37,7 @@ /include/jemalloc/jemalloc_protos.h /include/jemalloc/jemalloc_protos_jet.h /include/jemalloc/jemalloc_rename.h +/include/jemalloc/jemalloc_typedefs.h /src/*.[od] /src/*.gcda @@ -70,3 +73,19 @@ test/include/test/jemalloc_test_defs.h /test/unit/*.out /VERSION + +*.pdb +*.sdf +*.opendb +*.opensdf +*.cachefile +*.suo +*.user +*.sln.docstates +*.tmp +/msvc/Win32/ +/msvc/x64/ +/msvc/projects/*/*/Debug*/ +/msvc/projects/*/*/Release*/ +/msvc/projects/*/*/Win32/ +/msvc/projects/*/*/x64/ diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000000..b563928c13 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,95 @@ +language: generic + +matrix: + include: + - os: linux + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="" + - os: osx + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="" + - os: linux + env: CC=clang COMPILER_FLAGS="" CONFIGURE_FLAGS="" + - os: linux + env: CC=gcc COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" + addons: + apt: + packages: + - gcc-multilib + - os: linux + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" + - os: linux + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" + - os: linux + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" + - os: linux + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-tcache" + - os: osx + env: CC=clang COMPILER_FLAGS="" CONFIGURE_FLAGS="" + - os: osx + env: CC=gcc COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" + - os: osx + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" + - os: osx + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" + - os: osx + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-tcache" + - os: linux + env: CC=clang COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" + addons: + apt: + packages: + - gcc-multilib + - os: linux + env: CC=clang COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" + - os: linux + env: CC=clang COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" + - os: linux + env: CC=clang COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" + - os: linux + env: CC=clang COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-tcache" + - os: linux + env: CC=gcc COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug" + addons: + apt: + packages: + - gcc-multilib + - os: linux + env: CC=gcc COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof" + addons: + apt: + packages: + - gcc-multilib + - os: linux + env: CC=gcc COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-stats" + addons: + apt: + packages: + - gcc-multilib + - os: linux + env: CC=gcc COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-tcache" + addons: + apt: + packages: + - gcc-multilib + - os: linux + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-prof" + - os: linux + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-stats" + - os: linux + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-tcache" + - os: linux + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-stats" + - os: linux + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-tcache" + - os: linux + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --disable-tcache" + + +before_script: + - autoconf + - ./configure ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" } $CONFIGURE_FLAGS + - make -j3 + - make -j3 tests + +script: + - make check + diff --git a/COPYING b/COPYING index bdda0feb9e..104b1f8b01 100644 --- a/COPYING +++ b/COPYING @@ -1,10 +1,10 @@ Unless otherwise specified, files in the jemalloc source distribution are subject to the following license: -------------------------------------------------------------------------------- -Copyright (C) 2002-2014 Jason Evans . +Copyright (C) 2002-2016 Jason Evans . All rights reserved. Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. -Copyright (C) 2009-2014 Facebook, Inc. All rights reserved. +Copyright (C) 2009-2016 Facebook, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/ChangeLog b/ChangeLog index d56ee999e6..a9406853e1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,10 +1,505 @@ Following are change highlights associated with official releases. Important -bug fixes are all mentioned, but internal enhancements are omitted here for -brevity (even though they are more fun to write about). Much more detail can be -found in the git revision history: +bug fixes are all mentioned, but some internal enhancements are omitted here for +brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.5.0 (February 28, 2017) + + This is the first release to benefit from much broader continuous integration + testing, thanks to @davidtgoldblatt. Had we had this testing infrastructure + in place for prior releases, it would have caught all of the most serious + regressions fixed by this release. + + New features: + - Add --disable-thp and the opt.thp to provide opt-out mechanisms for + transparent huge page integration. (@jasone) + - Update zone allocator integration to work with macOS 10.12. (@glandium) + - Restructure *CFLAGS configuration, so that CFLAGS behaves typically, and + EXTRA_CFLAGS provides a way to specify e.g. -Werror during building, but not + during configuration. (@jasone, @ronawho) + + Bug fixes: + - Fix DSS (sbrk(2)-based) allocation. This regression was first released in + 4.3.0. (@jasone) + - Handle race in per size class utilization computation. This functionality + was first released in 4.0.0. (@interwq) + - Fix lock order reversal during gdump. (@jasone) + - Fix-refactor tcache synchronization. This regression was first released in + 4.0.0. (@jasone) + - Fix various JSON-formatted malloc_stats_print() bugs. This functionality + was first released in 4.3.0. (@jasone) + - Fix huge-aligned allocation. This regression was first released in 4.4.0. + (@jasone) + - When transparent huge page integration is enabled, detect what state pages + start in according to the kernel's current operating mode, and only convert + arena chunks to non-huge during purging if that is not their initial state. + This functionality was first released in 4.4.0. (@jasone) + - Fix lg_chunk clamping for the --enable-cache-oblivious --disable-fill case. + This regression was first released in 4.0.0. (@jasone, @428desmo) + - Properly detect sparc64 when building for Linux. (@glaubitz) + +* 4.4.0 (December 3, 2016) + + New features: + - Add configure support for *-*-linux-android. (@cferris1000, @jasone) + - Add the --disable-syscall configure option, for use on systems that place + security-motivated limitations on syscall(2). (@jasone) + - Add support for Debian GNU/kFreeBSD. (@thesam) + + Optimizations: + - Add extent serial numbers and use them where appropriate as a sort key that + is higher priority than address, so that the allocation policy prefers older + extents. This tends to improve locality (decrease fragmentation) when + memory grows downward. (@jasone) + - Refactor madvise(2) configuration so that MADV_FREE is detected and utilized + on Linux 4.5 and newer. (@jasone) + - Mark partially purged arena chunks as non-huge-page. This improves + interaction with Linux's transparent huge page functionality. (@jasone) + + Bug fixes: + - Fix size class computations for edge conditions involving extremely large + allocations. This regression was first released in 4.0.0. (@jasone, + @ingvarha) + - Remove overly restrictive assertions related to the cactive statistic. This + regression was first released in 4.1.0. (@jasone) + - Implement a more reliable detection scheme for os_unfair_lock on macOS. + (@jszakmeister) + +* 4.3.1 (November 7, 2016) + + Bug fixes: + - Fix a severe virtual memory leak. This regression was first released in + 4.3.0. (@interwq, @jasone) + - Refactor atomic and prng APIs to restore support for 32-bit platforms that + use pre-C11 toolchains, e.g. FreeBSD's mips. (@jasone) + +* 4.3.0 (November 4, 2016) + + This is the first release that passes the test suite for multiple Windows + configurations, thanks in large part to @glandium setting up continuous + integration via AppVeyor (and Travis CI for Linux and OS X). + + New features: + - Add "J" (JSON) support to malloc_stats_print(). (@jasone) + - Add Cray compiler support. (@ronawho) + + Optimizations: + - Add/use adaptive spinning for bootstrapping and radix tree node + initialization. (@jasone) + + Bug fixes: + - Fix large allocation to search starting in the optimal size class heap, + which can substantially reduce virtual memory churn and fragmentation. This + regression was first released in 4.0.0. (@mjp41, @jasone) + - Fix stats.arenas..nthreads accounting. (@interwq) + - Fix and simplify decay-based purging. (@jasone) + - Make DSS (sbrk(2)-related) operations lockless, which resolves potential + deadlocks during thread exit. (@jasone) + - Fix over-sized allocation of radix tree leaf nodes. (@mjp41, @ogaun, + @jasone) + - Fix over-sized allocation of arena_t (plus associated stats) data + structures. (@jasone, @interwq) + - Fix EXTRA_CFLAGS to not affect configuration. (@jasone) + - Fix a Valgrind integration bug. (@ronawho) + - Disallow 0x5a junk filling when running in Valgrind. (@jasone) + - Fix a file descriptor leak on Linux. This regression was first released in + 4.2.0. (@vsarunas, @jasone) + - Fix static linking of jemalloc with glibc. (@djwatson) + - Use syscall(2) rather than {open,read,close}(2) during boot on Linux. This + works around other libraries' system call wrappers performing reentrant + allocation. (@kspinka, @Whissi, @jasone) + - Fix OS X default zone replacement to work with OS X 10.12. (@glandium, + @jasone) + - Fix cached memory management to avoid needless commit/decommit operations + during purging, which resolves permanent virtual memory map fragmentation + issues on Windows. (@mjp41, @jasone) + - Fix TSD fetches to avoid (recursive) allocation. This is relevant to + non-TLS and Windows configurations. (@jasone) + - Fix malloc_conf overriding to work on Windows. (@jasone) + - Forcibly disable lazy-lock on Windows (was forcibly *enabled*). (@jasone) + +* 4.2.1 (June 8, 2016) + + Bug fixes: + - Fix bootstrapping issues for configurations that require allocation during + tsd initialization (e.g. --disable-tls). (@cferris1000, @jasone) + - Fix gettimeofday() version of nstime_update(). (@ronawho) + - Fix Valgrind regressions in calloc() and chunk_alloc_wrapper(). (@ronawho) + - Fix potential VM map fragmentation regression. (@jasone) + - Fix opt_zero-triggered in-place huge reallocation zeroing. (@jasone) + - Fix heap profiling context leaks in reallocation edge cases. (@jasone) + +* 4.2.0 (May 12, 2016) + + New features: + - Add the arena..reset mallctl, which makes it possible to discard all of + an arena's allocations in a single operation. (@jasone) + - Add the stats.retained and stats.arenas..retained statistics. (@jasone) + - Add the --with-version configure option. (@jasone) + - Support --with-lg-page values larger than actual page size. (@jasone) + + Optimizations: + - Use pairing heaps rather than red-black trees for various hot data + structures. (@djwatson, @jasone) + - Streamline fast paths of rtree operations. (@jasone) + - Optimize the fast paths of calloc() and [m,d,sd]allocx(). (@jasone) + - Decommit unused virtual memory if the OS does not overcommit. (@jasone) + - Specify MAP_NORESERVE on Linux if [heuristic] overcommit is active, in order + to avoid unfortunate interactions during fork(2). (@jasone) + + Bug fixes: + - Fix chunk accounting related to triggering gdump profiles. (@jasone) + - Link against librt for clock_gettime(2) if glibc < 2.17. (@jasone) + - Scale leak report summary according to sampling probability. (@jasone) + +* 4.1.1 (May 3, 2016) + + This bugfix release resolves a variety of mostly minor issues, though the + bitmap fix is critical for 64-bit Windows. + + Bug fixes: + - Fix the linear scan version of bitmap_sfu() to shift by the proper amount + even when sizeof(long) is not the same as sizeof(void *), as on 64-bit + Windows. (@jasone) + - Fix hashing functions to avoid unaligned memory accesses (and resulting + crashes). This is relevant at least to some ARM-based platforms. + (@rkmisra) + - Fix fork()-related lock rank ordering reversals. These reversals were + unlikely to cause deadlocks in practice except when heap profiling was + enabled and active. (@jasone) + - Fix various chunk leaks in OOM code paths. (@jasone) + - Fix malloc_stats_print() to print opt.narenas correctly. (@jasone) + - Fix MSVC-specific build/test issues. (@rustyx, @yuslepukhin) + - Fix a variety of test failures that were due to test fragility rather than + core bugs. (@jasone) + +* 4.1.0 (February 28, 2016) + + This release is primarily about optimizations, but it also incorporates a lot + of portability-motivated refactoring and enhancements. Many people worked on + this release, to an extent that even with the omission here of minor changes + (see git revision history), and of the people who reported and diagnosed + issues, so much of the work was contributed that starting with this release, + changes are annotated with author credits to help reflect the collaborative + effort involved. + + New features: + - Implement decay-based unused dirty page purging, a major optimization with + mallctl API impact. This is an alternative to the existing ratio-based + unused dirty page purging, and is intended to eventually become the sole + purging mechanism. New mallctls: + + opt.purge + + opt.decay_time + + arena..decay + + arena..decay_time + + arenas.decay_time + + stats.arenas..decay_time + (@jasone, @cevans87) + - Add --with-malloc-conf, which makes it possible to embed a default + options string during configuration. This was motivated by the desire to + specify --with-malloc-conf=purge:decay , since the default must remain + purge:ratio until the 5.0.0 release. (@jasone) + - Add MS Visual Studio 2015 support. (@rustyx, @yuslepukhin) + - Make *allocx() size class overflow behavior defined. The maximum + size class is now less than PTRDIFF_MAX to protect applications against + numerical overflow, and all allocation functions are guaranteed to indicate + errors rather than potentially crashing if the request size exceeds the + maximum size class. (@jasone) + - jeprof: + + Add raw heap profile support. (@jasone) + + Add --retain and --exclude for backtrace symbol filtering. (@jasone) + + Optimizations: + - Optimize the fast path to combine various bootstrapping and configuration + checks and execute more streamlined code in the common case. (@interwq) + - Use linear scan for small bitmaps (used for small object tracking). In + addition to speeding up bitmap operations on 64-bit systems, this reduces + allocator metadata overhead by approximately 0.2%. (@djwatson) + - Separate arena_avail trees, which substantially speeds up run tree + operations. (@djwatson) + - Use memoization (boot-time-computed table) for run quantization. Separate + arena_avail trees reduced the importance of this optimization. (@jasone) + - Attempt mmap-based in-place huge reallocation. This can dramatically speed + up incremental huge reallocation. (@jasone) + + Incompatible changes: + - Make opt.narenas unsigned rather than size_t. (@jasone) + + Bug fixes: + - Fix stats.cactive accounting regression. (@rustyx, @jasone) + - Handle unaligned keys in hash(). This caused problems for some ARM systems. + (@jasone, @cferris1000) + - Refactor arenas array. In addition to fixing a fork-related deadlock, this + makes arena lookups faster and simpler. (@jasone) + - Move retained memory allocation out of the default chunk allocation + function, to a location that gets executed even if the application installs + a custom chunk allocation function. This resolves a virtual memory leak. + (@buchgr) + - Fix a potential tsd cleanup leak. (@cferris1000, @jasone) + - Fix run quantization. In practice this bug had no impact unless + applications requested memory with alignment exceeding one page. + (@jasone, @djwatson) + - Fix LinuxThreads-specific bootstrapping deadlock. (Cosmin Paraschiv) + - jeprof: + + Don't discard curl options if timeout is not defined. (@djwatson) + + Detect failed profile fetches. (@djwatson) + - Fix stats.arenas..{dss,lg_dirty_mult,decay_time,pactive,pdirty} for + --disable-stats case. (@jasone) + +* 4.0.4 (October 24, 2015) + + This bugfix release fixes another xallocx() regression. No other regressions + have come to light in over a month, so this is likely a good starting point + for people who prefer to wait for "dot one" releases with all the major issues + shaken out. + + Bug fixes: + - Fix xallocx(..., MALLOCX_ZERO to zero the last full trailing page of large + allocations that have been randomly assigned an offset of 0 when + --enable-cache-oblivious configure option is enabled. + +* 4.0.3 (September 24, 2015) + + This bugfix release continues the trend of xallocx() and heap profiling fixes. + + Bug fixes: + - Fix xallocx(..., MALLOCX_ZERO) to zero all trailing bytes of large + allocations when --enable-cache-oblivious configure option is enabled. + - Fix xallocx(..., MALLOCX_ZERO) to zero trailing bytes of huge allocations + when resizing from/to a size class that is not a multiple of the chunk size. + - Fix prof_tctx_dump_iter() to filter out nodes that were created after heap + profile dumping started. + - Work around a potentially bad thread-specific data initialization + interaction with NPTL (glibc's pthreads implementation). + +* 4.0.2 (September 21, 2015) + + This bugfix release addresses a few bugs specific to heap profiling. + + Bug fixes: + - Fix ixallocx_prof_sample() to never modify nor create sampled small + allocations. xallocx() is in general incapable of moving small allocations, + so this fix removes buggy code without loss of generality. + - Fix irallocx_prof_sample() to always allocate large regions, even when + alignment is non-zero. + - Fix prof_alloc_rollback() to read tdata from thread-specific data rather + than dereferencing a potentially invalid tctx. + +* 4.0.1 (September 15, 2015) + + This is a bugfix release that is somewhat high risk due to the amount of + refactoring required to address deep xallocx() problems. As a side effect of + these fixes, xallocx() now tries harder to partially fulfill requests for + optional extra space. Note that a couple of minor heap profiling + optimizations are included, but these are better thought of as performance + fixes that were integral to disovering most of the other bugs. + + Optimizations: + - Avoid a chunk metadata read in arena_prof_tctx_set(), since it is in the + fast path when heap profiling is enabled. Additionally, split a special + case out into arena_prof_tctx_reset(), which also avoids chunk metadata + reads. + - Optimize irallocx_prof() to optimistically update the sampler state. The + prior implementation appears to have been a holdover from when + rallocx()/xallocx() functionality was combined as rallocm(). + + Bug fixes: + - Fix TLS configuration such that it is enabled by default for platforms on + which it works correctly. + - Fix arenas_cache_cleanup() and arena_get_hard() to handle + allocation/deallocation within the application's thread-specific data + cleanup functions even after arenas_cache is torn down. + - Fix xallocx() bugs related to size+extra exceeding HUGE_MAXCLASS. + - Fix chunk purge hook calls for in-place huge shrinking reallocation to + specify the old chunk size rather than the new chunk size. This bug caused + no correctness issues for the default chunk purge function, but was + visible to custom functions set via the "arena..chunk_hooks" mallctl. + - Fix heap profiling bugs: + + Fix heap profiling to distinguish among otherwise identical sample sites + with interposed resets (triggered via the "prof.reset" mallctl). This bug + could cause data structure corruption that would most likely result in a + segfault. + + Fix irealloc_prof() to prof_alloc_rollback() on OOM. + + Make one call to prof_active_get_unlocked() per allocation event, and use + the result throughout the relevant functions that handle an allocation + event. Also add a missing check in prof_realloc(). These fixes protect + allocation events against concurrent prof_active changes. + + Fix ixallocx_prof() to pass usize_max and zero to ixallocx_prof_sample() + in the correct order. + + Fix prof_realloc() to call prof_free_sampled_object() after calling + prof_malloc_sample_object(). Prior to this fix, if tctx and old_tctx were + the same, the tctx could have been prematurely destroyed. + - Fix portability bugs: + + Don't bitshift by negative amounts when encoding/decoding run sizes in + chunk header maps. This affected systems with page sizes greater than 8 + KiB. + + Rename index_t to szind_t to avoid an existing type on Solaris. + + Add JEMALLOC_CXX_THROW to the memalign() function prototype, in order to + match glibc and avoid compilation errors when including both + jemalloc/jemalloc.h and malloc.h in C++ code. + + Don't assume that /bin/sh is appropriate when running size_classes.sh + during configuration. + + Consider __sparcv9 a synonym for __sparc64__ when defining LG_QUANTUM. + + Link tests to librt if it contains clock_gettime(2). + +* 4.0.0 (August 17, 2015) + + This version contains many speed and space optimizations, both minor and + major. The major themes are generalization, unification, and simplification. + Although many of these optimizations cause no visible behavior change, their + cumulative effect is substantial. + + New features: + - Normalize size class spacing to be consistent across the complete size + range. By default there are four size classes per size doubling, but this + is now configurable via the --with-lg-size-class-group option. Also add the + --with-lg-page, --with-lg-page-sizes, --with-lg-quantum, and + --with-lg-tiny-min options, which can be used to tweak page and size class + settings. Impacts: + + Worst case performance for incrementally growing/shrinking reallocation + is improved because there are far fewer size classes, and therefore + copying happens less often. + + Internal fragmentation is limited to 20% for all but the smallest size + classes (those less than four times the quantum). (1B + 4 KiB) + and (1B + 4 MiB) previously suffered nearly 50% internal fragmentation. + + Chunk fragmentation tends to be lower because there are fewer distinct run + sizes to pack. + - Add support for explicit tcaches. The "tcache.create", "tcache.flush", and + "tcache.destroy" mallctls control tcache lifetime and flushing, and the + MALLOCX_TCACHE(tc) and MALLOCX_TCACHE_NONE flags to the *allocx() API + control which tcache is used for each operation. + - Implement per thread heap profiling, as well as the ability to + enable/disable heap profiling on a per thread basis. Add the "prof.reset", + "prof.lg_sample", "thread.prof.name", "thread.prof.active", + "opt.prof_thread_active_init", "prof.thread_active_init", and + "thread.prof.active" mallctls. + - Add support for per arena application-specified chunk allocators, configured + via the "arena..chunk_hooks" mallctl. + - Refactor huge allocation to be managed by arenas, so that arenas now + function as general purpose independent allocators. This is important in + the context of user-specified chunk allocators, aside from the scalability + benefits. Related new statistics: + + The "stats.arenas..huge.allocated", "stats.arenas..huge.nmalloc", + "stats.arenas..huge.ndalloc", and "stats.arenas..huge.nrequests" + mallctls provide high level per arena huge allocation statistics. + + The "arenas.nhchunks", "arenas.hchunk..size", + "stats.arenas..hchunks..nmalloc", + "stats.arenas..hchunks..ndalloc", + "stats.arenas..hchunks..nrequests", and + "stats.arenas..hchunks..curhchunks" mallctls provide per size class + statistics. + - Add the 'util' column to malloc_stats_print() output, which reports the + proportion of available regions that are currently in use for each small + size class. + - Add "alloc" and "free" modes for for junk filling (see the "opt.junk" + mallctl), so that it is possible to separately enable junk filling for + allocation versus deallocation. + - Add the jemalloc-config script, which provides information about how + jemalloc was configured, and how to integrate it into application builds. + - Add metadata statistics, which are accessible via the "stats.metadata", + "stats.arenas..metadata.mapped", and + "stats.arenas..metadata.allocated" mallctls. + - Add the "stats.resident" mallctl, which reports the upper limit of + physically resident memory mapped by the allocator. + - Add per arena control over unused dirty page purging, via the + "arenas.lg_dirty_mult", "arena..lg_dirty_mult", and + "stats.arenas..lg_dirty_mult" mallctls. + - Add the "prof.gdump" mallctl, which makes it possible to toggle the gdump + feature on/off during program execution. + - Add sdallocx(), which implements sized deallocation. The primary + optimization over dallocx() is the removal of a metadata read, which often + suffers an L1 cache miss. + - Add missing header includes in jemalloc/jemalloc.h, so that applications + only have to #include . + - Add support for additional platforms: + + Bitrig + + Cygwin + + DragonFlyBSD + + iOS + + OpenBSD + + OpenRISC/or1k + + Optimizations: + - Maintain dirty runs in per arena LRUs rather than in per arena trees of + dirty-run-containing chunks. In practice this change significantly reduces + dirty page purging volume. + - Integrate whole chunks into the unused dirty page purging machinery. This + reduces the cost of repeated huge allocation/deallocation, because it + effectively introduces a cache of chunks. + - Split the arena chunk map into two separate arrays, in order to increase + cache locality for the frequently accessed bits. + - Move small run metadata out of runs, into arena chunk headers. This reduces + run fragmentation, smaller runs reduce external fragmentation for small size + classes, and packed (less uniformly aligned) metadata layout improves CPU + cache set distribution. + - Randomly distribute large allocation base pointer alignment relative to page + boundaries in order to more uniformly utilize CPU cache sets. This can be + disabled via the --disable-cache-oblivious configure option, and queried via + the "config.cache_oblivious" mallctl. + - Micro-optimize the fast paths for the public API functions. + - Refactor thread-specific data to reside in a single structure. This assures + that only a single TLS read is necessary per call into the public API. + - Implement in-place huge allocation growing and shrinking. + - Refactor rtree (radix tree for chunk lookups) to be lock-free, and make + additional optimizations that reduce maximum lookup depth to one or two + levels. This resolves what was a concurrency bottleneck for per arena huge + allocation, because a global data structure is critical for determining + which arenas own which huge allocations. + + Incompatible changes: + - Replace --enable-cc-silence with --disable-cc-silence to suppress spurious + warnings by default. + - Assure that the constness of malloc_usable_size()'s return type matches that + of the system implementation. + - Change the heap profile dump format to support per thread heap profiling, + rename pprof to jeprof, and enhance it with the --thread= option. As a + result, the bundled jeprof must now be used rather than the upstream + (gperftools) pprof. + - Disable "opt.prof_final" by default, in order to avoid atexit(3), which can + internally deadlock on some platforms. + - Change the "arenas.nlruns" mallctl type from size_t to unsigned. + - Replace the "stats.arenas..bins..allocated" mallctl with + "stats.arenas..bins..curregs". + - Ignore MALLOC_CONF in set{uid,gid,cap} binaries. + - Ignore MALLOCX_ARENA(a) in dallocx(), in favor of using the + MALLOCX_TCACHE(tc) and MALLOCX_TCACHE_NONE flags to control tcache usage. + + Removed features: + - Remove the *allocm() API, which is superseded by the *allocx() API. + - Remove the --enable-dss options, and make dss non-optional on all platforms + which support sbrk(2). + - Remove the "arenas.purge" mallctl, which was obsoleted by the + "arena..purge" mallctl in 3.1.0. + - Remove the unnecessary "opt.valgrind" mallctl; jemalloc automatically + detects whether it is running inside Valgrind. + - Remove the "stats.huge.allocated", "stats.huge.nmalloc", and + "stats.huge.ndalloc" mallctls. + - Remove the --enable-mremap option. + - Remove the "stats.chunks.current", "stats.chunks.total", and + "stats.chunks.high" mallctls. + + Bug fixes: + - Fix the cactive statistic to decrease (rather than increase) when active + memory decreases. This regression was first released in 3.5.0. + - Fix OOM handling in memalign() and valloc(). A variant of this bug existed + in all releases since 2.0.0, which introduced these functions. + - Fix an OOM-related regression in arena_tcache_fill_small(), which could + cause cache corruption on OOM. This regression was present in all releases + from 2.2.0 through 3.6.0. + - Fix size class overflow handling for malloc(), posix_memalign(), memalign(), + calloc(), and realloc() when profiling is enabled. + - Fix the "arena..dss" mallctl to return an error if "primary" or + "secondary" precedence is specified, but sbrk(2) is not supported. + - Fix fallback lg_floor() implementations to handle extremely large inputs. + - Ensure the default purgeable zone is after the default zone on OS X. + - Fix latent bugs in atomic_*(). + - Fix the "arena..dss" mallctl to handle read-only calls. + - Fix tls_model configuration to enable the initial-exec model when possible. + - Mark malloc_conf as a weak symbol so that the application can override it. + - Correctly detect glibc's adaptive pthread mutexes. + - Fix the --without-export configure option. + * 3.6.0 (March 31, 2014) This version contains a critical bug fix for a regression present in 3.5.0 and @@ -21,7 +516,7 @@ found in the git revision history: backtracing to be reliable. - Use dss allocation precedence for huge allocations as well as small/large allocations. - - Fix test assertion failure message formatting. This bug did not manifect on + - Fix test assertion failure message formatting. This bug did not manifest on x86_64 systems because of implementation subtleties in va_list. - Fix inconsequential test failures for hash and SFMT code. @@ -516,7 +1011,7 @@ found in the git revision history: - Make it possible for the application to manually flush a thread's cache, via the "tcache.flush" mallctl. - Base maximum dirty page count on proportion of active memory. - - Compute various addtional run-time statistics, including per size class + - Compute various additional run-time statistics, including per size class statistics for large objects. - Expose malloc_stats_print(), which can be called repeatedly by the application. diff --git a/INSTALL b/INSTALL index 07f51d1e25..19196ec384 100644 --- a/INSTALL +++ b/INSTALL @@ -1,10 +1,23 @@ -Building and installing jemalloc can be as simple as typing the following while -in the root directory of the source tree: +Building and installing a packaged release of jemalloc can be as simple as +typing the following while in the root directory of the source tree: ./configure make make install +If building from unpackaged developer sources, the simplest command sequence +that might work is: + + ./autogen.sh + make dist + make + make install + +Note that documentation is not built by the default target because doing so +would create a dependency on xsltproc in packaged releases, hence the +requirement to either run 'make dist' or avoid installing docs via the various +install_* targets documented below. + === Advanced configuration ===================================================== The 'configure' script supports numerous options that allow control of which @@ -22,6 +35,10 @@ any of the following arguments (not a definitive list) to 'configure': will cause files to be installed into /usr/local/include, /usr/local/lib, and /usr/local/man. +--with-version=..--g + Use the specified version string rather than trying to generate one (if in + a git repository) or use existing the VERSION file (if present). + --with-rpath= Embed one or more library paths, so that libjemalloc can find the libraries it is linked to. This works only on ELF-based systems. @@ -56,7 +73,7 @@ any of the following arguments (not a definitive list) to 'configure': replace the "malloc", "calloc", etc. symbols. --without-export - Don't export public APIs. This can be useful when building jemalloc as a + Don't export public APIs. This can be useful when building jemalloc as a static library, or to avoid exporting public APIs when using the zone allocator on OSX. @@ -71,6 +88,14 @@ any of the following arguments (not a definitive list) to 'configure': versions of jemalloc can coexist in the same installation directory. For example, libjemalloc.so.0 becomes libjemalloc.so.0. +--with-malloc-conf= + Embed as a run-time options string that is processed prior to + the malloc_conf global variable, the /etc/malloc.conf symlink, and the + MALLOC_CONF environment variable. For example, to change the default chunk + size to 256 KiB: + + --with-malloc-conf=lg_chunk:18 + --disable-cc-silence Disable code that silences non-useful compiler warnings. This is mainly useful during development when auditing the set of warnings that are being @@ -94,15 +119,15 @@ any of the following arguments (not a definitive list) to 'configure': there are interactions between the various coverage targets, so it is usually advisable to run 'make clean' between repeated code coverage runs. ---enable-ivsalloc - Enable validation code, which verifies that pointers reside within - jemalloc-owned chunks before dereferencing them. This incurs a substantial - performance hit. - --disable-stats Disable statistics gathering functionality. See the "opt.stats_print" option documentation for usage details. +--enable-ivsalloc + Enable validation code, which verifies that pointers reside within + jemalloc-owned chunks before dereferencing them. This incurs a minor + performance hit. + --enable-prof Enable heap profiling and leak detection functionality. See the "opt.prof" option documentation for usage details. When enabled, there are several @@ -132,11 +157,12 @@ any of the following arguments (not a definitive list) to 'configure': released in bulk, thus reducing the total number of mutex operations. See the "opt.tcache" option for usage details. ---enable-mremap - Enable huge realloc() via mremap(2). mremap() is disabled by default - because the flavor used is specific to Linux, which has a quirk in its - virtual memory allocation algorithm that causes semi-permanent VM map holes - under normal jemalloc operation. +--disable-thp + Disable transparent huge page (THP) integration. On systems with THP + support, THPs are explicitly disabled as a side effect of unused dirty page + purging for chunks that back small and/or large allocations, because such + chunks typically comprise active, unused dirty, and untouched clean + pages. --disable-munmap Disable virtual memory deallocation via munmap(2); instead keep track of @@ -154,7 +180,7 @@ any of the following arguments (not a definitive list) to 'configure': Disable support for Valgrind. --disable-zone-allocator - Disable zone allocator for Darwin. This means jemalloc won't be hooked as + Disable zone allocator for Darwin. This means jemalloc won't be hooked as the default allocator on OSX/iOS. --enable-utrace @@ -178,25 +204,125 @@ any of the following arguments (not a definitive list) to 'configure': thread-local variables via the __thread keyword. If TLS is available, jemalloc uses it for several purposes. +--disable-cache-oblivious + Disable cache-oblivious large allocation alignment for large allocation + requests with no alignment constraints. If this feature is disabled, all + large allocations are page-aligned as an implementation artifact, which can + severely harm CPU cache utilization. However, the cache-oblivious layout + comes at the cost of one extra page per large allocation, which in the + most extreme case increases physical memory usage for the 16 KiB size class + to 20 KiB. + +--disable-syscall + Disable use of syscall(2) rather than {open,read,write,close}(2). This is + intended as a workaround for systems that place security limitations on + syscall(2). + --with-xslroot= Specify where to find DocBook XSL stylesheets when building the documentation. +--with-lg-page= + Specify the base 2 log of the system page size. This option is only useful + when cross compiling, since the configure script automatically determines + the host's page size by default. + +--with-lg-page-sizes= + Specify the comma-separated base 2 logs of the page sizes to support. This + option may be useful when cross-compiling in combination with + --with-lg-page, but its primary use case is for integration with FreeBSD's + libc, wherein jemalloc is embedded. + +--with-lg-size-class-group= + Specify the base 2 log of how many size classes to use for each doubling in + size. By default jemalloc uses =2, which results in + e.g. the following size classes: + + [...], 64, + 80, 96, 112, 128, + 160, [...] + + =3 results in e.g. the following size classes: + + [...], 64, + 72, 80, 88, 96, 104, 112, 120, 128, + 144, [...] + + The minimal =0 causes jemalloc to only provide size + classes that are powers of 2: + + [...], + 64, + 128, + 256, + [...] + + An implementation detail currently limits the total number of small size + classes to 255, and a compilation error will result if the + you specify cannot be supported. The limit is + roughly =4, depending on page size. + +--with-lg-quantum= + Specify the base 2 log of the minimum allocation alignment. jemalloc needs + to know the minimum alignment that meets the following C standard + requirement (quoted from the April 12, 2011 draft of the C11 standard): + + The pointer returned if the allocation succeeds is suitably aligned so + that it may be assigned to a pointer to any type of object with a + fundamental alignment requirement and then used to access such an object + or an array of such objects in the space allocated [...] + + This setting is architecture-specific, and although jemalloc includes known + safe values for the most commonly used modern architectures, there is a + wrinkle related to GNU libc (glibc) that may impact your choice of + . On most modern architectures, this mandates 16-byte alignment + (=4), but the glibc developers chose not to meet this + requirement for performance reasons. An old discussion can be found at + https://sourceware.org/bugzilla/show_bug.cgi?id=206 . Unlike glibc, + jemalloc does follow the C standard by default (caveat: jemalloc + technically cheats if --with-lg-tiny-min is smaller than + --with-lg-quantum), but the fact that Linux systems already work around + this allocator noncompliance means that it is generally safe in practice to + let jemalloc's minimum alignment follow glibc's lead. If you specify + --with-lg-quantum=3 during configuration, jemalloc will provide additional + size classes that are not 16-byte-aligned (24, 40, and 56, assuming + --with-lg-size-class-group=2). + +--with-lg-tiny-min= + Specify the base 2 log of the minimum tiny size class to support. Tiny + size classes are powers of 2 less than the quantum, and are only + incorporated if is less than (see + --with-lg-quantum). Tiny size classes technically violate the C standard + requirement for minimum alignment, and crashes could conceivably result if + the compiler were to generate instructions that made alignment assumptions, + both because illegal instruction traps could result, and because accesses + could straddle page boundaries and cause segmentation faults due to + accessing unmapped addresses. + + The default of =3 works well in practice even on architectures + that technically require 16-byte alignment, probably for the same reason + --with-lg-quantum=3 works. Smaller tiny size classes can, and will, cause + crashes (see https://bugzilla.mozilla.org/show_bug.cgi?id=691003 for an + example). + + This option is rarely useful, and is mainly provided as documentation of a + subtle implementation detail. If you do use this option, specify a + value in [3, ..., ]. + The following environment variables (not a definitive list) impact configure's behavior: CFLAGS="?" - Pass these flags to the compiler. You probably shouldn't define this unless - you know what you are doing. (Use EXTRA_CFLAGS instead.) + Pass these flags to the C compiler. Any flags set by the configure script + are prepended, which means explicitly set flags generally take precedence. + Take care when specifying flags such as -Werror, because configure tests may + be affected in undesirable ways. EXTRA_CFLAGS="?" - Append these flags to CFLAGS. This makes it possible to add flags such as - -Werror, while allowing the configure script to determine what other flags - are appropriate for the specified configuration. - - The configure script specifically checks whether an optimization flag (-O*) - is specified in EXTRA_CFLAGS, and refrains from specifying an optimization - level if it finds that one has already been specified. + Append these flags to CFLAGS, without passing them to the compiler during + configuration. This makes it possible to add flags such as -Werror, while + allowing the configure script to determine what other flags are appropriate + for the specified configuration. CPPFLAGS="?" Pass these flags to the C preprocessor. Note that CFLAGS is not passed to @@ -212,6 +338,15 @@ LDFLAGS="?" PATH="?" 'configure' uses this to find programs. +In some cases it may be necessary to work around configuration results that do +not match reality. For example, Linux 4.5 added support for the MADV_FREE flag +to madvise(2), which can cause problems if building on a host with MADV_FREE +support and deploying to a target without. To work around this, use a cache +file to override the relevant configuration variable defined in configure.ac, +e.g.: + + echo "je_cv_madv_free=no" > config.cache && ./configure -C + === Advanced compilation ======================================================= To build only parts of jemalloc, use the following targets: diff --git a/Makefile.in b/Makefile.in index e411804a2d..e49a8711c1 100644 --- a/Makefile.in +++ b/Makefile.in @@ -24,7 +24,10 @@ abs_objroot := @abs_objroot@ # Build parameters. CPPFLAGS := @CPPFLAGS@ -I$(srcroot)include -I$(objroot)include -CFLAGS := @CFLAGS@ +CONFIGURE_CFLAGS := @CONFIGURE_CFLAGS@ +SPECIFIED_CFLAGS := @SPECIFIED_CFLAGS@ +EXTRA_CFLAGS := @EXTRA_CFLAGS@ +CFLAGS := $(strip $(CONFIGURE_CFLAGS) $(SPECIFIED_CFLAGS) $(EXTRA_CFLAGS)) LDFLAGS := @LDFLAGS@ EXTRA_LDFLAGS := @EXTRA_LDFLAGS@ LIBS := @LIBS@ @@ -42,23 +45,29 @@ XSLTPROC := @XSLTPROC@ AUTOCONF := @AUTOCONF@ _RPATH = @RPATH@ RPATH = $(if $(1),$(call _RPATH,$(1))) -cfghdrs_in := @cfghdrs_in@ +cfghdrs_in := $(addprefix $(srcroot),@cfghdrs_in@) cfghdrs_out := @cfghdrs_out@ -cfgoutputs_in := @cfgoutputs_in@ +cfgoutputs_in := $(addprefix $(srcroot),@cfgoutputs_in@) cfgoutputs_out := @cfgoutputs_out@ enable_autogen := @enable_autogen@ enable_code_coverage := @enable_code_coverage@ +enable_prof := @enable_prof@ enable_valgrind := @enable_valgrind@ enable_zone_allocator := @enable_zone_allocator@ +MALLOC_CONF := @JEMALLOC_CPREFIX@MALLOC_CONF +link_whole_archive := @link_whole_archive@ DSO_LDFLAGS = @DSO_LDFLAGS@ SOREV = @SOREV@ PIC_CFLAGS = @PIC_CFLAGS@ CTARGET = @CTARGET@ LDTARGET = @LDTARGET@ +TEST_LD_MODE = @TEST_LD_MODE@ MKLIB = @MKLIB@ AR = @AR@ ARFLAGS = @ARFLAGS@ CC_MM = @CC_MM@ +LM := @LM@ +INSTALL = @INSTALL@ ifeq (macho, $(ABI)) TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH="$(objroot)lib" @@ -73,16 +82,36 @@ endif LIBJEMALLOC := $(LIBPREFIX)jemalloc$(install_suffix) # Lists of files. -BINS := $(srcroot)bin/pprof $(objroot)bin/jemalloc.sh +BINS := $(objroot)bin/jemalloc-config $(objroot)bin/jemalloc.sh $(objroot)bin/jeprof C_HDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h -C_SRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c \ - $(srcroot)src/atomic.c $(srcroot)src/base.c $(srcroot)src/bitmap.c \ - $(srcroot)src/chunk.c $(srcroot)src/chunk_dss.c \ - $(srcroot)src/chunk_mmap.c $(srcroot)src/ckh.c $(srcroot)src/ctl.c \ - $(srcroot)src/extent.c $(srcroot)src/hash.c $(srcroot)src/huge.c \ - $(srcroot)src/mb.c $(srcroot)src/mutex.c $(srcroot)src/prof.c \ - $(srcroot)src/quarantine.c $(srcroot)src/rtree.c $(srcroot)src/stats.c \ - $(srcroot)src/tcache.c $(srcroot)src/util.c $(srcroot)src/tsd.c +C_SRCS := $(srcroot)src/jemalloc.c \ + $(srcroot)src/arena.c \ + $(srcroot)src/atomic.c \ + $(srcroot)src/base.c \ + $(srcroot)src/bitmap.c \ + $(srcroot)src/chunk.c \ + $(srcroot)src/chunk_dss.c \ + $(srcroot)src/chunk_mmap.c \ + $(srcroot)src/ckh.c \ + $(srcroot)src/ctl.c \ + $(srcroot)src/extent.c \ + $(srcroot)src/hash.c \ + $(srcroot)src/huge.c \ + $(srcroot)src/mb.c \ + $(srcroot)src/mutex.c \ + $(srcroot)src/nstime.c \ + $(srcroot)src/pages.c \ + $(srcroot)src/prng.c \ + $(srcroot)src/prof.c \ + $(srcroot)src/quarantine.c \ + $(srcroot)src/rtree.c \ + $(srcroot)src/stats.c \ + $(srcroot)src/spin.c \ + $(srcroot)src/tcache.c \ + $(srcroot)src/ticker.c \ + $(srcroot)src/tsd.c \ + $(srcroot)src/util.c \ + $(srcroot)src/witness.c ifeq ($(enable_valgrind), 1) C_SRCS += $(srcroot)src/valgrind.c endif @@ -101,49 +130,85 @@ DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV) ifneq ($(SOREV),$(SO)) DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO) endif +ifeq (1, $(link_whole_archive)) +LJEMALLOC := -Wl,--whole-archive -L$(objroot)lib -l$(LIBJEMALLOC) -Wl,--no-whole-archive +else +LJEMALLOC := $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) +endif +PC := $(objroot)jemalloc.pc MAN3 := $(objroot)doc/jemalloc$(install_suffix).3 DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml -DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.html) -DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.3) +DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.html) +DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) -C_TESTLIB_SRCS := $(srcroot)test/src/math.c $(srcroot)test/src/mtx.c \ +C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \ + $(srcroot)test/src/btalloc_1.c $(srcroot)test/src/math.c \ + $(srcroot)test/src/mtx.c $(srcroot)test/src/mq.c \ $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ - $(srcroot)test/src/thd.c -C_UTIL_INTEGRATION_SRCS := $(srcroot)src/util.c -TESTS_UNIT := $(srcroot)test/unit/bitmap.c \ + $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c +ifeq (1, $(link_whole_archive)) +C_UTIL_INTEGRATION_SRCS := +else +C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/util.c +endif +TESTS_UNIT := \ + $(srcroot)test/unit/a0.c \ + $(srcroot)test/unit/arena_reset.c \ + $(srcroot)test/unit/atomic.c \ + $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/ckh.c \ + $(srcroot)test/unit/decay.c \ + $(srcroot)test/unit/extent_quantize.c \ + $(srcroot)test/unit/fork.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/junk.c \ + $(srcroot)test/unit/junk_alloc.c \ + $(srcroot)test/unit/junk_free.c \ + $(srcroot)test/unit/lg_chunk.c \ $(srcroot)test/unit/mallctl.c \ $(srcroot)test/unit/math.c \ $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c \ + $(srcroot)test/unit/pack.c \ + $(srcroot)test/unit/pages.c \ + $(srcroot)test/unit/ph.c \ + $(srcroot)test/unit/prng.c \ $(srcroot)test/unit/prof_accum.c \ + $(srcroot)test/unit/prof_active.c \ $(srcroot)test/unit/prof_gdump.c \ $(srcroot)test/unit/prof_idump.c \ + $(srcroot)test/unit/prof_reset.c \ + $(srcroot)test/unit/prof_thread_name.c \ $(srcroot)test/unit/ql.c \ $(srcroot)test/unit/qr.c \ $(srcroot)test/unit/quarantine.c \ $(srcroot)test/unit/rb.c \ $(srcroot)test/unit/rtree.c \ + $(srcroot)test/unit/run_quantize.c \ $(srcroot)test/unit/SFMT.c \ + $(srcroot)test/unit/size_classes.c \ + $(srcroot)test/unit/smoothstep.c \ $(srcroot)test/unit/stats.c \ + $(srcroot)test/unit/stats_print.c \ + $(srcroot)test/unit/ticker.c \ + $(srcroot)test/unit/nstime.c \ $(srcroot)test/unit/tsd.c \ $(srcroot)test/unit/util.c \ + $(srcroot)test/unit/witness.c \ $(srcroot)test/unit/zero.c -TESTS_UNIT_AUX := $(srcroot)test/unit/prof_accum_a.c \ - $(srcroot)test/unit/prof_accum_b.c TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ + $(srcroot)test/integration/sdallocx.c \ $(srcroot)test/integration/mallocx.c \ $(srcroot)test/integration/MALLOCX_ARENA.c \ - $(srcroot)test/integration/mremap.c \ + $(srcroot)test/integration/overflow.c \ $(srcroot)test/integration/posix_memalign.c \ $(srcroot)test/integration/rallocx.c \ $(srcroot)test/integration/thread_arena.c \ $(srcroot)test/integration/thread_tcache_enabled.c \ - $(srcroot)test/integration/xallocx.c -TESTS_STRESS := + $(srcroot)test/integration/xallocx.c \ + $(srcroot)test/integration/chunk.c +TESTS_STRESS := $(srcroot)test/stress/microbench.c TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_STRESS) C_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.$(O)) @@ -156,10 +221,9 @@ C_TESTLIB_STRESS_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.stress.$(O)) C_TESTLIB_OBJS := $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(C_TESTLIB_STRESS_OBJS) TESTS_UNIT_OBJS := $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%.$(O)) -TESTS_UNIT_AUX_OBJS := $(TESTS_UNIT_AUX:$(srcroot)%.c=$(objroot)%.$(O)) TESTS_INTEGRATION_OBJS := $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%.$(O)) TESTS_STRESS_OBJS := $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%.$(O)) -TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_UNIT_AUX_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_STRESS_OBJS) +TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_STRESS_OBJS) .PHONY: all dist build_doc_html build_doc_man build_doc .PHONY: install_bin install_include install_lib @@ -173,10 +237,10 @@ all: build_lib dist: build_doc -$(srcroot)doc/%.html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl +$(objroot)doc/%.html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl $(XSLTPROC) -o $@ $(objroot)doc/html.xsl $< -$(srcroot)doc/%.3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl +$(objroot)doc/%.3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl $(XSLTPROC) -o $@ $(objroot)doc/manpages.xsl $< build_doc_html: $(DOCS_HTML) @@ -208,18 +272,12 @@ $(C_TESTLIB_STRESS_OBJS): $(objroot)test/src/%.stress.$(O): $(srcroot)test/src/% $(C_TESTLIB_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST -DJEMALLOC_STRESS_TESTLIB $(C_TESTLIB_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include $(TESTS_UNIT_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST -$(TESTS_UNIT_AUX_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST -define make-unit-link-dep -$(1): TESTS_UNIT_LINK_OBJS += $(2) -$(1): $(2) -endef -$(foreach test, $(TESTS_UNIT:$(srcroot)test/unit/%.c=$(objroot)test/unit/%$(EXE)), $(eval $(call make-unit-link-dep,$(test),$(filter $(test:%=%_a.$(O)) $(test:%=%_b.$(O)),$(TESTS_UNIT_AUX_OBJS))))) $(TESTS_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST $(TESTS_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST $(TESTS_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c $(TESTS_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include ifneq ($(IMPORTLIB),$(SO)) -$(C_OBJS): CPPFLAGS += -DDLLEXPORT +$(C_OBJS) $(C_JET_OBJS): CPPFLAGS += -DDLLEXPORT endif ifndef CC_MM @@ -228,7 +286,7 @@ HEADER_DIRS = $(srcroot)include/jemalloc/internal \ $(objroot)include/jemalloc $(objroot)include/jemalloc/internal HEADERS = $(wildcard $(foreach dir,$(HEADER_DIRS),$(dir)/*.h)) $(C_OBJS) $(C_PIC_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): $(HEADERS) -$(TESTS_OBJS): $(objroot)test/unit/jemalloc_test.h +$(TESTS_OBJS): $(objroot)test/include/test/jemalloc_test.h endif $(C_OBJS) $(C_PIC_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): %.$(O): @@ -258,62 +316,69 @@ $(STATIC_LIBS): $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) + $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LJEMALLOC) $(LDFLAGS) $(filter-out -lm,$(filter -lrt -lpthread,$(LIBS))) $(LM) $(EXTRA_LDFLAGS) $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) + $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) build_lib: build_lib_shared build_lib_static install_bin: - install -d $(BINDIR) + $(INSTALL) -d $(BINDIR) @for b in $(BINS); do \ - echo "install -m 755 $$b $(BINDIR)"; \ - install -m 755 $$b $(BINDIR); \ + echo "$(INSTALL) -m 755 $$b $(BINDIR)"; \ + $(INSTALL) -m 755 $$b $(BINDIR); \ done install_include: - install -d $(INCLUDEDIR)/jemalloc + $(INSTALL) -d $(INCLUDEDIR)/jemalloc @for h in $(C_HDRS); do \ - echo "install -m 644 $$h $(INCLUDEDIR)/jemalloc"; \ - install -m 644 $$h $(INCLUDEDIR)/jemalloc; \ + echo "$(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc"; \ + $(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc; \ done install_lib_shared: $(DSOS) - install -d $(LIBDIR) - install -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR) + $(INSTALL) -d $(LIBDIR) + $(INSTALL) -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR) ifneq ($(SOREV),$(SO)) ln -sf $(LIBJEMALLOC).$(SOREV) $(LIBDIR)/$(LIBJEMALLOC).$(SO) endif install_lib_static: $(STATIC_LIBS) - install -d $(LIBDIR) + $(INSTALL) -d $(LIBDIR) @for l in $(STATIC_LIBS); do \ - echo "install -m 755 $$l $(LIBDIR)"; \ - install -m 755 $$l $(LIBDIR); \ + echo "$(INSTALL) -m 755 $$l $(LIBDIR)"; \ + $(INSTALL) -m 755 $$l $(LIBDIR); \ done -install_lib: install_lib_shared install_lib_static +install_lib_pc: $(PC) + $(INSTALL) -d $(LIBDIR)/pkgconfig + @for l in $(PC); do \ + echo "$(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig"; \ + $(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig; \ +done + +install_lib: install_lib_shared install_lib_static install_lib_pc install_doc_html: - install -d $(DATADIR)/doc/jemalloc$(install_suffix) + $(INSTALL) -d $(DATADIR)/doc/jemalloc$(install_suffix) @for d in $(DOCS_HTML); do \ - echo "install -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \ - install -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \ + echo "$(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \ + $(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \ done install_doc_man: - install -d $(MANDIR)/man3 + $(INSTALL) -d $(MANDIR)/man3 @for d in $(DOCS_MAN3); do \ - echo "install -m 644 $$d $(MANDIR)/man3"; \ - install -m 644 $$d $(MANDIR)/man3; \ + echo "$(INSTALL) -m 644 $$d $(MANDIR)/man3"; \ + $(INSTALL) -m 644 $$d $(MANDIR)/man3; \ done install_doc: install_doc_html install_doc_man @@ -329,18 +394,27 @@ check_unit_dir: @mkdir -p $(objroot)test/unit check_integration_dir: @mkdir -p $(objroot)test/integration -check_stress_dir: +stress_dir: @mkdir -p $(objroot)test/stress -check_dir: check_unit_dir check_integration_dir check_stress_dir +check_dir: check_unit_dir check_integration_dir check_unit: tests_unit check_unit_dir - $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) + $(MALLOC_CONF)="purge:ratio" $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) + $(MALLOC_CONF)="purge:decay" $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) +check_integration_prof: tests_integration check_integration_dir +ifeq ($(enable_prof), 1) + $(MALLOC_CONF)="prof:true" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) + $(MALLOC_CONF)="prof:true,prof_active:false" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) +endif +check_integration_decay: tests_integration check_integration_dir + $(MALLOC_CONF)="purge:decay,decay_time:-1" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) + $(MALLOC_CONF)="purge:decay,decay_time:0" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) + $(MALLOC_CONF)="purge:decay" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) check_integration: tests_integration check_integration_dir $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) -check_stress: tests_stress check_stress_dir +stress: tests_stress stress_dir $(SHELL) $(objroot)test/test.sh $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%) -check: tests check_dir - $(SHELL) $(objroot)test/test.sh $(TESTS:$(srcroot)%.c=$(objroot)%) +check: check_unit check_integration check_integration_decay check_integration_prof ifeq ($(enable_code_coverage), 1) coverage_unit: check_unit @@ -354,7 +428,7 @@ coverage_integration: check_integration $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src integration $(C_TESTLIB_INTEGRATION_OBJS) $(SHELL) $(srcroot)coverage.sh $(srcroot)test/integration integration $(TESTS_INTEGRATION_OBJS) -coverage_stress: check_stress +coverage_stress: stress $(SHELL) $(srcroot)coverage.sh $(srcroot)src pic $(C_PIC_OBJS) $(SHELL) $(srcroot)coverage.sh $(srcroot)src jet $(C_JET_OBJS) $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src stress $(C_TESTLIB_STRESS_OBJS) @@ -399,8 +473,9 @@ clean: rm -f $(objroot)*.gcov.* distclean: clean - rm -rf $(objroot)autom4te.cache + rm -f $(objroot)bin/jemalloc-config rm -f $(objroot)bin/jemalloc.sh + rm -f $(objroot)bin/jeprof rm -f $(objroot)config.log rm -f $(objroot)config.status rm -f $(objroot)config.stamp @@ -409,7 +484,7 @@ distclean: clean relclean: distclean rm -f $(objroot)configure - rm -f $(srcroot)VERSION + rm -f $(objroot)VERSION rm -f $(DOCS_HTML) rm -f $(DOCS_MAN3) diff --git a/README b/README index 9b268f4228..5ff24a9ef0 100644 --- a/README +++ b/README @@ -17,4 +17,4 @@ jemalloc. The ChangeLog file contains a brief summary of changes for each release. -URL: http://www.canonware.com/jemalloc/ +URL: http://jemalloc.net/ diff --git a/bin/jemalloc-config.in b/bin/jemalloc-config.in new file mode 100644 index 0000000000..b016c8d331 --- /dev/null +++ b/bin/jemalloc-config.in @@ -0,0 +1,79 @@ +#!/bin/sh + +usage() { + cat < +Options: + --help | -h : Print usage. + --version : Print jemalloc version. + --revision : Print shared library revision number. + --config : Print configure options used to build jemalloc. + --prefix : Print installation directory prefix. + --bindir : Print binary installation directory. + --datadir : Print data installation directory. + --includedir : Print include installation directory. + --libdir : Print library installation directory. + --mandir : Print manual page installation directory. + --cc : Print compiler used to build jemalloc. + --cflags : Print compiler flags used to build jemalloc. + --cppflags : Print preprocessor flags used to build jemalloc. + --ldflags : Print library flags used to build jemalloc. + --libs : Print libraries jemalloc was linked against. +EOF +} + +prefix="@prefix@" +exec_prefix="@exec_prefix@" + +case "$1" in +--help | -h) + usage + exit 0 + ;; +--version) + echo "@jemalloc_version@" + ;; +--revision) + echo "@rev@" + ;; +--config) + echo "@CONFIG@" + ;; +--prefix) + echo "@PREFIX@" + ;; +--bindir) + echo "@BINDIR@" + ;; +--datadir) + echo "@DATADIR@" + ;; +--includedir) + echo "@INCLUDEDIR@" + ;; +--libdir) + echo "@LIBDIR@" + ;; +--mandir) + echo "@MANDIR@" + ;; +--cc) + echo "@CC@" + ;; +--cflags) + echo "@CFLAGS@" + ;; +--cppflags) + echo "@CPPFLAGS@" + ;; +--ldflags) + echo "@LDFLAGS@ @EXTRA_LDFLAGS@" + ;; +--libs) + echo "@LIBS@" + ;; +*) + usage + exit 1 +esac diff --git a/bin/pprof b/bin/jeprof.in old mode 100755 new mode 100644 similarity index 93% rename from bin/pprof rename to bin/jeprof.in index 328138cd98..42087fcec2 --- a/bin/pprof +++ b/bin/jeprof.in @@ -2,11 +2,11 @@ # Copyright (c) 1998-2007, Google Inc. # All rights reserved. -# +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: -# +# # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above @@ -16,7 +16,7 @@ # * Neither the name of Google Inc. nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. -# +# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -40,28 +40,28 @@ # # Examples: # -# % tools/pprof "program" "profile" +# % tools/jeprof "program" "profile" # Enters "interactive" mode # -# % tools/pprof --text "program" "profile" +# % tools/jeprof --text "program" "profile" # Generates one line per procedure # -# % tools/pprof --gv "program" "profile" +# % tools/jeprof --gv "program" "profile" # Generates annotated call-graph and displays via "gv" # -# % tools/pprof --gv --focus=Mutex "program" "profile" +# % tools/jeprof --gv --focus=Mutex "program" "profile" # Restrict to code paths that involve an entry that matches "Mutex" # -# % tools/pprof --gv --focus=Mutex --ignore=string "program" "profile" +# % tools/jeprof --gv --focus=Mutex --ignore=string "program" "profile" # Restrict to code paths that involve an entry that matches "Mutex" # and does not match "string" # -# % tools/pprof --list=IBF_CheckDocid "program" "profile" +# % tools/jeprof --list=IBF_CheckDocid "program" "profile" # Generates disassembly listing of all routines with at least one # sample that match the --list= pattern. The listing is # annotated with the flat and cumulative sample counts at each line. # -# % tools/pprof --disasm=IBF_CheckDocid "program" "profile" +# % tools/jeprof --disasm=IBF_CheckDocid "program" "profile" # Generates disassembly listing of all routines with at least one # sample that match the --disasm= pattern. The listing is # annotated with the flat and cumulative sample counts at each PC value. @@ -72,10 +72,11 @@ use strict; use warnings; use Getopt::Long; +my $JEPROF_VERSION = "@jemalloc_version@"; my $PPROF_VERSION = "2.0"; # These are the object tools we use which can come from a -# user-specified location using --tools, from the PPROF_TOOLS +# user-specified location using --tools, from the JEPROF_TOOLS # environment variable, or from the environment. my %obj_tool_map = ( "objdump" => "objdump", @@ -94,7 +95,7 @@ my @EVINCE = ("evince"); # could also be xpdf or perhaps acroread my @KCACHEGRIND = ("kcachegrind"); my @PS2PDF = ("ps2pdf"); # These are used for dynamic profiles -my @URL_FETCHER = ("curl", "-s"); +my @URL_FETCHER = ("curl", "-s", "--fail"); # These are the web pages that servers need to support for dynamic profiles my $HEAP_PAGE = "/pprof/heap"; @@ -144,13 +145,13 @@ my $sep_address = undef; sub usage_string { return < +jeprof [options] is a space separated list of profile names. -pprof [options] +jeprof [options] is a list of profile files where each file contains the necessary symbol mappings as well as profile data (likely generated with --raw). -pprof [options] +jeprof [options] is a remote form. Symbols are obtained from host:port$SYMBOL_PAGE Each name can be: @@ -161,9 +162,9 @@ pprof [options] $GROWTH_PAGE, $CONTENTION_PAGE, /pprof/wall, $CENSUSPROFILE_PAGE, or /pprof/filteredprofile. For instance: - pprof http://myserver.com:80$HEAP_PAGE + jeprof http://myserver.com:80$HEAP_PAGE If / is omitted, the service defaults to $PROFILE_PAGE (cpu profiling). -pprof --symbols +jeprof --symbols Maps addresses to symbol names. In this mode, stdin should be a list of library mappings, in the same format as is found in the heap- and cpu-profile files (this loosely matches that of /proc/self/maps @@ -202,7 +203,7 @@ Output type: --pdf Generate PDF to stdout --svg Generate SVG to stdout --gif Generate GIF to stdout - --raw Generate symbolized pprof data (useful with remote fetch) + --raw Generate symbolized jeprof data (useful with remote fetch) Heap-Profile Options: --inuse_space Display in-use (mega)bytes [default] @@ -222,11 +223,14 @@ Call-graph Options: --nodefraction= Hide nodes below *total [default=.005] --edgefraction= Hide edges below *total [default=.001] --maxdegree= Max incoming/outgoing edges per node [default=8] - --focus= Focus on nodes matching - --ignore= Ignore nodes matching + --focus= Focus on backtraces with nodes matching + --thread= Show profile for thread + --ignore= Ignore backtraces with nodes matching --scale= Set GV scaling [default=0] --heapcheck Make nodes with non-0 object counts (i.e. direct leak generators) more visible + --retain= Retain only nodes that match + --exclude= Exclude all nodes that match Miscellaneous: --tools=[,...] \$PATH for object tool pathnames @@ -235,34 +239,34 @@ Miscellaneous: --version Version information Environment Variables: - PPROF_TMPDIR Profiles directory. Defaults to \$HOME/pprof - PPROF_TOOLS Prefix for object tools pathnames + JEPROF_TMPDIR Profiles directory. Defaults to \$HOME/jeprof + JEPROF_TOOLS Prefix for object tools pathnames Examples: -pprof /bin/ls ls.prof +jeprof /bin/ls ls.prof Enters "interactive" mode -pprof --text /bin/ls ls.prof +jeprof --text /bin/ls ls.prof Outputs one line per procedure -pprof --web /bin/ls ls.prof +jeprof --web /bin/ls ls.prof Displays annotated call-graph in web browser -pprof --gv /bin/ls ls.prof +jeprof --gv /bin/ls ls.prof Displays annotated call-graph via 'gv' -pprof --gv --focus=Mutex /bin/ls ls.prof +jeprof --gv --focus=Mutex /bin/ls ls.prof Restricts to code paths including a .*Mutex.* entry -pprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof +jeprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof Code paths including Mutex but not string -pprof --list=getdir /bin/ls ls.prof +jeprof --list=getdir /bin/ls ls.prof (Per-line) annotated source listing for getdir() -pprof --disasm=getdir /bin/ls ls.prof +jeprof --disasm=getdir /bin/ls ls.prof (Per-PC) annotated disassembly for getdir() -pprof http://localhost:1234/ +jeprof http://localhost:1234/ Enters "interactive" mode -pprof --text localhost:1234 +jeprof --text localhost:1234 Outputs one line per procedure for localhost:1234 -pprof --raw localhost:1234 > ./local.raw -pprof --text ./local.raw +jeprof --raw localhost:1234 > ./local.raw +jeprof --text ./local.raw Fetches a remote profile for later analysis and then analyzes it in text mode. EOF @@ -270,7 +274,8 @@ EOF sub version_string { return < \$main::opt_edgefraction, "maxdegree=i" => \$main::opt_maxdegree, "focus=s" => \$main::opt_focus, + "thread=s" => \$main::opt_thread, "ignore=s" => \$main::opt_ignore, "scale=i" => \$main::opt_scale, "heapcheck" => \$main::opt_heapcheck, + "retain=s" => \$main::opt_retain, + "exclude=s" => \$main::opt_exclude, "inuse_space!" => \$main::opt_inuse_space, "inuse_objects!" => \$main::opt_inuse_objects, "alloc_space!" => \$main::opt_alloc_space, @@ -562,66 +573,12 @@ sub Init() { } } -sub Main() { - Init(); - $main::collected_profile = undef; - @main::profile_files = (); - $main::op_time = time(); - - # Printing symbols is special and requires a lot less info that most. - if ($main::opt_symbols) { - PrintSymbols(*STDIN); # Get /proc/maps and symbols output from stdin - return; - } - - # Fetch all profile data - FetchDynamicProfiles(); - - # this will hold symbols that we read from the profile files - my $symbol_map = {}; - - # Read one profile, pick the last item on the list - my $data = ReadProfile($main::prog, pop(@main::profile_files)); - my $profile = $data->{profile}; - my $pcs = $data->{pcs}; - my $libs = $data->{libs}; # Info about main program and shared libraries - $symbol_map = MergeSymbols($symbol_map, $data->{symbols}); - - # Add additional profiles, if available. - if (scalar(@main::profile_files) > 0) { - foreach my $pname (@main::profile_files) { - my $data2 = ReadProfile($main::prog, $pname); - $profile = AddProfile($profile, $data2->{profile}); - $pcs = AddPcs($pcs, $data2->{pcs}); - $symbol_map = MergeSymbols($symbol_map, $data2->{symbols}); - } - } - - # Subtract base from profile, if specified - if ($main::opt_base ne '') { - my $base = ReadProfile($main::prog, $main::opt_base); - $profile = SubtractProfile($profile, $base->{profile}); - $pcs = AddPcs($pcs, $base->{pcs}); - $symbol_map = MergeSymbols($symbol_map, $base->{symbols}); - } +sub FilterAndPrint { + my ($profile, $symbols, $libs, $thread) = @_; # Get total data in profile my $total = TotalProfile($profile); - # Collect symbols - my $symbols; - if ($main::use_symbolized_profile) { - $symbols = FetchSymbols($pcs, $symbol_map); - } elsif ($main::use_symbol_page) { - $symbols = FetchSymbols($pcs); - } else { - # TODO(csilvers): $libs uses the /proc/self/maps data from profile1, - # which may differ from the data from subsequent profiles, especially - # if they were run on different machines. Use appropriate libs for - # each pc somehow. - $symbols = ExtractSymbols($libs, $pcs); - } - # Remove uniniteresting stack items $profile = RemoveUninterestingFrames($symbols, $profile); @@ -656,7 +613,9 @@ sub Main() { # (only matters when --heapcheck is given but we must be # compatible with old branches that did not pass --heapcheck always): if ($total != 0) { - printf("Total: %s %s\n", Unparse($total), Units()); + printf("Total%s: %s %s\n", + (defined($thread) ? " (t$thread)" : ""), + Unparse($total), Units()); } PrintText($symbols, $flat, $cumulative, -1); } elsif ($main::opt_raw) { @@ -692,6 +651,77 @@ sub Main() { } else { InteractiveMode($profile, $symbols, $libs, $total); } +} + +sub Main() { + Init(); + $main::collected_profile = undef; + @main::profile_files = (); + $main::op_time = time(); + + # Printing symbols is special and requires a lot less info that most. + if ($main::opt_symbols) { + PrintSymbols(*STDIN); # Get /proc/maps and symbols output from stdin + return; + } + + # Fetch all profile data + FetchDynamicProfiles(); + + # this will hold symbols that we read from the profile files + my $symbol_map = {}; + + # Read one profile, pick the last item on the list + my $data = ReadProfile($main::prog, pop(@main::profile_files)); + my $profile = $data->{profile}; + my $pcs = $data->{pcs}; + my $libs = $data->{libs}; # Info about main program and shared libraries + $symbol_map = MergeSymbols($symbol_map, $data->{symbols}); + + # Add additional profiles, if available. + if (scalar(@main::profile_files) > 0) { + foreach my $pname (@main::profile_files) { + my $data2 = ReadProfile($main::prog, $pname); + $profile = AddProfile($profile, $data2->{profile}); + $pcs = AddPcs($pcs, $data2->{pcs}); + $symbol_map = MergeSymbols($symbol_map, $data2->{symbols}); + } + } + + # Subtract base from profile, if specified + if ($main::opt_base ne '') { + my $base = ReadProfile($main::prog, $main::opt_base); + $profile = SubtractProfile($profile, $base->{profile}); + $pcs = AddPcs($pcs, $base->{pcs}); + $symbol_map = MergeSymbols($symbol_map, $base->{symbols}); + } + + # Collect symbols + my $symbols; + if ($main::use_symbolized_profile) { + $symbols = FetchSymbols($pcs, $symbol_map); + } elsif ($main::use_symbol_page) { + $symbols = FetchSymbols($pcs); + } else { + # TODO(csilvers): $libs uses the /proc/self/maps data from profile1, + # which may differ from the data from subsequent profiles, especially + # if they were run on different machines. Use appropriate libs for + # each pc somehow. + $symbols = ExtractSymbols($libs, $pcs); + } + + if (!defined($main::opt_thread)) { + FilterAndPrint($profile, $symbols, $libs); + } + if (defined($data->{threads})) { + foreach my $thread (sort { $a <=> $b } keys(%{$data->{threads}})) { + if (defined($main::opt_thread) && + ($main::opt_thread eq '*' || $main::opt_thread == $thread)) { + my $thread_profile = $data->{threads}{$thread}; + FilterAndPrint($thread_profile, $symbols, $libs, $thread); + } + } + } cleanup(); exit(0); @@ -780,14 +810,14 @@ sub InteractiveMode { $| = 1; # Make output unbuffered for interactive mode my ($orig_profile, $symbols, $libs, $total) = @_; - print STDERR "Welcome to pprof! For help, type 'help'.\n"; + print STDERR "Welcome to jeprof! For help, type 'help'.\n"; # Use ReadLine if it's installed and input comes from a console. if ( -t STDIN && !ReadlineMightFail() && defined(eval {require Term::ReadLine}) ) { - my $term = new Term::ReadLine 'pprof'; - while ( defined ($_ = $term->readline('(pprof) '))) { + my $term = new Term::ReadLine 'jeprof'; + while ( defined ($_ = $term->readline('(jeprof) '))) { $term->addhistory($_) if /\S/; if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) { last; # exit when we get an interactive command to quit @@ -795,7 +825,7 @@ sub InteractiveMode { } } else { # don't have readline while (1) { - print STDERR "(pprof) "; + print STDERR "(jeprof) "; $_ = ; last if ! defined $_ ; s/\r//g; # turn windows-looking lines into unix-looking lines @@ -988,7 +1018,7 @@ sub ProcessProfile { sub InteractiveHelpMessage { print STDERR <