You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
906 lines
30 KiB
906 lines
30 KiB
//===-- memprof_allocator.cpp --------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file is a part of MemProfiler, a memory profiler.
|
|
//
|
|
// Implementation of MemProf's memory allocator, which uses the allocator
|
|
// from sanitizer_common.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "memprof_allocator.h"
|
|
#include "memprof_mapping.h"
|
|
#include "memprof_stack.h"
|
|
#include "memprof_thread.h"
|
|
#include "sanitizer_common/sanitizer_allocator_checks.h"
|
|
#include "sanitizer_common/sanitizer_allocator_interface.h"
|
|
#include "sanitizer_common/sanitizer_allocator_report.h"
|
|
#include "sanitizer_common/sanitizer_errno.h"
|
|
#include "sanitizer_common/sanitizer_file.h"
|
|
#include "sanitizer_common/sanitizer_flags.h"
|
|
#include "sanitizer_common/sanitizer_internal_defs.h"
|
|
#include "sanitizer_common/sanitizer_list.h"
|
|
#include "sanitizer_common/sanitizer_stackdepot.h"
|
|
|
|
#include <sched.h>
|
|
#include <stdlib.h>
|
|
#include <time.h>
|
|
|
|
namespace __memprof {
|
|
|
|
static int GetCpuId(void) {
|
|
// _memprof_preinit is called via the preinit_array, which subsequently calls
|
|
// malloc. Since this is before _dl_init calls VDSO_SETUP, sched_getcpu
|
|
// will seg fault as the address of __vdso_getcpu will be null.
|
|
if (!memprof_init_done)
|
|
return -1;
|
|
return sched_getcpu();
|
|
}
|
|
|
|
// Compute the timestamp in ms.
|
|
static int GetTimestamp(void) {
|
|
// timespec_get will segfault if called from dl_init
|
|
if (!memprof_timestamp_inited) {
|
|
// By returning 0, this will be effectively treated as being
|
|
// timestamped at memprof init time (when memprof_init_timestamp_s
|
|
// is initialized).
|
|
return 0;
|
|
}
|
|
timespec ts;
|
|
clock_gettime(CLOCK_REALTIME, &ts);
|
|
return (ts.tv_sec - memprof_init_timestamp_s) * 1000 + ts.tv_nsec / 1000000;
|
|
}
|
|
|
|
// Forward declaration: MemprofChunk::AllocBeg() below needs the underlying
// allocator before the definition of get_allocator() appears in this file.
static MemprofAllocator &get_allocator();
|
|
|
|
// The memory chunk allocated from the underlying allocator looks like this:
// H H U U U U U U
//   H -- ChunkHeader (32 bytes)
//   U -- user memory.

// If there is left padding before the ChunkHeader (due to use of memalign),
// we store a magic value in the first uptr word of the memory block and
// store the address of ChunkHeader in the next uptr.
// M B L L L L L L L L L  H H U U U U U U
//   |                    ^
//   ---------------------|
//   M -- magic value kAllocBegMagic
//   B -- address of ChunkHeader pointing to the first 'H'
|
|
|
|
// log2 of the largest allocation size accepted by the allocator
// (Allocator::kMaxAllowedMallocSize is 1ULL << kMaxAllowedMallocBits).
constexpr uptr kMaxAllowedMallocBits = 40;
|
|
|
|
// Should be no more than 32-bytes
|
|
struct ChunkHeader {
|
|
// 1-st 4 bytes.
|
|
u32 alloc_context_id;
|
|
// 2-nd 4 bytes
|
|
u32 cpu_id;
|
|
// 3-rd 4 bytes
|
|
u32 timestamp_ms;
|
|
// 4-th 4 bytes
|
|
// Note only 1 bit is needed for this flag if we need space in the future for
|
|
// more fields.
|
|
u32 from_memalign;
|
|
// 5-th and 6-th 4 bytes
|
|
// The max size of an allocation is 2^40 (kMaxAllowedMallocSize), so this
|
|
// could be shrunk to kMaxAllowedMallocBits if we need space in the future for
|
|
// more fields.
|
|
atomic_uint64_t user_requested_size;
|
|
// 23 bits available
|
|
// 7-th and 8-th 4 bytes
|
|
u64 data_type_id; // TODO: hash of type name
|
|
};
|
|
|
|
// Number of bytes between the start of a ChunkHeader and the user memory that
// follows it.
static const uptr kChunkHeaderSize = sizeof(ChunkHeader);
|
|
// Compile-time guard: the chunk header must stay exactly 32 bytes.
COMPILER_CHECK(kChunkHeaderSize == 32);
|
|
|
|
struct MemprofChunk : ChunkHeader {
|
|
uptr Beg() { return reinterpret_cast<uptr>(this) + kChunkHeaderSize; }
|
|
uptr UsedSize() {
|
|
return atomic_load(&user_requested_size, memory_order_relaxed);
|
|
}
|
|
void *AllocBeg() {
|
|
if (from_memalign)
|
|
return get_allocator().GetBlockBegin(reinterpret_cast<void *>(this));
|
|
return reinterpret_cast<void *>(this);
|
|
}
|
|
};
|
|
|
|
class LargeChunkHeader {
|
|
static constexpr uptr kAllocBegMagic =
|
|
FIRST_32_SECOND_64(0xCC6E96B9, 0xCC6E96B9CC6E96B9ULL);
|
|
atomic_uintptr_t magic;
|
|
MemprofChunk *chunk_header;
|
|
|
|
public:
|
|
MemprofChunk *Get() const {
|
|
return atomic_load(&magic, memory_order_acquire) == kAllocBegMagic
|
|
? chunk_header
|
|
: nullptr;
|
|
}
|
|
|
|
void Set(MemprofChunk *p) {
|
|
if (p) {
|
|
chunk_header = p;
|
|
atomic_store(&magic, kAllocBegMagic, memory_order_release);
|
|
return;
|
|
}
|
|
|
|
uptr old = kAllocBegMagic;
|
|
if (!atomic_compare_exchange_strong(&magic, &old, 0,
|
|
memory_order_release)) {
|
|
CHECK_EQ(old, kAllocBegMagic);
|
|
}
|
|
}
|
|
};
|
|
|
|
// Hands the shadow memory that corresponds to the application range
// [p, p + size) back to the OS.
void FlushUnneededMemProfShadowMemory(uptr p, uptr size) {
  // Memprof's mapping is compacting, so the shadow range may not be
  // page-aligned; only its page-aligned portion gets flushed.
  const uptr shadow_first = MemToShadow(p);
  const uptr shadow_last = MemToShadow(p + size);
  ReleaseMemoryPagesToOS(shadow_first, shadow_last);
}
|
|
|
|
// Map callback: accounts `size` mapped bytes in the current thread's
// statistics. The address `p` is not used.
void MemprofMapUnmapCallback::OnMap(uptr p, uptr size) const {
  MemprofStats &stats = GetCurrentThreadStats();
  stats.mmaps += 1;
  stats.mmaped += size;
}
|
|
// Unmap callback: releases the shadow of [p, p + size) and accounts the
// unmapped bytes in the current thread's statistics.
void MemprofMapUnmapCallback::OnUnmap(uptr p, uptr size) const {
  // The user memory is about to go away, so its shadow is no longer needed.
  FlushUnneededMemProfShadowMemory(p, size);

  MemprofStats &stats = GetCurrentThreadStats();
  stats.munmaps += 1;
  stats.munmaped += size;
}
|
|
|
|
// Returns the allocator cache embedded in the given thread-local malloc
// storage. `ms` must not be null (CHECKed).
AllocatorCache *GetAllocatorCache(MemprofThreadLocalMallocStorage *ms) {
  CHECK(ms);
  AllocatorCache *cache = &ms->allocator_cache;
  return cache;
}
|
|
|
|
struct MemInfoBlock {
|
|
u32 alloc_count;
|
|
u64 total_access_count, min_access_count, max_access_count;
|
|
u64 total_size;
|
|
u32 min_size, max_size;
|
|
u32 alloc_timestamp, dealloc_timestamp;
|
|
u64 total_lifetime;
|
|
u32 min_lifetime, max_lifetime;
|
|
u32 alloc_cpu_id, dealloc_cpu_id;
|
|
u32 num_migrated_cpu;
|
|
|
|
// Only compared to prior deallocated object currently.
|
|
u32 num_lifetime_overlaps;
|
|
u32 num_same_alloc_cpu;
|
|
u32 num_same_dealloc_cpu;
|
|
|
|
u64 data_type_id; // TODO: hash of type name
|
|
|
|
MemInfoBlock() : alloc_count(0) {}
|
|
|
|
MemInfoBlock(u32 size, u64 access_count, u32 alloc_timestamp,
|
|
u32 dealloc_timestamp, u32 alloc_cpu, u32 dealloc_cpu)
|
|
: alloc_count(1), total_access_count(access_count),
|
|
min_access_count(access_count), max_access_count(access_count),
|
|
total_size(size), min_size(size), max_size(size),
|
|
alloc_timestamp(alloc_timestamp), dealloc_timestamp(dealloc_timestamp),
|
|
total_lifetime(dealloc_timestamp - alloc_timestamp),
|
|
min_lifetime(total_lifetime), max_lifetime(total_lifetime),
|
|
alloc_cpu_id(alloc_cpu), dealloc_cpu_id(dealloc_cpu),
|
|
num_lifetime_overlaps(0), num_same_alloc_cpu(0),
|
|
num_same_dealloc_cpu(0) {
|
|
num_migrated_cpu = alloc_cpu_id != dealloc_cpu_id;
|
|
}
|
|
|
|
void Print(u64 id) {
|
|
u64 p;
|
|
if (flags()->print_terse) {
|
|
p = total_size * 100 / alloc_count;
|
|
Printf("MIB:%llu/%u/%d.%02d/%u/%u/", id, alloc_count, p / 100, p % 100,
|
|
min_size, max_size);
|
|
p = total_access_count * 100 / alloc_count;
|
|
Printf("%d.%02d/%u/%u/", p / 100, p % 100, min_access_count,
|
|
max_access_count);
|
|
p = total_lifetime * 100 / alloc_count;
|
|
Printf("%d.%02d/%u/%u/", p / 100, p % 100, min_lifetime, max_lifetime);
|
|
Printf("%u/%u/%u/%u\n", num_migrated_cpu, num_lifetime_overlaps,
|
|
num_same_alloc_cpu, num_same_dealloc_cpu);
|
|
} else {
|
|
p = total_size * 100 / alloc_count;
|
|
Printf("Memory allocation stack id = %llu\n", id);
|
|
Printf("\talloc_count %u, size (ave/min/max) %d.%02d / %u / %u\n",
|
|
alloc_count, p / 100, p % 100, min_size, max_size);
|
|
p = total_access_count * 100 / alloc_count;
|
|
Printf("\taccess_count (ave/min/max): %d.%02d / %u / %u\n", p / 100,
|
|
p % 100, min_access_count, max_access_count);
|
|
p = total_lifetime * 100 / alloc_count;
|
|
Printf("\tlifetime (ave/min/max): %d.%02d / %u / %u\n", p / 100, p % 100,
|
|
min_lifetime, max_lifetime);
|
|
Printf("\tnum migrated: %u, num lifetime overlaps: %u, num same alloc "
|
|
"cpu: %u, num same dealloc_cpu: %u\n",
|
|
num_migrated_cpu, num_lifetime_overlaps, num_same_alloc_cpu,
|
|
num_same_dealloc_cpu);
|
|
}
|
|
}
|
|
|
|
static void printHeader() {
|
|
CHECK(flags()->print_terse);
|
|
Printf("MIB:StackID/AllocCount/AveSize/MinSize/MaxSize/AveAccessCount/"
|
|
"MinAccessCount/MaxAccessCount/AveLifetime/MinLifetime/MaxLifetime/"
|
|
"NumMigratedCpu/NumLifetimeOverlaps/NumSameAllocCpu/"
|
|
"NumSameDeallocCpu\n");
|
|
}
|
|
|
|
void Merge(MemInfoBlock &newMIB) {
|
|
alloc_count += newMIB.alloc_count;
|
|
|
|
total_access_count += newMIB.total_access_count;
|
|
min_access_count = Min(min_access_count, newMIB.min_access_count);
|
|
max_access_count = Max(max_access_count, newMIB.max_access_count);
|
|
|
|
total_size += newMIB.total_size;
|
|
min_size = Min(min_size, newMIB.min_size);
|
|
max_size = Max(max_size, newMIB.max_size);
|
|
|
|
total_lifetime += newMIB.total_lifetime;
|
|
min_lifetime = Min(min_lifetime, newMIB.min_lifetime);
|
|
max_lifetime = Max(max_lifetime, newMIB.max_lifetime);
|
|
|
|
// We know newMIB was deallocated later, so just need to check if it was
|
|
// allocated before last one deallocated.
|
|
num_lifetime_overlaps += newMIB.alloc_timestamp < dealloc_timestamp;
|
|
alloc_timestamp = newMIB.alloc_timestamp;
|
|
dealloc_timestamp = newMIB.dealloc_timestamp;
|
|
|
|
num_same_alloc_cpu += alloc_cpu_id == newMIB.alloc_cpu_id;
|
|
num_same_dealloc_cpu += dealloc_cpu_id == newMIB.dealloc_cpu_id;
|
|
alloc_cpu_id = newMIB.alloc_cpu_id;
|
|
dealloc_cpu_id = newMIB.dealloc_cpu_id;
|
|
}
|
|
};
|
|
|
|
// Total number of CacheSet::insertOrMerge calls across all sets.
static u32 AccessCount = 0;
|
|
// Total number of CacheSet::insertOrMerge calls that had to evict an entry.
static u32 MissCount = 0;
|
|
|
|
struct SetEntry {
|
|
SetEntry() : id(0), MIB() {}
|
|
bool Empty() { return id == 0; }
|
|
void Print() {
|
|
CHECK(!Empty());
|
|
MIB.Print(id);
|
|
}
|
|
// The stack id
|
|
u64 id;
|
|
MemInfoBlock MIB;
|
|
};
|
|
|
|
struct CacheSet {
|
|
enum { kSetSize = 4 };
|
|
|
|
void PrintAll() {
|
|
for (int i = 0; i < kSetSize; i++) {
|
|
if (Entries[i].Empty())
|
|
continue;
|
|
Entries[i].Print();
|
|
}
|
|
}
|
|
void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) {
|
|
AccessCount++;
|
|
SetAccessCount++;
|
|
|
|
for (int i = 0; i < kSetSize; i++) {
|
|
auto id = Entries[i].id;
|
|
// Check if this is a hit or an empty entry. Since we always move any
|
|
// filled locations to the front of the array (see below), we don't need
|
|
// to look after finding the first empty entry.
|
|
if (id == new_id || !id) {
|
|
if (id == 0) {
|
|
Entries[i].id = new_id;
|
|
Entries[i].MIB = newMIB;
|
|
} else {
|
|
Entries[i].MIB.Merge(newMIB);
|
|
}
|
|
// Assuming some id locality, we try to swap the matching entry
|
|
// into the first set position.
|
|
if (i != 0) {
|
|
auto tmp = Entries[0];
|
|
Entries[0] = Entries[i];
|
|
Entries[i] = tmp;
|
|
}
|
|
return;
|
|
}
|
|
}
|
|
|
|
// Miss
|
|
MissCount++;
|
|
SetMissCount++;
|
|
|
|
// We try to find the entries with the lowest alloc count to be evicted:
|
|
int min_idx = 0;
|
|
u64 min_count = Entries[0].MIB.alloc_count;
|
|
for (int i = 1; i < kSetSize; i++) {
|
|
CHECK(!Entries[i].Empty());
|
|
if (Entries[i].MIB.alloc_count < min_count) {
|
|
min_idx = i;
|
|
min_count = Entries[i].MIB.alloc_count;
|
|
}
|
|
}
|
|
|
|
// Print the evicted entry profile information
|
|
if (!flags()->print_terse)
|
|
Printf("Evicted:\n");
|
|
Entries[min_idx].Print();
|
|
|
|
// Similar to the hit case, put new MIB in first set position.
|
|
if (min_idx != 0)
|
|
Entries[min_idx] = Entries[0];
|
|
Entries[0].id = new_id;
|
|
Entries[0].MIB = newMIB;
|
|
}
|
|
|
|
void PrintMissRate(int i) {
|
|
u64 p = SetAccessCount ? SetMissCount * 10000ULL / SetAccessCount : 0;
|
|
Printf("Set %d miss rate: %d / %d = %5d.%02d%%\n", i, SetMissCount,
|
|
SetAccessCount, p / 100, p % 100);
|
|
}
|
|
|
|
SetEntry Entries[kSetSize];
|
|
u32 SetAccessCount = 0;
|
|
u32 SetMissCount = 0;
|
|
};
|
|
|
|
struct MemInfoBlockCache {
|
|
MemInfoBlockCache() {
|
|
if (common_flags()->print_module_map)
|
|
DumpProcessMap();
|
|
if (flags()->print_terse)
|
|
MemInfoBlock::printHeader();
|
|
Sets =
|
|
(CacheSet *)malloc(sizeof(CacheSet) * flags()->mem_info_cache_entries);
|
|
Constructed = true;
|
|
}
|
|
|
|
~MemInfoBlockCache() { free(Sets); }
|
|
|
|
void insertOrMerge(u64 new_id, MemInfoBlock &newMIB) {
|
|
u64 hv = new_id;
|
|
|
|
// Use mod method where number of entries should be a prime close to power
|
|
// of 2.
|
|
hv %= flags()->mem_info_cache_entries;
|
|
|
|
return Sets[hv].insertOrMerge(new_id, newMIB);
|
|
}
|
|
|
|
void PrintAll() {
|
|
for (int i = 0; i < flags()->mem_info_cache_entries; i++) {
|
|
Sets[i].PrintAll();
|
|
}
|
|
}
|
|
|
|
void PrintMissRate() {
|
|
if (!flags()->print_mem_info_cache_miss_rate)
|
|
return;
|
|
u64 p = AccessCount ? MissCount * 10000ULL / AccessCount : 0;
|
|
Printf("Overall miss rate: %d / %d = %5d.%02d%%\n", MissCount, AccessCount,
|
|
p / 100, p % 100);
|
|
if (flags()->print_mem_info_cache_miss_rate_details)
|
|
for (int i = 0; i < flags()->mem_info_cache_entries; i++)
|
|
Sets[i].PrintMissRate(i);
|
|
}
|
|
|
|
CacheSet *Sets;
|
|
// Flag when the Sets have been allocated, in case a deallocation is called
|
|
// very early before the static init of the Allocator and therefore this table
|
|
// have completed.
|
|
bool Constructed = false;
|
|
};
|
|
|
|
// Accumulates the access count from the shadow for the given pointer and size.
|
|
u64 GetShadowCount(uptr p, u32 size) {
|
|
u64 *shadow = (u64 *)MEM_TO_SHADOW(p);
|
|
u64 *shadow_end = (u64 *)MEM_TO_SHADOW(p + size);
|
|
u64 count = 0;
|
|
for (; shadow <= shadow_end; shadow++)
|
|
count += *shadow;
|
|
return count;
|
|
}
|
|
|
|
// Clears the shadow counters (when memory is allocated).
|
|
void ClearShadow(uptr addr, uptr size) {
|
|
CHECK(AddrIsAlignedByGranularity(addr));
|
|
CHECK(AddrIsInMem(addr));
|
|
CHECK(AddrIsAlignedByGranularity(addr + size));
|
|
CHECK(AddrIsInMem(addr + size - SHADOW_GRANULARITY));
|
|
CHECK(REAL(memset));
|
|
uptr shadow_beg = MEM_TO_SHADOW(addr);
|
|
uptr shadow_end = MEM_TO_SHADOW(addr + size - SHADOW_GRANULARITY) + 1;
|
|
if (shadow_end - shadow_beg < common_flags()->clear_shadow_mmap_threshold) {
|
|
REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg);
|
|
} else {
|
|
uptr page_size = GetPageSizeCached();
|
|
uptr page_beg = RoundUpTo(shadow_beg, page_size);
|
|
uptr page_end = RoundDownTo(shadow_end, page_size);
|
|
|
|
if (page_beg >= page_end) {
|
|
REAL(memset)((void *)shadow_beg, 0, shadow_end - shadow_beg);
|
|
} else {
|
|
if (page_beg != shadow_beg) {
|
|
REAL(memset)((void *)shadow_beg, 0, page_beg - shadow_beg);
|
|
}
|
|
if (page_end != shadow_end) {
|
|
REAL(memset)((void *)page_end, 0, shadow_end - page_end);
|
|
}
|
|
ReserveShadowMemoryRange(page_beg, page_end - 1, nullptr);
|
|
}
|
|
}
|
|
}
|
|
|
|
struct Allocator {
|
|
static const uptr kMaxAllowedMallocSize = 1ULL << kMaxAllowedMallocBits;
|
|
|
|
MemprofAllocator allocator;
|
|
StaticSpinMutex fallback_mutex;
|
|
AllocatorCache fallback_allocator_cache;
|
|
|
|
uptr max_user_defined_malloc_size;
|
|
atomic_uint8_t rss_limit_exceeded;
|
|
|
|
MemInfoBlockCache MemInfoBlockTable;
|
|
bool destructing;
|
|
|
|
// ------------------- Initialization ------------------------
|
|
explicit Allocator(LinkerInitialized) : destructing(false) {}
|
|
|
|
~Allocator() { FinishAndPrint(); }
|
|
|
|
void FinishAndPrint() {
|
|
if (!flags()->print_terse)
|
|
Printf("Live on exit:\n");
|
|
allocator.ForceLock();
|
|
allocator.ForEachChunk(
|
|
[](uptr chunk, void *alloc) {
|
|
u64 user_requested_size;
|
|
MemprofChunk *m =
|
|
((Allocator *)alloc)
|
|
->GetMemprofChunk((void *)chunk, user_requested_size);
|
|
if (!m)
|
|
return;
|
|
uptr user_beg = ((uptr)m) + kChunkHeaderSize;
|
|
u64 c = GetShadowCount(user_beg, user_requested_size);
|
|
long curtime = GetTimestamp();
|
|
MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
|
|
m->cpu_id, GetCpuId());
|
|
((Allocator *)alloc)
|
|
->MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB);
|
|
},
|
|
this);
|
|
allocator.ForceUnlock();
|
|
|
|
destructing = true;
|
|
MemInfoBlockTable.PrintMissRate();
|
|
MemInfoBlockTable.PrintAll();
|
|
StackDepotPrintAll();
|
|
}
|
|
|
|
void InitLinkerInitialized() {
|
|
SetAllocatorMayReturnNull(common_flags()->allocator_may_return_null);
|
|
allocator.InitLinkerInitialized(
|
|
common_flags()->allocator_release_to_os_interval_ms);
|
|
max_user_defined_malloc_size = common_flags()->max_allocation_size_mb
|
|
? common_flags()->max_allocation_size_mb
|
|
<< 20
|
|
: kMaxAllowedMallocSize;
|
|
}
|
|
|
|
bool RssLimitExceeded() {
|
|
return atomic_load(&rss_limit_exceeded, memory_order_relaxed);
|
|
}
|
|
|
|
void SetRssLimitExceeded(bool limit_exceeded) {
|
|
atomic_store(&rss_limit_exceeded, limit_exceeded, memory_order_relaxed);
|
|
}
|
|
|
|
// -------------------- Allocation/Deallocation routines ---------------
|
|
void *Allocate(uptr size, uptr alignment, BufferedStackTrace *stack,
|
|
AllocType alloc_type) {
|
|
if (UNLIKELY(!memprof_inited))
|
|
MemprofInitFromRtl();
|
|
if (RssLimitExceeded()) {
|
|
if (AllocatorMayReturnNull())
|
|
return nullptr;
|
|
ReportRssLimitExceeded(stack);
|
|
}
|
|
CHECK(stack);
|
|
const uptr min_alignment = MEMPROF_ALIGNMENT;
|
|
if (alignment < min_alignment)
|
|
alignment = min_alignment;
|
|
if (size == 0) {
|
|
// We'd be happy to avoid allocating memory for zero-size requests, but
|
|
// some programs/tests depend on this behavior and assume that malloc
|
|
// would not return NULL even for zero-size allocations. Moreover, it
|
|
// looks like operator new should never return NULL, and results of
|
|
// consecutive "new" calls must be different even if the allocated size
|
|
// is zero.
|
|
size = 1;
|
|
}
|
|
CHECK(IsPowerOfTwo(alignment));
|
|
uptr rounded_size = RoundUpTo(size, alignment);
|
|
uptr needed_size = rounded_size + kChunkHeaderSize;
|
|
if (alignment > min_alignment)
|
|
needed_size += alignment;
|
|
CHECK(IsAligned(needed_size, min_alignment));
|
|
if (size > kMaxAllowedMallocSize || needed_size > kMaxAllowedMallocSize ||
|
|
size > max_user_defined_malloc_size) {
|
|
if (AllocatorMayReturnNull()) {
|
|
Report("WARNING: MemProfiler failed to allocate 0x%zx bytes\n",
|
|
(void *)size);
|
|
return nullptr;
|
|
}
|
|
uptr malloc_limit =
|
|
Min(kMaxAllowedMallocSize, max_user_defined_malloc_size);
|
|
ReportAllocationSizeTooBig(size, malloc_limit, stack);
|
|
}
|
|
|
|
MemprofThread *t = GetCurrentThread();
|
|
void *allocated;
|
|
if (t) {
|
|
AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
|
|
allocated = allocator.Allocate(cache, needed_size, 8);
|
|
} else {
|
|
SpinMutexLock l(&fallback_mutex);
|
|
AllocatorCache *cache = &fallback_allocator_cache;
|
|
allocated = allocator.Allocate(cache, needed_size, 8);
|
|
}
|
|
if (UNLIKELY(!allocated)) {
|
|
SetAllocatorOutOfMemory();
|
|
if (AllocatorMayReturnNull())
|
|
return nullptr;
|
|
ReportOutOfMemory(size, stack);
|
|
}
|
|
|
|
uptr alloc_beg = reinterpret_cast<uptr>(allocated);
|
|
uptr alloc_end = alloc_beg + needed_size;
|
|
uptr beg_plus_header = alloc_beg + kChunkHeaderSize;
|
|
uptr user_beg = beg_plus_header;
|
|
if (!IsAligned(user_beg, alignment))
|
|
user_beg = RoundUpTo(user_beg, alignment);
|
|
uptr user_end = user_beg + size;
|
|
CHECK_LE(user_end, alloc_end);
|
|
uptr chunk_beg = user_beg - kChunkHeaderSize;
|
|
MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg);
|
|
m->from_memalign = alloc_beg != chunk_beg;
|
|
CHECK(size);
|
|
|
|
m->cpu_id = GetCpuId();
|
|
m->timestamp_ms = GetTimestamp();
|
|
m->alloc_context_id = StackDepotPut(*stack);
|
|
|
|
uptr size_rounded_down_to_granularity =
|
|
RoundDownTo(size, SHADOW_GRANULARITY);
|
|
if (size_rounded_down_to_granularity)
|
|
ClearShadow(user_beg, size_rounded_down_to_granularity);
|
|
|
|
MemprofStats &thread_stats = GetCurrentThreadStats();
|
|
thread_stats.mallocs++;
|
|
thread_stats.malloced += size;
|
|
thread_stats.malloced_overhead += needed_size - size;
|
|
if (needed_size > SizeClassMap::kMaxSize)
|
|
thread_stats.malloc_large++;
|
|
else
|
|
thread_stats.malloced_by_size[SizeClassMap::ClassID(needed_size)]++;
|
|
|
|
void *res = reinterpret_cast<void *>(user_beg);
|
|
atomic_store(&m->user_requested_size, size, memory_order_release);
|
|
if (alloc_beg != chunk_beg) {
|
|
CHECK_LE(alloc_beg + sizeof(LargeChunkHeader), chunk_beg);
|
|
reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Set(m);
|
|
}
|
|
MEMPROF_MALLOC_HOOK(res, size);
|
|
return res;
|
|
}
|
|
|
|
void Deallocate(void *ptr, uptr delete_size, uptr delete_alignment,
|
|
BufferedStackTrace *stack, AllocType alloc_type) {
|
|
uptr p = reinterpret_cast<uptr>(ptr);
|
|
if (p == 0)
|
|
return;
|
|
|
|
MEMPROF_FREE_HOOK(ptr);
|
|
|
|
uptr chunk_beg = p - kChunkHeaderSize;
|
|
MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg);
|
|
|
|
u64 user_requested_size =
|
|
atomic_exchange(&m->user_requested_size, 0, memory_order_acquire);
|
|
if (memprof_inited && memprof_init_done && !destructing &&
|
|
MemInfoBlockTable.Constructed) {
|
|
u64 c = GetShadowCount(p, user_requested_size);
|
|
long curtime = GetTimestamp();
|
|
|
|
MemInfoBlock newMIB(user_requested_size, c, m->timestamp_ms, curtime,
|
|
m->cpu_id, GetCpuId());
|
|
{
|
|
SpinMutexLock l(&fallback_mutex);
|
|
MemInfoBlockTable.insertOrMerge(m->alloc_context_id, newMIB);
|
|
}
|
|
}
|
|
|
|
MemprofStats &thread_stats = GetCurrentThreadStats();
|
|
thread_stats.frees++;
|
|
thread_stats.freed += user_requested_size;
|
|
|
|
void *alloc_beg = m->AllocBeg();
|
|
if (alloc_beg != m) {
|
|
// Clear the magic value, as allocator internals may overwrite the
|
|
// contents of deallocated chunk, confusing GetMemprofChunk lookup.
|
|
reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Set(nullptr);
|
|
}
|
|
|
|
MemprofThread *t = GetCurrentThread();
|
|
if (t) {
|
|
AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
|
|
allocator.Deallocate(cache, alloc_beg);
|
|
} else {
|
|
SpinMutexLock l(&fallback_mutex);
|
|
AllocatorCache *cache = &fallback_allocator_cache;
|
|
allocator.Deallocate(cache, alloc_beg);
|
|
}
|
|
}
|
|
|
|
void *Reallocate(void *old_ptr, uptr new_size, BufferedStackTrace *stack) {
|
|
CHECK(old_ptr && new_size);
|
|
uptr p = reinterpret_cast<uptr>(old_ptr);
|
|
uptr chunk_beg = p - kChunkHeaderSize;
|
|
MemprofChunk *m = reinterpret_cast<MemprofChunk *>(chunk_beg);
|
|
|
|
MemprofStats &thread_stats = GetCurrentThreadStats();
|
|
thread_stats.reallocs++;
|
|
thread_stats.realloced += new_size;
|
|
|
|
void *new_ptr = Allocate(new_size, 8, stack, FROM_MALLOC);
|
|
if (new_ptr) {
|
|
CHECK_NE(REAL(memcpy), nullptr);
|
|
uptr memcpy_size = Min(new_size, m->UsedSize());
|
|
REAL(memcpy)(new_ptr, old_ptr, memcpy_size);
|
|
Deallocate(old_ptr, 0, 0, stack, FROM_MALLOC);
|
|
}
|
|
return new_ptr;
|
|
}
|
|
|
|
void *Calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) {
|
|
if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) {
|
|
if (AllocatorMayReturnNull())
|
|
return nullptr;
|
|
ReportCallocOverflow(nmemb, size, stack);
|
|
}
|
|
void *ptr = Allocate(nmemb * size, 8, stack, FROM_MALLOC);
|
|
// If the memory comes from the secondary allocator no need to clear it
|
|
// as it comes directly from mmap.
|
|
if (ptr && allocator.FromPrimary(ptr))
|
|
REAL(memset)(ptr, 0, nmemb * size);
|
|
return ptr;
|
|
}
|
|
|
|
void CommitBack(MemprofThreadLocalMallocStorage *ms,
|
|
BufferedStackTrace *stack) {
|
|
AllocatorCache *ac = GetAllocatorCache(ms);
|
|
allocator.SwallowCache(ac);
|
|
}
|
|
|
|
// -------------------------- Chunk lookup ----------------------
|
|
|
|
// Assumes alloc_beg == allocator.GetBlockBegin(alloc_beg).
|
|
MemprofChunk *GetMemprofChunk(void *alloc_beg, u64 &user_requested_size) {
|
|
if (!alloc_beg)
|
|
return nullptr;
|
|
MemprofChunk *p = reinterpret_cast<LargeChunkHeader *>(alloc_beg)->Get();
|
|
if (!p) {
|
|
if (!allocator.FromPrimary(alloc_beg))
|
|
return nullptr;
|
|
p = reinterpret_cast<MemprofChunk *>(alloc_beg);
|
|
}
|
|
// The size is reset to 0 on deallocation (and a min of 1 on
|
|
// allocation).
|
|
user_requested_size =
|
|
atomic_load(&p->user_requested_size, memory_order_acquire);
|
|
if (user_requested_size)
|
|
return p;
|
|
return nullptr;
|
|
}
|
|
|
|
MemprofChunk *GetMemprofChunkByAddr(uptr p, u64 &user_requested_size) {
|
|
void *alloc_beg = allocator.GetBlockBegin(reinterpret_cast<void *>(p));
|
|
return GetMemprofChunk(alloc_beg, user_requested_size);
|
|
}
|
|
|
|
uptr AllocationSize(uptr p) {
|
|
u64 user_requested_size;
|
|
MemprofChunk *m = GetMemprofChunkByAddr(p, user_requested_size);
|
|
if (!m)
|
|
return 0;
|
|
if (m->Beg() != p)
|
|
return 0;
|
|
return user_requested_size;
|
|
}
|
|
|
|
void Purge(BufferedStackTrace *stack) { allocator.ForceReleaseToOS(); }
|
|
|
|
void PrintStats() { allocator.PrintStats(); }
|
|
|
|
void ForceLock() {
|
|
allocator.ForceLock();
|
|
fallback_mutex.Lock();
|
|
}
|
|
|
|
void ForceUnlock() {
|
|
fallback_mutex.Unlock();
|
|
allocator.ForceUnlock();
|
|
}
|
|
};
|
|
|
|
// The single allocator object used by every entry point in this file.
static Allocator instance(LINKER_INITIALIZED);
|
|
|
|
// Gives access to the underlying allocator owned by `instance`.
static MemprofAllocator &get_allocator() {
  return instance.allocator;
}
|
|
|
|
// Runs the flag-dependent initialization of the global allocator instance.
void InitializeAllocator() {
  instance.InitLinkerInitialized();
}
|
|
|
|
// Returns this thread-local storage's allocator cache to the global
// allocator instance.
void MemprofThreadLocalMallocStorage::CommitBack() {
  // The macro provides the `stack` object that is passed along below.
  GET_STACK_TRACE_MALLOC;
  instance.CommitBack(this, &stack);
}
|
|
|
|
// Prints the statistics of the global allocator instance.
void PrintInternalAllocatorStats() {
  instance.PrintStats();
}
|
|
|
|
void memprof_free(void *ptr, BufferedStackTrace *stack, AllocType alloc_type) {
|
|
instance.Deallocate(ptr, 0, 0, stack, alloc_type);
|
|
}
|
|
|
|
void memprof_delete(void *ptr, uptr size, uptr alignment,
|
|
BufferedStackTrace *stack, AllocType alloc_type) {
|
|
instance.Deallocate(ptr, size, alignment, stack, alloc_type);
|
|
}
|
|
|
|
// malloc() entry point: 8-byte aligned allocation; the result is passed
// through SetErrnoOnNull.
void *memprof_malloc(uptr size, BufferedStackTrace *stack) {
  void *mem = instance.Allocate(size, 8, stack, FROM_MALLOC);
  return SetErrnoOnNull(mem);
}
|
|
|
|
// calloc() entry point; the result is passed through SetErrnoOnNull.
void *memprof_calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) {
  void *mem = instance.Calloc(nmemb, size, stack);
  return SetErrnoOnNull(mem);
}
|
|
|
|
void *memprof_reallocarray(void *p, uptr nmemb, uptr size,
|
|
BufferedStackTrace *stack) {
|
|
if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) {
|
|
errno = errno_ENOMEM;
|
|
if (AllocatorMayReturnNull())
|
|
return nullptr;
|
|
ReportReallocArrayOverflow(nmemb, size, stack);
|
|
}
|
|
return memprof_realloc(p, nmemb * size, stack);
|
|
}
|
|
|
|
void *memprof_realloc(void *p, uptr size, BufferedStackTrace *stack) {
|
|
if (!p)
|
|
return SetErrnoOnNull(instance.Allocate(size, 8, stack, FROM_MALLOC));
|
|
if (size == 0) {
|
|
if (flags()->allocator_frees_and_returns_null_on_realloc_zero) {
|
|
instance.Deallocate(p, 0, 0, stack, FROM_MALLOC);
|
|
return nullptr;
|
|
}
|
|
// Allocate a size of 1 if we shouldn't free() on Realloc to 0
|
|
size = 1;
|
|
}
|
|
return SetErrnoOnNull(instance.Reallocate(p, size, stack));
|
|
}
|
|
|
|
// valloc() entry point: allocation aligned to the cached page size.
void *memprof_valloc(uptr size, BufferedStackTrace *stack) {
  const uptr page_alignment = GetPageSizeCached();
  void *mem = instance.Allocate(size, page_alignment, stack, FROM_MALLOC);
  return SetErrnoOnNull(mem);
}
|
|
|
|
// pvalloc() entry point: page-aligned allocation whose size is rounded up to
// a whole number of pages.
void *memprof_pvalloc(uptr size, BufferedStackTrace *stack) {
  const uptr page = GetPageSizeCached();
  if (UNLIKELY(CheckForPvallocOverflow(size, page))) {
    errno = errno_ENOMEM;
    if (AllocatorMayReturnNull()) {
      return nullptr;
    }
    ReportPvallocOverflow(size, stack);
  }
  // pvalloc(0) should allocate one page.
  if (size)
    size = RoundUpTo(size, page);
  else
    size = page;
  void *mem = instance.Allocate(size, page, stack, FROM_MALLOC);
  return SetErrnoOnNull(mem);
}
|
|
|
|
// memalign() entry point: rejects alignments that are not a power of two
// (errno is set to EINVAL), otherwise allocates with the requested alignment.
void *memprof_memalign(uptr alignment, uptr size, BufferedStackTrace *stack,
                       AllocType alloc_type) {
  const bool alignment_ok = IsPowerOfTwo(alignment);
  if (UNLIKELY(!alignment_ok)) {
    errno = errno_EINVAL;
    if (AllocatorMayReturnNull()) {
      return nullptr;
    }
    ReportInvalidAllocationAlignment(alignment, stack);
  }
  void *mem = instance.Allocate(size, alignment, stack, alloc_type);
  return SetErrnoOnNull(mem);
}
|
|
|
|
// aligned_alloc() entry point: validates the alignment/size pair with
// CheckAlignedAllocAlignmentAndSize (errno is set to EINVAL on failure)
// before allocating.
void *memprof_aligned_alloc(uptr alignment, uptr size,
                            BufferedStackTrace *stack) {
  const bool args_ok = CheckAlignedAllocAlignmentAndSize(alignment, size);
  if (UNLIKELY(!args_ok)) {
    errno = errno_EINVAL;
    if (AllocatorMayReturnNull()) {
      return nullptr;
    }
    ReportInvalidAlignedAllocAlignment(size, alignment, stack);
  }
  void *mem = instance.Allocate(size, alignment, stack, FROM_MALLOC);
  return SetErrnoOnNull(mem);
}
|
|
|
|
int memprof_posix_memalign(void **memptr, uptr alignment, uptr size,
|
|
BufferedStackTrace *stack) {
|
|
if (UNLIKELY(!CheckPosixMemalignAlignment(alignment))) {
|
|
if (AllocatorMayReturnNull())
|
|
return errno_EINVAL;
|
|
ReportInvalidPosixMemalignAlignment(alignment, stack);
|
|
}
|
|
void *ptr = instance.Allocate(size, alignment, stack, FROM_MALLOC);
|
|
if (UNLIKELY(!ptr))
|
|
// OOM error is already taken care of by Allocate.
|
|
return errno_ENOMEM;
|
|
CHECK(IsAligned((uptr)ptr, alignment));
|
|
*memptr = ptr;
|
|
return 0;
|
|
}
|
|
|
|
// malloc_usable_size() entry point: the size recorded for the allocation that
// starts at `ptr`, or 0 for a null pointer. `pc` and `bp` are not used.
uptr memprof_malloc_usable_size(const void *ptr, uptr pc, uptr bp) {
  if (ptr == nullptr)
    return 0;
  const uptr addr = reinterpret_cast<uptr>(ptr);
  return instance.AllocationSize(addr);
}
|
|
|
|
// Records on the global allocator instance whether the soft RSS limit is
// currently exceeded.
void MemprofSoftRssLimitExceededCallback(bool limit_exceeded) {
  const bool exceeded_now = limit_exceeded;
  instance.SetRssLimitExceeded(exceeded_now);
}
|
|
|
|
} // namespace __memprof
|
|
|
|
// ---------------------- Interface ---------------- {{{1
|
|
using namespace __memprof;
|
|
|
|
#if !SANITIZER_SUPPORTS_WEAK_HOOKS
|
|
// Provide default (no-op) implementation of malloc hooks.
|
|
SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_malloc_hook, void *ptr,
|
|
uptr size) {
|
|
(void)ptr;
|
|
(void)size;
|
|
}
|
|
|
|
SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_free_hook, void *ptr) {
|
|
(void)ptr;
|
|
}
|
|
#endif
|
|
|
|
// The estimate is simply the requested size.
uptr __sanitizer_get_estimated_allocated_size(uptr size) {
  return size;
}
|
|
|
|
int __sanitizer_get_ownership(const void *p) {
|
|
return memprof_malloc_usable_size(p, 0, 0) != 0;
|
|
}
|
|
|
|
// Returns the size memprof_malloc_usable_size reports for `p`.
uptr __sanitizer_get_allocated_size(const void *p) {
  const uptr usable = memprof_malloc_usable_size(p, 0, 0);
  return usable;
}
|
|
|
|
// Dumps the collected profile via the global allocator instance. Always
// returns 0.
int __memprof_profile_dump() {
  instance.FinishAndPrint();
  // In the future we may want to return non-zero if there are any errors
  // detected during the dumping process.
  const int status = 0;
  return status;
}
|