You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
954 lines
37 KiB
954 lines
37 KiB
/*
|
|
* Copyright (C) 2013 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#ifndef ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
|
|
#define ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
|
|
|
|
#include <stdint.h>
|
|
#include <stdlib.h>
|
|
#include <sys/mman.h>
|
|
#include <memory>
|
|
#include <set>
|
|
#include <string>
|
|
#include <unordered_set>
|
|
#include <vector>
|
|
|
|
#include <android-base/logging.h>
|
|
|
|
#include "base/allocator.h"
|
|
#include "base/bit_utils.h"
|
|
#include "base/mem_map.h"
|
|
#include "base/mutex.h"
|
|
#include "runtime_globals.h"
|
|
#include "thread.h"
|
|
|
|
namespace art {
|
|
|
|
namespace gc {
|
|
namespace allocator {
|
|
|
|
// A runs-of-slots memory allocator.
|
|
class RosAlloc {
|
|
private:
|
|
// Represents a run of free pages. The object itself lives at the start of the
// free page range; its size is kept in a side table (free_page_run_size_map_)
// so the backing pages can be released to the OS without losing the size.
class FreePageRun {
 public:
  uint8_t magic_num_;  // The magic number used for debugging only.

  // In debug builds, checks the magic number against the free-page marker.
  // In release builds magic numbers are not maintained, so this folds to true.
  bool IsFree() const {
    return !kIsDebugBuild || magic_num_ == kMagicNumFree;
  }

  // Returns the byte size of this free page run, read from the allocator's
  // side table indexed by this run's page map index.
  size_t ByteSize(RosAlloc* rosalloc) const REQUIRES(rosalloc->lock_) {
    const uint8_t* fpr_base = reinterpret_cast<const uint8_t*>(this);
    size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
    size_t byte_size = rosalloc->free_page_run_size_map_[pm_idx];
    // Note: always true since byte_size is unsigned; kept as written.
    DCHECK_GE(byte_size, static_cast<size_t>(0));
    DCHECK_ALIGNED(byte_size, kPageSize);
    return byte_size;
  }

  // Records the byte size of this free page run in the side table.
  // byte_size must be a whole number of pages.
  void SetByteSize(RosAlloc* rosalloc, size_t byte_size)
      REQUIRES(rosalloc->lock_) {
    DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
    uint8_t* fpr_base = reinterpret_cast<uint8_t*>(this);
    size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
    rosalloc->free_page_run_size_map_[pm_idx] = byte_size;
  }

  // The first byte of the run (the run header is the run itself).
  void* Begin() {
    return reinterpret_cast<void*>(this);
  }

  // One past the last byte of the run.
  void* End(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
    uint8_t* fpr_base = reinterpret_cast<uint8_t*>(this);
    uint8_t* end = fpr_base + ByteSize(rosalloc);
    return end;
  }

  // True if the run is at least as large as the configured release threshold
  // (used by the kPageReleaseModeSize / kPageReleaseModeSizeAndEnd policies).
  bool IsLargerThanPageReleaseThreshold(RosAlloc* rosalloc)
      REQUIRES(rosalloc->lock_) {
    return ByteSize(rosalloc) >= rosalloc->page_release_size_threshold_;
  }

  // True if the run ends exactly at the current footprint, i.e. it is the
  // trailing free region of the space.
  bool IsAtEndOfSpace(RosAlloc* rosalloc)
      REQUIRES(rosalloc->lock_) {
    return reinterpret_cast<uint8_t*>(this) + ByteSize(rosalloc) == rosalloc->base_ + rosalloc->footprint_;
  }

  // Applies the allocator's page release policy to decide whether this run's
  // backing pages should be returned to the OS.
  bool ShouldReleasePages(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
    switch (rosalloc->page_release_mode_) {
      case kPageReleaseModeNone:
        return false;
      case kPageReleaseModeEnd:
        return IsAtEndOfSpace(rosalloc);
      case kPageReleaseModeSize:
        return IsLargerThanPageReleaseThreshold(rosalloc);
      case kPageReleaseModeSizeAndEnd:
        return IsLargerThanPageReleaseThreshold(rosalloc) && IsAtEndOfSpace(rosalloc);
      case kPageReleaseModeAll:
        return true;
      default:
        LOG(FATAL) << "Unexpected page release mode ";
        return false;
    }
  }

  // Releases the run's pages back to the OS if the release policy allows it.
  void ReleasePages(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
    uint8_t* start = reinterpret_cast<uint8_t*>(this);
    size_t byte_size = ByteSize(rosalloc);
    DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
    if (ShouldReleasePages(rosalloc)) {
      rosalloc->ReleasePageRange(start, start + byte_size);
    }
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(FreePageRun);
};
|
|
|
|
// The slot header: an intrusive singly-linked-list node embedded at the
// beginning of every free slot.
class Slot {
 public:
  // The next free slot in the list, or nullptr at the end.
  Slot* Next() const {
    return next_;
  }

  // Links this slot to the given successor.
  void SetNext(Slot* next) {
    next_ = next;
  }

  // The slot immediately before this one in address order, given that slots
  // are laid out contiguously at bracket_size intervals.
  Slot* Left(size_t bracket_size) {
    const uintptr_t here = reinterpret_cast<uintptr_t>(this);
    return reinterpret_cast<Slot*>(here - bracket_size);
  }

  // Detaches this slot from any list.
  void Clear() {
    next_ = nullptr;
  }

 private:
  Slot* next_;  // Next slot in the list.

  friend class RosAlloc;
};
|
|
|
|
// We use the tail (kUseTail == true) for the bulk or thread-local free lists to avoid the need to
|
|
// traverse the list from the head to the tail when merging free lists.
|
|
// We don't use the tail (kUseTail == false) for the free list to avoid the need to manage the
|
|
// tail in the allocation fast path for a performance reason.
|
|
template<bool kUseTail = true>
class SlotFreeList {
 public:
  SlotFreeList() : head_(0U), tail_(0), size_(0), padding_(0) {}

  // The first slot in the list, or nullptr if the list is empty.
  Slot* Head() const {
    return reinterpret_cast<Slot*>(head_);
  }

  // The last slot in the list. Only valid when kUseTail is true.
  Slot* Tail() const {
    CHECK(kUseTail);
    return reinterpret_cast<Slot*>(tail_);
  }

  // The number of slots currently in the list.
  size_t Size() const {
    return size_;
  }

  // Removes from the head of the free list. Returns nullptr if the list is
  // empty; otherwise the removed slot with its next pointer cleared.
  Slot* Remove() {
    Slot* slot;
    if (kIsDebugBuild) {
      Verify();
    }
    // NOTE(review): head_/tail_ are uint64_t fields read through Slot**;
    // this presumably relies on little-endian layout when pointers are
    // narrower than 8 bytes — confirm against supported targets.
    Slot** headp = reinterpret_cast<Slot**>(&head_);
    Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
    Slot* old_head = *headp;
    if (old_head == nullptr) {
      // List was empty.
      if (kUseTail) {
        DCHECK(*tailp == nullptr);
      }
      return nullptr;
    } else {
      // List wasn't empty.
      if (kUseTail) {
        DCHECK(*tailp != nullptr);
      }
      Slot* old_head_next = old_head->Next();
      slot = old_head;
      *headp = old_head_next;
      if (kUseTail && old_head_next == nullptr) {
        // List becomes empty.
        *tailp = nullptr;
      }
    }
    slot->Clear();
    --size_;
    if (kIsDebugBuild) {
      Verify();
    }
    return slot;
  }

  // Adds the given (unlinked) slot at the head of the free list.
  void Add(Slot* slot) {
    if (kIsDebugBuild) {
      Verify();
    }
    DCHECK(slot != nullptr);
    DCHECK(slot->Next() == nullptr);
    Slot** headp = reinterpret_cast<Slot**>(&head_);
    Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
    Slot* old_head = *headp;
    if (old_head == nullptr) {
      // List was empty.
      if (kUseTail) {
        DCHECK(*tailp == nullptr);
      }
      *headp = slot;
      if (kUseTail) {
        *tailp = slot;
      }
    } else {
      // List wasn't empty.
      if (kUseTail) {
        DCHECK(*tailp != nullptr);
      }
      *headp = slot;
      slot->SetNext(old_head);
    }
    ++size_;
    if (kIsDebugBuild) {
      Verify();
    }
  }

  // Merge the given list into this list. Empty the given list.
  // Deliberately support only a kUseTail == true SlotFreeList parameter because 1) we don't
  // currently have a situation where we need a kUseTail == false SlotFreeList parameter, and 2)
  // supporting the kUseTail == false parameter would require a O(n) linked list traversal to do
  // the merge if 'this' SlotFreeList has kUseTail == false, which we'd like to avoid.
  void Merge(SlotFreeList<true>* list) {
    if (kIsDebugBuild) {
      Verify();
      CHECK(list != nullptr);
      list->Verify();
    }
    if (list->Size() == 0) {
      return;
    }
    Slot** headp = reinterpret_cast<Slot**>(&head_);
    Slot** tailp = kUseTail ? reinterpret_cast<Slot**>(&tail_) : nullptr;
    Slot* old_head = *headp;
    if (old_head == nullptr) {
      // List was empty.
      *headp = list->Head();
      if (kUseTail) {
        *tailp = list->Tail();
      }
      size_ = list->Size();
    } else {
      // List wasn't empty. Prepend the incoming list: its tail links to our
      // old head, so this is O(1) thanks to the incoming list's tail pointer.
      DCHECK(list->Head() != nullptr);
      *headp = list->Head();
      DCHECK(list->Tail() != nullptr);
      list->Tail()->SetNext(old_head);
      // if kUseTail, no change to tailp.
      size_ += list->Size();
    }
    list->Reset();
    if (kIsDebugBuild) {
      Verify();
    }
  }

  // Empties the list without touching the slots' next pointers.
  void Reset() {
    head_ = 0;
    if (kUseTail) {
      tail_ = 0;
    }
    size_ = 0;
  }

  // Debug-only invariant check: head/tail null-ness must agree with size_,
  // the traversal count must equal size_, and (if used) tail must be the
  // last node reached from head.
  void Verify() {
    Slot* head = reinterpret_cast<Slot*>(head_);
    Slot* tail = kUseTail ? reinterpret_cast<Slot*>(tail_) : nullptr;
    if (size_ == 0) {
      CHECK(head == nullptr);
      if (kUseTail) {
        CHECK(tail == nullptr);
      }
    } else {
      CHECK(head != nullptr);
      if (kUseTail) {
        CHECK(tail != nullptr);
      }
      size_t count = 0;
      for (Slot* slot = head; slot != nullptr; slot = slot->Next()) {
        ++count;
        if (kUseTail && slot->Next() == nullptr) {
          CHECK_EQ(slot, tail);
        }
      }
      CHECK_EQ(size_, count);
    }
  }

 private:
  // A pointer (Slot*) to the head of the list. Always 8 bytes so that we will have the same
  // layout between 32 bit and 64 bit, which is not strictly necessary, but we do so for 1)
  // uniformity, 2) we won't need to change this code if we move to a non-low 4G heap in the
  // future, and 3) the space savings by using 32 bit fields in 32 bit would be lost in noise
  // (won't open up enough space to cause an extra slot to be available).
  uint64_t head_;
  // A pointer (Slot*) to the tail of the list. Always 8 bytes so that we will have the same
  // layout between 32 bit and 64 bit. The tail is stored to speed up merging of lists.
  // Unused if kUseTail is false.
  uint64_t tail_;
  // The number of slots in the list. This is used to make it fast to check if a free list is all
  // free without traversing the whole free list.
  uint32_t size_;
  uint32_t padding_ ATTRIBUTE_UNUSED;
  friend class RosAlloc;
};
|
|
|
|
// Represents a run of memory slots of the same size.
|
|
//
|
|
// A run's memory layout:
|
|
//
|
|
// +-------------------+
|
|
// | magic_num |
|
|
// +-------------------+
|
|
// | size_bracket_idx |
|
|
// +-------------------+
|
|
// | is_thread_local |
|
|
// +-------------------+
|
|
// | to_be_bulk_freed |
|
|
// +-------------------+
|
|
// | |
|
|
// | free list |
|
|
// | |
|
|
// +-------------------+
|
|
// | |
|
|
// | bulk free list |
|
|
// | |
|
|
// +-------------------+
|
|
// | |
|
|
// | thread-local free |
|
|
// | list |
|
|
// | |
|
|
// +-------------------+
|
|
// | padding due to |
|
|
// | alignment |
|
|
// +-------------------+
|
|
// | slot 0 |
|
|
// +-------------------+
|
|
// | slot 1 |
|
|
// +-------------------+
|
|
// | slot 2 |
|
|
// +-------------------+
|
|
// ...
|
|
// +-------------------+
|
|
// | last slot |
|
|
// +-------------------+
|
|
//
|
|
// See the layout diagram above: a Run is a fixed header followed by an array
// of equally sized slots, occupying numOfPages[bracket] whole pages.
class Run {
 public:
  uint8_t magic_num_;         // The magic number used for debugging.
  uint8_t size_bracket_idx_;  // The index of the size bracket of this run.
  uint8_t is_thread_local_;   // True if this run is used as a thread-local run.
  bool to_be_bulk_freed_;     // Used within BulkFree() to flag a run that's involved with
                              // a bulk free.
  uint32_t padding_ ATTRIBUTE_UNUSED;
  // Use a tailless free list for free_list_ so that the alloc fast path does not manage the tail.
  SlotFreeList<false> free_list_;
  SlotFreeList<true> bulk_free_list_;
  SlotFreeList<true> thread_local_free_list_;
  // Padding due to alignment
  // Slot 0
  // Slot 1
  // ...

  // Returns the byte size of the header.
  static size_t fixed_header_size() {
    return sizeof(Run);
  }

  // The first slot, located headerSizes[bracket] bytes past the run start.
  Slot* FirstSlot() const {
    const uint8_t idx = size_bracket_idx_;
    return reinterpret_cast<Slot*>(reinterpret_cast<uintptr_t>(this) + headerSizes[idx]);
  }

  // The last slot, located one bracket size before the end of the run.
  Slot* LastSlot() {
    const uint8_t idx = size_bracket_idx_;
    const size_t bracket_size = bracketSizes[idx];
    uintptr_t end = reinterpret_cast<uintptr_t>(End());
    Slot* last_slot = reinterpret_cast<Slot*>(end - bracket_size);
    DCHECK_LE(FirstSlot(), last_slot);
    return last_slot;
  }

  // Accessor for the allocation free list.
  SlotFreeList<false>* FreeList() {
    return &free_list_;
  }

  // Accessor for the bulk free list.
  SlotFreeList<true>* BulkFreeList() {
    return &bulk_free_list_;
  }

  // Accessor for the thread-local free list.
  SlotFreeList<true>* ThreadLocalFreeList() {
    return &thread_local_free_list_;
  }

  // One past the last byte of the run.
  void* End() {
    return reinterpret_cast<uint8_t*>(this) + kPageSize * numOfPages[size_bracket_idx_];
  }

  // Marks/unmarks this run as a thread-local run.
  void SetIsThreadLocal(bool is_thread_local) {
    is_thread_local_ = is_thread_local ? 1 : 0;
  }

  // True if this run is currently used as a thread-local run.
  bool IsThreadLocal() const {
    return is_thread_local_ != 0;
  }

  // Set up the free list for a new/empty run.
  void InitFreeList() {
    const uint8_t idx = size_bracket_idx_;
    const size_t bracket_size = bracketSizes[idx];
    Slot* first_slot = FirstSlot();
    // Add backwards so the first slot is at the head of the list.
    for (Slot* slot = LastSlot(); slot >= first_slot; slot = slot->Left(bracket_size)) {
      free_list_.Add(slot);
    }
  }

  // Merge the thread local free list to the free list. Used when a thread-local run becomes
  // full.
  bool MergeThreadLocalFreeListToFreeList(bool* is_all_free_after_out);

  // Merge the bulk free list to the free list. Used in a bulk free.
  void MergeBulkFreeListToFreeList();

  // Merge the bulk free list to the thread local free list. In a bulk free, as a two-step
  // process, GC will first record all the slots to free in a run in the bulk free list where it
  // can write without a lock, and later acquire a lock once per run to merge the bulk free list
  // to the thread-local free list.
  void MergeBulkFreeListToThreadLocalFreeList();

  // Allocates a slot in a run.
  ALWAYS_INLINE void* AllocSlot();

  // Frees a slot in a run. This is used in a non-bulk free.
  void FreeSlot(void* ptr);

  // Add the given slot to the bulk free list. Returns the bracket size.
  size_t AddToBulkFreeList(void* ptr);

  // Add the given slot to the thread-local free list.
  void AddToThreadLocalFreeList(void* ptr);

  // Returns true if all the slots in the run are not in use.
  bool IsAllFree() const {
    return free_list_.Size() == numOfSlots[size_bracket_idx_];
  }

  // Returns the number of free slots.
  size_t NumberOfFreeSlots() {
    return free_list_.Size();
  }

  // Returns true if all the slots in the run are in use.
  ALWAYS_INLINE bool IsFull();

  // Returns true if the bulk free list is empty.
  bool IsBulkFreeListEmpty() const {
    return bulk_free_list_.Size() == 0;
  }

  // Returns true if the thread local free list is empty.
  bool IsThreadLocalFreeListEmpty() const {
    return thread_local_free_list_.Size() == 0;
  }

  // Zero the run's data.
  void ZeroData();

  // Zero the run's header and the slot headers.
  void ZeroHeaderAndSlotHeaders();

  // Iterate over all the slots and apply the given function.
  void InspectAllSlots(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg), void* arg);

  // Dump the run metadata for debugging.
  std::string Dump();

  // Verify for debugging.
  void Verify(Thread* self, RosAlloc* rosalloc, bool running_on_memory_tool)
      REQUIRES(Locks::mutator_lock_)
      REQUIRES(Locks::thread_list_lock_);

 private:
  // The common part of AddToBulkFreeList() and AddToThreadLocalFreeList(). Returns the bracket
  // size.
  size_t AddToFreeListShared(void* ptr, SlotFreeList<true>* free_list, const char* caller_name);

  // Turns a FreeList into a string for debugging.
  template<bool kUseTail>
  std::string FreeListToStr(SlotFreeList<kUseTail>* free_list);

  // Check a given pointer is a valid slot address and return it as Slot*.
  Slot* ToSlot(void* ptr) {
    const uint8_t idx = size_bracket_idx_;
    const size_t bracket_size = bracketSizes[idx];
    const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(ptr)
        - reinterpret_cast<uint8_t*>(FirstSlot());
    DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
    // slot_idx is computed only to validate the pointer in debug builds.
    size_t slot_idx = offset_from_slot_base / bracket_size;
    DCHECK_LT(slot_idx, numOfSlots[idx]);
    return reinterpret_cast<Slot*>(ptr);
  }

  // Returns the index of the given slot within this run's slot array.
  size_t SlotIndex(Slot* slot) const {
    const uint8_t idx = size_bracket_idx_;
    const size_t bracket_size = bracketSizes[idx];
    const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(slot)
        - reinterpret_cast<uint8_t*>(FirstSlot());
    DCHECK_EQ(offset_from_slot_base % bracket_size, 0U);
    size_t slot_idx = offset_from_slot_base / bracket_size;
    DCHECK_LT(slot_idx, numOfSlots[idx]);
    return slot_idx;
  }

  // TODO: DISALLOW_COPY_AND_ASSIGN(Run);
};
|
|
|
|
// The magic number for a run.
|
|
static constexpr uint8_t kMagicNum = 42;
|
|
// The magic number for free pages.
|
|
static constexpr uint8_t kMagicNumFree = 43;
|
|
// The number of size brackets.
|
|
static constexpr size_t kNumOfSizeBrackets = 42;
|
|
// The sizes (the slot sizes, in bytes) of the size brackets.
|
|
static size_t bracketSizes[kNumOfSizeBrackets];
|
|
// The numbers of pages that are used for runs for each size bracket.
|
|
static size_t numOfPages[kNumOfSizeBrackets];
|
|
// The numbers of slots of the runs for each size bracket.
|
|
static size_t numOfSlots[kNumOfSizeBrackets];
|
|
// The header sizes in bytes of the runs for each size bracket.
|
|
static size_t headerSizes[kNumOfSizeBrackets];
|
|
|
|
// Initialize the run specs (the above arrays).
|
|
static void Initialize();
|
|
static bool initialized_;
|
|
|
|
// Returns the byte size of the bracket size from the index.
|
|
static size_t IndexToBracketSize(size_t idx) {
|
|
DCHECK_LT(idx, kNumOfSizeBrackets);
|
|
return bracketSizes[idx];
|
|
}
|
|
// Returns the index of the size bracket from the bracket size. The given size
// must be an exact bracket size (see the DCHECK below), unlike SizeToIndex()
// which rounds.
static size_t BracketSizeToIndex(size_t size) {
  DCHECK(8 <= size &&
         ((size <= kMaxThreadLocalBracketSize && size % kThreadLocalBracketQuantumSize == 0) ||
          (size <= kMaxRegularBracketSize && size % kBracketQuantumSize == 0) ||
          size == 1 * KB || size == 2 * KB));
  size_t idx;
  if (UNLIKELY(size == 1 * KB)) {
    // The two non-quantum brackets occupy the last two indexes.
    idx = kNumOfSizeBrackets - 2;
  } else if (UNLIKELY(size == 2 * KB)) {
    idx = kNumOfSizeBrackets - 1;
  } else if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
    // Thread-local brackets: quantum-sized steps starting at index 0.
    DCHECK_EQ(size % kThreadLocalBracketQuantumSize, 0U);
    idx = size / kThreadLocalBracketQuantumSize - 1;
  } else {
    // Regular brackets follow the thread-local brackets.
    DCHECK(size <= kMaxRegularBracketSize);
    DCHECK_EQ((size - kMaxThreadLocalBracketSize) % kBracketQuantumSize, 0U);
    idx = ((size - kMaxThreadLocalBracketSize) / kBracketQuantumSize - 1)
        + kNumThreadLocalSizeBrackets;
  }
  DCHECK(bracketSizes[idx] == size);
  return idx;
}
|
|
// Returns true if the given allocation size is for a thread local allocation.
|
|
static bool IsSizeForThreadLocal(size_t size) {
|
|
bool is_size_for_thread_local = size <= kMaxThreadLocalBracketSize;
|
|
DCHECK(size > kLargeSizeThreshold ||
|
|
(is_size_for_thread_local == (SizeToIndex(size) < kNumThreadLocalSizeBrackets)));
|
|
return is_size_for_thread_local;
|
|
}
|
|
// Rounds up the size up the nearest bracket size.
|
|
static size_t RoundToBracketSize(size_t size) {
|
|
DCHECK(size <= kLargeSizeThreshold);
|
|
if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
|
|
return RoundUp(size, kThreadLocalBracketQuantumSize);
|
|
} else if (size <= kMaxRegularBracketSize) {
|
|
return RoundUp(size, kBracketQuantumSize);
|
|
} else if (UNLIKELY(size <= 1 * KB)) {
|
|
return 1 * KB;
|
|
} else {
|
|
DCHECK_LE(size, 2 * KB);
|
|
return 2 * KB;
|
|
}
|
|
}
|
|
// Returns the size bracket index from the byte size with rounding.
|
|
static size_t SizeToIndex(size_t size) {
|
|
DCHECK(size <= kLargeSizeThreshold);
|
|
if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
|
|
return RoundUp(size, kThreadLocalBracketQuantumSize) / kThreadLocalBracketQuantumSize - 1;
|
|
} else if (size <= kMaxRegularBracketSize) {
|
|
return (RoundUp(size, kBracketQuantumSize) - kMaxThreadLocalBracketSize) / kBracketQuantumSize
|
|
- 1 + kNumThreadLocalSizeBrackets;
|
|
} else if (size <= 1 * KB) {
|
|
return kNumOfSizeBrackets - 2;
|
|
} else {
|
|
DCHECK_LE(size, 2 * KB);
|
|
return kNumOfSizeBrackets - 1;
|
|
}
|
|
}
|
|
// A combination of SizeToIndex() and RoundToBracketSize(): computes both the
// bracket index and the rounded-up bracket size in one pass.
static size_t SizeToIndexAndBracketSize(size_t size, size_t* bracket_size_out) {
  DCHECK(size <= kLargeSizeThreshold);
  size_t idx;
  size_t bracket_size;
  if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
    // Thread-local brackets: small quantum steps at the first indexes.
    bracket_size = RoundUp(size, kThreadLocalBracketQuantumSize);
    idx = bracket_size / kThreadLocalBracketQuantumSize - 1;
  } else if (size <= kMaxRegularBracketSize) {
    // Regular brackets: larger quantum steps following the thread-local ones.
    bracket_size = RoundUp(size, kBracketQuantumSize);
    idx = ((bracket_size - kMaxThreadLocalBracketSize) / kBracketQuantumSize - 1)
        + kNumThreadLocalSizeBrackets;
  } else if (size <= 1 * KB) {
    bracket_size = 1 * KB;
    idx = kNumOfSizeBrackets - 2;
  } else {
    DCHECK(size <= 2 * KB);
    bracket_size = 2 * KB;
    idx = kNumOfSizeBrackets - 1;
  }
  // Debug-only cross-checks against the single-purpose helpers and the tables.
  DCHECK_EQ(idx, SizeToIndex(size)) << idx;
  DCHECK_EQ(bracket_size, IndexToBracketSize(idx)) << idx;
  DCHECK_EQ(bracket_size, bracketSizes[idx]) << idx;
  DCHECK_LE(size, bracket_size) << idx;
  DCHECK(size > kMaxRegularBracketSize ||
         (size <= kMaxThreadLocalBracketSize &&
          bracket_size - size < kThreadLocalBracketQuantumSize) ||
         (size <= kMaxRegularBracketSize && bracket_size - size < kBracketQuantumSize)) << idx;
  *bracket_size_out = bracket_size;
  return idx;
}
|
|
|
|
// Returns the page map index from an address. Requires that the
// address is page size aligned (DCHECKed below) and within the
// currently managed region [base_, base_ + capacity_).
size_t ToPageMapIndex(const void* addr) const {
  DCHECK_LE(base_, addr);
  DCHECK_LT(addr, base_ + capacity_);
  size_t byte_offset = reinterpret_cast<const uint8_t*>(addr) - base_;
  DCHECK_EQ(byte_offset % static_cast<size_t>(kPageSize), static_cast<size_t>(0));
  return byte_offset / kPageSize;
}
|
|
// Returns the page map index from an address with rounding.
|
|
size_t RoundDownToPageMapIndex(const void* addr) const {
|
|
DCHECK(base_ <= addr && addr < reinterpret_cast<uint8_t*>(base_) + capacity_);
|
|
return (reinterpret_cast<uintptr_t>(addr) - reinterpret_cast<uintptr_t>(base_)) / kPageSize;
|
|
}
|
|
|
|
// A memory allocation request larger than this size is treated as a large object and allocated
|
|
// at a page-granularity.
|
|
static const size_t kLargeSizeThreshold = 2048;
|
|
|
|
// If true, check that the returned memory is actually zero.
|
|
static constexpr bool kCheckZeroMemory = kIsDebugBuild;
|
|
// Do not check memory when running under a memory tool. In a normal
|
|
// build with kCheckZeroMemory the whole test should be optimized away.
|
|
// TODO: Unprotect before checks.
|
|
ALWAYS_INLINE bool ShouldCheckZeroMemory();
|
|
|
|
// If true, log verbose details of operations.
|
|
static constexpr bool kTraceRosAlloc = false;
|
|
|
|
// Hash functor for Run* keys (used by full_runs_): hashes by pointer identity.
struct hash_run {
  size_t operator()(const RosAlloc::Run* r) const {
    return reinterpret_cast<size_t>(r);
  }
};
|
|
|
|
// Equality functor for Run* keys (used by full_runs_): pointer identity.
struct eq_run {
  bool operator()(const RosAlloc::Run* r1, const RosAlloc::Run* r2) const {
    return r1 == r2;
  }
};
|
|
|
|
public:
|
|
// Different page release modes.
|
|
enum PageReleaseMode {
|
|
kPageReleaseModeNone, // Release no empty pages.
|
|
kPageReleaseModeEnd, // Release empty pages at the end of the space.
|
|
kPageReleaseModeSize, // Release empty pages that are larger than the threshold.
|
|
kPageReleaseModeSizeAndEnd, // Release empty pages that are larger than the threshold or
|
|
// at the end of the space.
|
|
kPageReleaseModeAll, // Release all empty pages.
|
|
};
|
|
|
|
// The default value for page_release_size_threshold_.
|
|
static constexpr size_t kDefaultPageReleaseSizeThreshold = 4 * MB;
|
|
|
|
// We use thread-local runs for the size brackets whose indexes
|
|
// are less than this index. We use shared (current) runs for the rest.
|
|
// Sync this with the length of Thread::rosalloc_runs_.
|
|
static const size_t kNumThreadLocalSizeBrackets = 16;
|
|
static_assert(kNumThreadLocalSizeBrackets == kNumRosAllocThreadLocalSizeBracketsInThread,
|
|
"Mismatch between kNumThreadLocalSizeBrackets and "
|
|
"kNumRosAllocThreadLocalSizeBracketsInThread");
|
|
|
|
// The size of the largest bracket we use thread-local runs for.
|
|
// This should be equal to bracketSizes[kNumThreadLocalSizeBrackets - 1].
|
|
static const size_t kMaxThreadLocalBracketSize = 128;
|
|
|
|
// We use regular (8 or 16-bytes increment) runs for the size brackets whose indexes are less than
|
|
// this index.
|
|
static const size_t kNumRegularSizeBrackets = 40;
|
|
|
|
// The size of the largest regular (8 or 16-byte increment) bracket. Non-regular brackets are the
|
|
// 1 KB and the 2 KB brackets. This should be equal to bracketSizes[kNumRegularSizeBrackets - 1].
|
|
static const size_t kMaxRegularBracketSize = 512;
|
|
|
|
// The bracket size increment for the thread-local brackets (<= kMaxThreadLocalBracketSize bytes).
|
|
static constexpr size_t kThreadLocalBracketQuantumSize = 8;
|
|
|
|
// Equal to Log2(kThreadLocalBracketQuantumSize).
|
|
static constexpr size_t kThreadLocalBracketQuantumSizeShift = 3;
|
|
|
|
// The bracket size increment for the non-thread-local, regular brackets (of size <=
|
|
// kMaxRegularBracketSize bytes and > kMaxThreadLocalBracketSize bytes).
|
|
static constexpr size_t kBracketQuantumSize = 16;
|
|
|
|
// Equal to Log2(kBracketQuantumSize).
|
|
static constexpr size_t kBracketQuantumSizeShift = 4;
|
|
|
|
private:
|
|
// The base address of the memory region that's managed by this allocator.
|
|
uint8_t* base_;
|
|
|
|
// The footprint in bytes of the currently allocated portion of the
|
|
// memory region.
|
|
size_t footprint_;
|
|
|
|
// The maximum footprint. The address, base_ + capacity_, indicates
|
|
// the end of the memory region that's currently managed by this allocator.
|
|
size_t capacity_;
|
|
|
|
// The maximum capacity. The address, base_ + max_capacity_, indicates
|
|
// the end of the memory region that's ever managed by this allocator.
|
|
size_t max_capacity_;
|
|
|
|
template<class Key, AllocatorTag kTag, class Compare = std::less<Key>>
|
|
using AllocationTrackingSet = std::set<Key, Compare, TrackingAllocator<Key, kTag>>;
|
|
|
|
// The run sets that hold the runs whose slots are not all
|
|
// full. non_full_runs_[i] is guarded by size_bracket_locks_[i].
|
|
AllocationTrackingSet<Run*, kAllocatorTagRosAlloc> non_full_runs_[kNumOfSizeBrackets];
|
|
// The run sets that hold the runs whose slots are all full. This is
|
|
// debug only. full_runs_[i] is guarded by size_bracket_locks_[i].
|
|
std::unordered_set<Run*, hash_run, eq_run, TrackingAllocator<Run*, kAllocatorTagRosAlloc>>
|
|
full_runs_[kNumOfSizeBrackets];
|
|
// The set of free pages.
|
|
AllocationTrackingSet<FreePageRun*, kAllocatorTagRosAlloc> free_page_runs_ GUARDED_BY(lock_);
|
|
// The dedicated full run, it is always full and shared by all threads when revoking happens.
|
|
// This is an optimization since enables us to avoid a null check for revoked runs.
|
|
static Run* dedicated_full_run_;
|
|
// Using size_t to ensure that it is at least word aligned.
|
|
static size_t dedicated_full_run_storage_[];
|
|
// The current runs where the allocations are first attempted for
|
|
// the size brackes that do not use thread-local
|
|
// runs. current_runs_[i] is guarded by size_bracket_locks_[i].
|
|
Run* current_runs_[kNumOfSizeBrackets];
|
|
// The mutexes, one per size bracket.
|
|
Mutex* size_bracket_locks_[kNumOfSizeBrackets];
|
|
// Bracket lock names (since locks only have char* names).
|
|
std::string size_bracket_lock_names_[kNumOfSizeBrackets];
|
|
// The types of page map entries.
|
|
enum PageMapKind {
|
|
kPageMapReleased = 0, // Zero and released back to the OS.
|
|
kPageMapEmpty, // Zero but probably dirty.
|
|
kPageMapRun, // The beginning of a run.
|
|
kPageMapRunPart, // The non-beginning part of a run.
|
|
kPageMapLargeObject, // The beginning of a large object.
|
|
kPageMapLargeObjectPart, // The non-beginning part of a large object.
|
|
};
|
|
// The table that indicates what pages are currently used for.
|
|
volatile uint8_t* page_map_; // No GUARDED_BY(lock_) for kReadPageMapEntryWithoutLockInBulkFree.
|
|
size_t page_map_size_;
|
|
size_t max_page_map_size_;
|
|
MemMap page_map_mem_map_;
|
|
|
|
// The table that indicates the size of free page runs. These sizes
|
|
// are stored here to avoid storing in the free page header and
|
|
// release backing pages.
|
|
std::vector<size_t, TrackingAllocator<size_t, kAllocatorTagRosAlloc>> free_page_run_size_map_
|
|
GUARDED_BY(lock_);
|
|
// The global lock. Used to guard the page map, the free page set,
|
|
// and the footprint.
|
|
Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
|
|
// The reader-writer lock to allow one bulk free at a time while
|
|
// allowing multiple individual frees at the same time. Also, this
|
|
// is used to avoid race conditions between BulkFree() and
|
|
// RevokeThreadLocalRuns() on the bulk free list.
|
|
ReaderWriterMutex bulk_free_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
|
|
|
|
// The page release mode.
|
|
const PageReleaseMode page_release_mode_;
|
|
// Under kPageReleaseModeSize(AndEnd), if the free page run size is
|
|
// greater than or equal to this value, release pages.
|
|
const size_t page_release_size_threshold_;
|
|
|
|
// Whether this allocator is running on a memory tool.
|
|
bool is_running_on_memory_tool_;
|
|
|
|
// Returns the base address of the memory region that's managed by this allocator.
uint8_t* Begin() { return base_; }
|
|
// Returns the end address (base_ + capacity_) of the memory region that's
// currently managed by this allocator.
uint8_t* End() { return base_ + capacity_; }
|
|
|
|
// Page-granularity alloc/free
|
|
void* AllocPages(Thread* self, size_t num_pages, uint8_t page_map_type)
|
|
REQUIRES(lock_);
|
|
// Returns how many bytes were freed.
|
|
size_t FreePages(Thread* self, void* ptr, bool already_zero) REQUIRES(lock_);
|
|
|
|
// Allocate/free a run slot.
|
|
void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size,
|
|
size_t* bytes_tl_bulk_allocated)
|
|
REQUIRES(!lock_);
|
|
// Allocate/free a run slot without acquiring locks.
|
|
// TODO: REQUIRES(Locks::mutator_lock_)
|
|
void* AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated,
|
|
size_t* usable_size, size_t* bytes_tl_bulk_allocated)
|
|
REQUIRES(!lock_);
|
|
void* AllocFromCurrentRunUnlocked(Thread* self, size_t idx) REQUIRES(!lock_);
|
|
|
|
// Returns the bracket size.
|
|
size_t FreeFromRun(Thread* self, void* ptr, Run* run)
|
|
REQUIRES(!lock_);
|
|
|
|
// Used to allocate a new thread local run for a size bracket.
|
|
Run* AllocRun(Thread* self, size_t idx) REQUIRES(!lock_);
|
|
|
|
// Used to acquire a new/reused run for a size bracket. Used when a
|
|
// thread-local or current run gets full.
|
|
Run* RefillRun(Thread* self, size_t idx) REQUIRES(!lock_);
|
|
|
|
// The internal of non-bulk Free().
|
|
size_t FreeInternal(Thread* self, void* ptr) REQUIRES(!lock_);
|
|
|
|
// Allocates large objects.
|
|
void* AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated,
|
|
size_t* usable_size, size_t* bytes_tl_bulk_allocated)
|
|
REQUIRES(!lock_);
|
|
|
|
// Revoke a run by adding it to non_full_runs_ or freeing the pages.
|
|
void RevokeRun(Thread* self, size_t idx, Run* run) REQUIRES(!lock_);
|
|
|
|
// Revoke the current runs which share an index with the thread local runs.
|
|
void RevokeThreadUnsafeCurrentRuns() REQUIRES(!lock_);
|
|
|
|
// Release a range of pages.
|
|
size_t ReleasePageRange(uint8_t* start, uint8_t* end) REQUIRES(lock_);
|
|
|
|
// Dumps the page map for debugging.
|
|
std::string DumpPageMap() REQUIRES(lock_);
|
|
|
|
public:
// Constructs an allocator over |base| with an initial |capacity|, growable
// up to |max_capacity|. |page_release_mode| and
// |page_release_size_threshold| control when empty pages are returned to
// the OS (see PageReleaseMode).
RosAlloc(void* base, size_t capacity, size_t max_capacity,
         PageReleaseMode page_release_mode,
         bool running_on_memory_tool,
         size_t page_release_size_threshold = kDefaultPageReleaseSizeThreshold);
~RosAlloc();

// Byte offsets of Run / slot free-list internals (via OFFSETOF_MEMBER).
// NOTE(review): presumably consumed by compiled allocation fast paths —
// confirm before reordering the underlying fields.
static constexpr size_t RunFreeListOffset() {
  return OFFSETOF_MEMBER(Run, free_list_);
}
static constexpr size_t RunFreeListHeadOffset() {
  return OFFSETOF_MEMBER(SlotFreeList<false>, head_);
}
static constexpr size_t RunFreeListSizeOffset() {
  return OFFSETOF_MEMBER(SlotFreeList<false>, size_);
}
static constexpr size_t RunSlotNextOffset() {
  return OFFSETOF_MEMBER(Slot, next_);
}
|
|
|
|
// If kThreadSafe is false then the allocator may avoid acquiring some locks
// as an optimization. If used, this may cause race conditions if multiple
// threads are allocating at the same time.
// (Fixed comment: it previously referred to a nonexistent "kThreadUnsafe"
// parameter with inverted polarity.)
template<bool kThreadSafe = true>
void* Alloc(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size,
            size_t* bytes_tl_bulk_allocated)
    REQUIRES(!lock_);
// Frees |ptr|. Returns a byte count (presumably the bytes freed).
size_t Free(Thread* self, void* ptr)
    REQUIRES(!bulk_free_lock_, !lock_);
// Frees |num_ptrs| pointers in |ptrs| in one pass.
size_t BulkFree(Thread* self, void** ptrs, size_t num_ptrs)
    REQUIRES(!bulk_free_lock_, !lock_);

// Returns true if the given allocation request can be allocated in
// an existing thread local run without allocating a new run.
ALWAYS_INLINE bool CanAllocFromThreadLocalRun(Thread* self, size_t size);
// Allocate the given allocation request in an existing thread local
// run without allocating a new run.
ALWAYS_INLINE void* AllocFromThreadLocalRun(Thread* self, size_t size, size_t* bytes_allocated);

// Returns the maximum bytes that could be allocated for the given
// size in bulk, that is the maximum value for the
// bytes_allocated_bulk out param returned by RosAlloc::Alloc().
ALWAYS_INLINE size_t MaxBytesBulkAllocatedFor(size_t size);

// Returns the size of the allocated slot for a given allocated memory chunk.
size_t UsableSize(const void* ptr) REQUIRES(!lock_);
|
|
// Returns the size of the allocated slot for a given size.
|
|
size_t UsableSize(size_t bytes) {
|
|
if (UNLIKELY(bytes > kLargeSizeThreshold)) {
|
|
return RoundUp(bytes, kPageSize);
|
|
} else {
|
|
return RoundToBracketSize(bytes);
|
|
}
|
|
}
|
|
// Try to reduce the current footprint by releasing the free page
// run at the end of the memory region, if any.
bool Trim() REQUIRES(!lock_);
// Iterates over all the memory slots and applies |handler| to each, passing
// the slot bounds, the bytes in use, and the opaque |arg|.
void InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
                void* arg)
    REQUIRES(!lock_);

// Release empty pages. Returns a byte count (presumably the bytes released).
size_t ReleasePages() REQUIRES(!lock_);
// Returns the current footprint.
size_t Footprint() REQUIRES(!lock_);
// Returns the current capacity, maximum footprint.
size_t FootprintLimit() REQUIRES(!lock_);
// Update the current capacity.
void SetFootprintLimit(size_t bytes) REQUIRES(!lock_);

// Releases the thread-local runs assigned to the given thread back to the common set of runs.
// Returns the total bytes of free slots in the revoked thread local runs. This is to be
// subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting.
size_t RevokeThreadLocalRuns(Thread* thread) REQUIRES(!lock_, !bulk_free_lock_);
// Releases the thread-local runs assigned to all the threads back to the common set of runs.
// Returns the total bytes of free slots in the revoked thread local runs. This is to be
// subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting.
size_t RevokeAllThreadLocalRuns() REQUIRES(!Locks::thread_list_lock_, !lock_, !bulk_free_lock_);
// Assert the thread local runs of a thread are revoked.
void AssertThreadLocalRunsAreRevoked(Thread* thread) REQUIRES(!bulk_free_lock_);
// Assert all the thread local runs are revoked.
void AssertAllThreadLocalRunsAreRevoked() REQUIRES(!Locks::thread_list_lock_, !bulk_free_lock_);
|
|
|
|
// Returns the statically shared "dedicated full run".
// NOTE(review): presumably a sentinel Run installed where no real run is
// available, so fast paths need no null check — confirm against Run usage.
static Run* GetDedicatedFullRun() {
  return dedicated_full_run_;
}
|
|
bool IsFreePage(size_t idx) const {
|
|
DCHECK_LT(idx, capacity_ / kPageSize);
|
|
uint8_t pm_type = page_map_[idx];
|
|
return pm_type == kPageMapReleased || pm_type == kPageMapEmpty;
|
|
}
|
|
|
|
// Callbacks for InspectAll that will count the number of bytes
// allocated and objects allocated, respectively.
static void BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
static void ObjectsAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);

// Returns true if the allocator was configured with kPageReleaseModeAll,
// i.e. it releases every empty page rather than a subset.
bool DoesReleaseAllPages() const {
  return page_release_mode_ == kPageReleaseModeAll;
}
|
|
|
|
// Verify internal consistency, for debugging. Requires the mutator lock.
void Verify() REQUIRES(Locks::mutator_lock_, !Locks::thread_list_lock_, !bulk_free_lock_,
                       !lock_);

// Logs diagnostics for a failed allocation of |failed_alloc_bytes| to |os|
// (fragmentation details).
bool LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes)
    REQUIRES(!bulk_free_lock_, !lock_);

// Writes allocator statistics to |os|. Requires the mutator lock.
void DumpStats(std::ostream& os)
    REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_) REQUIRES(!bulk_free_lock_);

private:
// Grants the free operator<< access to the private PageMapKind enum.
friend std::ostream& operator<<(std::ostream& os, RosAlloc::PageMapKind rhs);

DISALLOW_COPY_AND_ASSIGN(RosAlloc);
|
|
};
|
|
// Stream-insertion for the page map state enum (debugging output).
std::ostream& operator<<(std::ostream& os, RosAlloc::PageMapKind rhs);

// Callback from rosalloc when it needs to increase the footprint. Must be implemented somewhere
// else (currently rosalloc_space.cc).
// NOTE(review): |increment| is signed — presumably a negative value shrinks
// the footprint; confirm in rosalloc_space.cc.
void* ArtRosAllocMoreCore(allocator::RosAlloc* rosalloc, intptr_t increment);
|
|
|
|
} // namespace allocator
|
|
} // namespace gc
|
|
} // namespace art
|
|
|
|
#endif // ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
|