You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
428 lines
14 KiB
428 lines
14 KiB
//===-- tsan_clock.cc -----------------------------------------------------===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file is a part of ThreadSanitizer (TSan), a race detector.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
#include "tsan_clock.h"
|
|
#include "tsan_rtl.h"
|
|
#include "sanitizer_common/sanitizer_placement_new.h"
|
|
|
|
// SyncClock and ThreadClock implement vector clocks for sync variables
|
|
// (mutexes, atomic variables, file descriptors, etc) and threads, respectively.
|
|
// ThreadClock contains fixed-size vector clock for maximum number of threads.
|
|
// SyncClock contains growable vector clock for currently necessary number of
|
|
// threads.
|
|
// Together they implement very simple model of operations, namely:
|
|
//
|
|
// void ThreadClock::acquire(const SyncClock *src) {
|
|
// for (int i = 0; i < kMaxThreads; i++)
|
|
// clock[i] = max(clock[i], src->clock[i]);
|
|
// }
|
|
//
|
|
// void ThreadClock::release(SyncClock *dst) const {
|
|
// for (int i = 0; i < kMaxThreads; i++)
|
|
// dst->clock[i] = max(dst->clock[i], clock[i]);
|
|
// }
|
|
//
|
|
// void ThreadClock::ReleaseStore(SyncClock *dst) const {
|
|
// for (int i = 0; i < kMaxThreads; i++)
|
|
// dst->clock[i] = clock[i];
|
|
// }
|
|
//
|
|
// void ThreadClock::acq_rel(SyncClock *dst) {
|
|
// acquire(dst);
|
|
// release(dst);
|
|
// }
|
|
//
|
|
// Conformance to this model is extensively verified in tsan_clock_test.cc.
|
|
// However, the implementation is significantly more complex. The complexity
|
|
// allows to implement important classes of use cases in O(1) instead of O(N).
|
|
//
|
|
// The use cases are:
|
|
// 1. Singleton/once atomic that has a single release-store operation followed
|
|
// by zillions of acquire-loads (the acquire-load is O(1)).
|
|
// 2. Thread-local mutex (both lock and unlock can be O(1)).
|
|
// 3. Leaf mutex (unlock is O(1)).
|
|
// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
|
|
// 5. An atomic with a single writer (writes can be O(1)).
|
|
// The implementation dynamically adapts to workload. So if an atomic is in
|
|
// read-only phase, these reads will be O(1); if it later switches to read/write
|
|
// phase, the implementation will correctly handle that by switching to O(N).
|
|
//
|
|
// Thread-safety note: all const operations on SyncClock's are conducted under
|
|
// a shared lock; all non-const operations on SyncClock's are conducted under
|
|
// an exclusive lock; ThreadClock's are private to respective threads and so
|
|
// do not need any protection.
|
|
//
|
|
// Description of ThreadClock state:
|
|
// clk_ - fixed size vector clock.
|
|
// nclk_ - effective size of the vector clock (the rest is zeros).
|
|
// tid_ - index of the thread associated with the clock ("current thread").
|
|
// last_acquire_ - current thread time when it acquired something from
|
|
// other threads.
|
|
//
|
|
// Description of SyncClock state:
|
|
// clk_ - variable size vector clock, low kClkBits hold timestamp,
|
|
// the remaining bits hold "acquired" flag (the actual value is thread's
|
|
// reused counter);
|
|
// if acquired == thr->reused_, then the respective thread has already
|
|
// acquired this clock (except possibly dirty_tids_).
|
|
// dirty_tids_ - holds up to two indices in the vector clock that other threads
|
|
// need to acquire regardless of "acquired" flag value;
|
|
// release_store_tid_ - denotes that the clock state is a result of
|
|
// release-store operation by the thread with release_store_tid_ index.
|
|
// release_store_reused_ - reuse count of release_store_tid_.
|
|
|
|
// The clock methods are not handed a ThreadState, so the statistics macro
// looks up the current thread on its own. That is an ugly hack which only
// works in the C++ runtime; in Go builds the macro expands to a no-op.
#ifdef SANITIZER_GO
# define CPP_STAT_INC(typ) (void)0
#else
# define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
#endif
|
|
|
|
namespace __tsan {
|
|
|
|
// Creates the vector clock of the thread occupying slot `tid`.
//   tid    - index of the owning thread; must be below kMaxTidInClock.
//   reused - reuse count of that slot; it is stored incremented by one
//            because a stored value of 0 has a special meaning.
ThreadClock::ThreadClock(unsigned tid, unsigned reused)
    : tid_(tid),
      reused_(reused + 1) {
  CHECK_LT(tid, kMaxTidInClock);
  // The reuse counter must survive a round trip through the bits that remain
  // above the low kClkBits of a 64-bit word.
  const u64 round_trip = ((u64)reused_ << kClkBits) >> kClkBits;
  CHECK_EQ(reused_, round_trip);

  // Start from an all-zero clock that carries only our own reuse counter.
  internal_memset(clk_, 0, sizeof(clk_));
  clk_[tid_].reused = reused_;
  last_acquire_ = 0;
  // The effective size covers exactly the entries up to our own.
  nclk_ = tid_ + 1;
}
|
|
|
|
// Merges the sync clock `src` into this thread clock: every entry of this
// clock that is older than the matching entry of `src` is advanced to it.
// `c` is not used by this method.
//
// Three paths, cheapest first:
//   - `src` is empty: nothing to do;
//   - `src` already carries this thread's reuse counter in its own entry:
//     only the entries named in src->dirty_tids_ are examined;
//   - otherwise all entries of `src` are examined and, if `src` has an entry
//     for this thread, it is stamped with our reuse counter so that a repeated
//     acquire can take the short path.
// Whenever at least one entry advanced, last_acquire_ is set to the current
// thread's own epoch.
void ThreadClock::acquire(ClockCache *c, const SyncClock *src) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(src->size_, kMaxTid);
  CPP_STAT_INC(StatClockAcquire);

  // An empty sync clock has nothing to offer.
  const uptr src_size = src->size_;
  if (src_size == 0) {
    CPP_STAT_INC(StatClockAcquireEmpty);
    return;
  }

  // Does src have an entry for the current thread at all?
  const bool has_own_entry = src_size > tid_;
  bool advanced = false;

  if (has_own_entry) {
    CPP_STAT_INC(StatClockAcquireLarge);
    // Have we already acquired src after the last release operation on it?
    if (src->elem(tid_).reused == reused_) {
      CPP_STAT_INC(StatClockAcquireRepeat);
      // Only the dirty entries can still be ahead of us.
      for (unsigned slot = 0; slot < kDirtyTids; slot++) {
        const unsigned dirty = src->dirty_tids_[slot];
        if (dirty == kInvalidTid)
          continue;
        const u64 theirs = src->elem(dirty).epoch;
        if (clk_[dirty].epoch < theirs) {
          clk_[dirty].epoch = theirs;
          advanced = true;
        }
      }
      if (advanced) {
        CPP_STAT_INC(StatClockAcquiredSomething);
        last_acquire_ = clk_[tid_].epoch;
      }
      return;
    }
  }

  // O(N) acquire: walk every entry of src.
  CPP_STAT_INC(StatClockAcquireFull);
  nclk_ = max(nclk_, src_size);
  for (uptr i = 0; i < src_size; i++) {
    const u64 theirs = src->elem(i).epoch;
    if (clk_[i].epoch < theirs) {
      clk_[i].epoch = theirs;
      advanced = true;
    }
  }

  // Remember that this thread has acquired this clock.
  if (has_own_entry)
    src->elem(tid_).reused = reused_;

  if (advanced) {
    CPP_STAT_INC(StatClockAcquiredSomething);
    last_acquire_ = clk_[tid_].epoch;
  }
}
|
|
|
|
// Merges this thread clock into the sync clock `dst` (element-wise maximum).
// `c` is the cache handed to the block allocator when `dst` has to grow.
//
// Paths:
//   - `dst` is empty: delegated to ReleaseStore;
//   - dst's entry for this thread is newer than last_acquire_: only that
//     single entry is refreshed via UpdateCurrentThread;
//   - otherwise all nclk_ entries are merged, all 'acquired' marks and dirty
//     slots are cleared and the release-store owner is dropped.
void ThreadClock::release(ClockCache *c, SyncClock *dst) const {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->size_, kMaxTid);

  if (dst->size_ == 0) {
    // ReleaseStore will correctly set release_store_tid_,
    // which can be important for future operations.
    ReleaseStore(c, dst);
    return;
  }

  CPP_STAT_INC(StatClockRelease);

  // Make sure dst has room for every entry we are about to publish.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  // If we had not acquired anything from other threads since the last
  // release on dst, only dst->elem(tid_) needs to be updated.
  if (dst->elem(tid_).epoch > last_acquire_) {
    UpdateCurrentThread(dst);
    // dst remains a pure release-store result only if this very thread
    // incarnation was the one that stored it.
    const bool still_our_store = dst->release_store_tid_ == tid_ &&
                                 dst->release_store_reused_ == reused_;
    if (!still_our_store)
      dst->release_store_tid_ = kInvalidTid;
    return;
  }

  // O(N) release.
  CPP_STAT_INC(StatClockReleaseFull);

  // First, remember whether we have acquired dst; the marks are wiped below.
  const bool had_acquired = IsAlreadyAcquired(dst);
  if (had_acquired) {
    CPP_STAT_INC(StatClockReleaseAcquired);
  }

  // Raise dst's entries to ours and clear their 'acquired' marks.
  for (uptr i = 0; i < nclk_; i++) {
    ClockElem &target = dst->elem(i);
    if (target.epoch < clk_[i].epoch)
      target.epoch = clk_[i].epoch;
    target.reused = 0;
  }

  // Clear the 'acquired' mark in the remaining elements.
  if (nclk_ < dst->size_) {
    CPP_STAT_INC(StatClockReleaseClearTail);
    for (uptr i = nclk_; i < dst->size_; i++)
      dst->elem(i).reused = 0;
  }

  for (unsigned slot = 0; slot < kDirtyTids; slot++)
    dst->dirty_tids_[slot] = kInvalidTid;
  dst->release_store_tid_ = kInvalidTid;
  dst->release_store_reused_ = 0;

  // If we had acquired dst, record that again so that the next acquire by
  // this thread does not have to walk the whole clock.
  if (had_acquired)
    dst->elem(tid_).reused = reused_;
}
|
|
|
|
// Overwrites the sync clock `dst` with this thread clock.
// `c` is the cache handed to the block allocator when `dst` has to grow.
//
// If `dst` is already the result of a release-store by this very thread
// incarnation and its entry for this thread is newer than last_acquire_, only
// that single entry is refreshed. Otherwise the first nclk_ entries are
// copied, any remaining entries are zeroed, the dirty slots are cleared and
// this thread is recorded as the release-store owner.
void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->size_, kMaxTid);
  CPP_STAT_INC(StatClockStore);

  // Make sure dst has room for every entry we are about to publish.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  const bool our_previous_store = dst->release_store_tid_ == tid_ &&
                                  dst->release_store_reused_ == reused_;
  if (our_previous_store && dst->elem(tid_).epoch > last_acquire_) {
    CPP_STAT_INC(StatClockStoreFast);
    UpdateCurrentThread(dst);
    return;
  }

  // O(N) release-store.
  CPP_STAT_INC(StatClockStoreFull);
  for (uptr i = 0; i < nclk_; i++) {
    ClockElem &target = dst->elem(i);
    target.epoch = clk_[i].epoch;
    target.reused = 0;
  }

  // Clear the tail of dst->clk_ that lies beyond our effective size.
  if (nclk_ < dst->size_) {
    for (uptr i = nclk_; i < dst->size_; i++) {
      ClockElem &target = dst->elem(i);
      target.epoch = 0;
      target.reused = 0;
    }
    CPP_STAT_INC(StatClockStoreTail);
  }

  for (unsigned slot = 0; slot < kDirtyTids; slot++)
    dst->dirty_tids_[slot] = kInvalidTid;
  dst->release_store_tid_ = tid_;
  dst->release_store_reused_ = reused_;

  // Remember that we don't need to acquire it in future.
  dst->elem(tid_).reused = reused_;
}
|
|
|
|
// Combined acquire and release on the same sync clock `dst`.
// `c` is forwarded unchanged to both operations.
//
// The release half is carried out with ReleaseStore rather than release.
// NOTE(review): presumably this is equivalent because, right after the
// acquire, this clock is element-wise >= dst, so overwriting dst gives the
// same result as merging into it - confirm against tsan_clock_test.cc.
void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
  CPP_STAT_INC(StatClockAcquireRelease);
  acquire(c, dst);
  ReleaseStore(c, dst);
}
|
|
|
|
// Updates only single element related to the current thread in dst->clk_.
|
|
void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
|
|
// Update the threads time, but preserve 'acquired' flag.
|
|
dst->elem(tid_).epoch = clk_[tid_].epoch;
|
|
|
|
for (unsigned i = 0; i < kDirtyTids; i++) {
|
|
if (dst->dirty_tids_[i] == tid_) {
|
|
CPP_STAT_INC(StatClockReleaseFast1);
|
|
return;
|
|
}
|
|
if (dst->dirty_tids_[i] == kInvalidTid) {
|
|
CPP_STAT_INC(StatClockReleaseFast2);
|
|
dst->dirty_tids_[i] = tid_;
|
|
return;
|
|
}
|
|
}
|
|
// Reset all 'acquired' flags, O(N).
|
|
CPP_STAT_INC(StatClockReleaseSlow);
|
|
for (uptr i = 0; i < dst->size_; i++)
|
|
dst->elem(i).reused = 0;
|
|
for (unsigned i = 0; i < kDirtyTids; i++)
|
|
dst->dirty_tids_[i] = kInvalidTid;
|
|
}
|
|
|
|
// Checks whether the current threads has already acquired src.
|
|
bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
|
|
if (src->elem(tid_).reused != reused_)
|
|
return false;
|
|
for (unsigned i = 0; i < kDirtyTids; i++) {
|
|
unsigned tid = src->dirty_tids_[i];
|
|
if (tid != kInvalidTid) {
|
|
if (clk_[tid].epoch < src->elem(tid).epoch)
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Grows the clock so that it holds `nclk` elements.
// `c` is the cache handed to ctx->clock_alloc for every block allocation.
//
// Storage layout handled here:
//   - up to ClockBlock::kClockCount elements live in one block (tab_);
//   - beyond that, tab_ becomes a first-level table whose entries are the
//     indices of second-level blocks of kClockCount elements each.
// Newly allocated blocks are zero-filled.
void SyncClock::Resize(ClockCache *c, uptr nclk) {
  CPP_STAT_INC(StatClockReleaseResize);
  const uptr per_block = ClockBlock::kClockCount;

  // Growing within the same block: the memory is already allocated, so only
  // the size needs to change.
  if (RoundUpTo(nclk, per_block) <= RoundUpTo(size_, per_block)) {
    size_ = nclk;
    return;
  }

  // Grow from 0 to a one-level table.
  if (nclk <= per_block) {
    CHECK_EQ(size_, 0);
    CHECK_EQ(tab_, 0);
    CHECK_EQ(tab_idx_, 0);
    size_ = nclk;
    tab_idx_ = ctx->clock_alloc.Alloc(c);
    tab_ = ctx->clock_alloc.Map(tab_idx_);
    internal_memset(tab_, 0, sizeof(*tab_));
    return;
  }

  // Growing a two-level table. A fresh first-level table is needed when the
  // clock is still empty or currently consists of a single block.
  if (size_ <= per_block) {
    const u32 previous_root = tab_idx_;
    tab_idx_ = ctx->clock_alloc.Alloc(c);
    tab_ = ctx->clock_alloc.Map(tab_idx_);
    internal_memset(tab_, 0, sizeof(*tab_));
    // A former one-level block becomes the first second-level block.
    if (size_ != 0)
      tab_->table[0] = previous_root;
  }

  // At this point the first-level table exists.
  // Add second-level blocks as necessary.
  for (uptr first = RoundUpTo(size_, per_block); first < nclk;
       first += per_block) {
    const uptr slot = first / per_block;
    const u32 block_idx = ctx->clock_alloc.Alloc(c);
    ClockBlock *block = ctx->clock_alloc.Map(block_idx);
    internal_memset(block, 0, sizeof(*block));
    CHECK_EQ(tab_->table[slot], 0);
    tab_->table[slot] = block_idx;
  }
  size_ = nclk;
}
|
|
|
|
// Sets a single element in the vector clock.
|
|
// This function is called only from weird places like AcquireGlobal.
|
|
void ThreadClock::set(unsigned tid, u64 v) {
|
|
DCHECK_LT(tid, kMaxTid);
|
|
DCHECK_GE(v, clk_[tid].epoch);
|
|
clk_[tid].epoch = v;
|
|
if (nclk_ <= tid)
|
|
nclk_ = tid + 1;
|
|
last_acquire_ = clk_[tid_].epoch;
|
|
}
|
|
|
|
void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
|
|
printf("clock=[");
|
|
for (uptr i = 0; i < nclk_; i++)
|
|
printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
|
|
printf("] reused=[");
|
|
for (uptr i = 0; i < nclk_; i++)
|
|
printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
|
|
printf("] tid=%u/%u last_acq=%llu",
|
|
tid_, reused_, last_acquire_);
|
|
}
|
|
|
|
// Creates an empty sync clock: no storage, size 0, no release-store owner
// and no dirty entries.
SyncClock::SyncClock()
    : release_store_tid_(kInvalidTid),
      release_store_reused_(0),
      tab_(0),
      tab_idx_(0),
      size_(0) {
  for (uptr slot = 0; slot < kDirtyTids; slot++)
    dirty_tids_[slot] = kInvalidTid;
}
|
|
|
|
// The destructor releases nothing itself: Reset must have been called
// beforehand, which leaves the clock empty. The checks enforce that.
SyncClock::~SyncClock() {
  CHECK_EQ(size_, 0);
  CHECK_EQ(tab_, 0);
  CHECK_EQ(tab_idx_, 0);
}
|
|
|
|
// Returns all storage blocks to ctx->clock_alloc (through cache `c`) and puts
// the clock back into the empty state: size 0, no table, no release-store
// owner and no dirty entries.
void SyncClock::Reset(ClockCache *c) {
  const uptr per_block = ClockBlock::kClockCount;

  // Two-level table: free every second-level block first.
  if (size_ > per_block) {
    for (uptr first = 0; first < size_; first += per_block)
      ctx->clock_alloc.Free(c, tab_->table[first / per_block]);
  }
  // Any non-empty clock owns the block referenced by tab_idx_: the only block
  // of a one-level table, or the first-level table of a two-level one.
  if (size_ != 0)
    ctx->clock_alloc.Free(c, tab_idx_);

  tab_ = 0;
  tab_idx_ = 0;
  size_ = 0;
  release_store_tid_ = kInvalidTid;
  release_store_reused_ = 0;
  for (uptr slot = 0; slot < kDirtyTids; slot++)
    dirty_tids_[slot] = kInvalidTid;
}
|
|
|
|
// Returns a mutable reference to the clock element of thread `tid`.
// `tid` must be below size_. With a one-level table the element is read
// straight from tab_; otherwise tab_ is the first-level table and the owning
// second-level block is mapped through ctx->clock_alloc.
ClockElem &SyncClock::elem(unsigned tid) const {
  DCHECK_LT(tid, size_);
  if (size_ > ClockBlock::kClockCount) {
    // Two-level table: locate the block, then the element inside it.
    const u32 block_idx = tab_->table[tid / ClockBlock::kClockCount];
    ClockBlock *block = ctx->clock_alloc.Map(block_idx);
    return block->clock[tid % ClockBlock::kClockCount];
  }
  return tab_->clock[tid];
}
|
|
|
|
void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
|
|
printf("clock=[");
|
|
for (uptr i = 0; i < size_; i++)
|
|
printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
|
|
printf("] reused=[");
|
|
for (uptr i = 0; i < size_; i++)
|
|
printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
|
|
printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
|
|
release_store_tid_, release_store_reused_,
|
|
dirty_tids_[0], dirty_tids_[1]);
|
|
}
|
|
} // namespace __tsan
|