You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1041 lines
34 KiB
1041 lines
34 KiB
/*
|
|
* ompt-tsan.cpp -- Archer runtime library, TSan annotations for Archer
|
|
*/
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for details.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef __STDC_FORMAT_MACROS
|
|
#define __STDC_FORMAT_MACROS
|
|
#endif
|
|
|
|
#include <algorithm>
|
|
#include <atomic>
|
|
#include <cassert>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <inttypes.h>
|
|
#include <iostream>
|
|
#include <list>
|
|
#include <mutex>
|
|
#include <sstream>
|
|
#include <stack>
|
|
#include <string>
|
|
#include <unordered_map>
|
|
#include <vector>
|
|
|
|
#if (defined __APPLE__ && defined __MACH__)
|
|
#include <dlfcn.h>
|
|
#endif
|
|
|
|
#include "omp-tools.h"
|
|
#include <sys/resource.h>
|
|
|
|
// Define attribute that indicates that the fall through from the previous
|
|
// case label is intentional and should not be diagnosed by a compiler
|
|
// Code from libcxx/include/__config
|
|
// Use a function like macro to imply that it must be followed by a semicolon
|
|
#if __cplusplus > 201402L && __has_cpp_attribute(fallthrough)
|
|
#define KMP_FALLTHROUGH() [[fallthrough]]
|
|
#elif __has_cpp_attribute(clang::fallthrough)
|
|
#define KMP_FALLTHROUGH() [[clang::fallthrough]]
|
|
#elif __has_attribute(fallthrough) || __GNUC__ >= 7
|
|
#define KMP_FALLTHROUGH() __attribute__((__fallthrough__))
|
|
#else
|
|
#define KMP_FALLTHROUGH() ((void)0)
|
|
#endif
|
|
|
|
static int runOnTsan;
|
|
static int hasReductionCallback;
|
|
|
|
class ArcherFlags {
|
|
public:
|
|
#if (LLVM_VERSION) >= 40
|
|
int flush_shadow{0};
|
|
#endif
|
|
int print_max_rss{0};
|
|
int verbose{0};
|
|
int enabled{1};
|
|
int ignore_serial{0};
|
|
|
|
ArcherFlags(const char *env) {
|
|
if (env) {
|
|
std::vector<std::string> tokens;
|
|
std::string token;
|
|
std::string str(env);
|
|
std::istringstream iss(str);
|
|
while (std::getline(iss, token, ' '))
|
|
tokens.push_back(token);
|
|
|
|
for (std::vector<std::string>::iterator it = tokens.begin();
|
|
it != tokens.end(); ++it) {
|
|
#if (LLVM_VERSION) >= 40
|
|
if (sscanf(it->c_str(), "flush_shadow=%d", &flush_shadow))
|
|
continue;
|
|
#endif
|
|
if (sscanf(it->c_str(), "print_max_rss=%d", &print_max_rss))
|
|
continue;
|
|
if (sscanf(it->c_str(), "verbose=%d", &verbose))
|
|
continue;
|
|
if (sscanf(it->c_str(), "enable=%d", &enabled))
|
|
continue;
|
|
if (sscanf(it->c_str(), "ignore_serial=%d", &ignore_serial))
|
|
continue;
|
|
std::cerr << "Illegal values for ARCHER_OPTIONS variable: " << token
|
|
<< std::endl;
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
class TsanFlags {
|
|
public:
|
|
int ignore_noninstrumented_modules;
|
|
|
|
TsanFlags(const char *env) : ignore_noninstrumented_modules(0) {
|
|
if (env) {
|
|
std::vector<std::string> tokens;
|
|
std::string str(env);
|
|
auto end = str.end();
|
|
auto it = str.begin();
|
|
auto is_sep = [](char c) {
|
|
return c == ' ' || c == ',' || c == ':' || c == '\n' || c == '\t' ||
|
|
c == '\r';
|
|
};
|
|
while (it != end) {
|
|
auto next_it = std::find_if(it, end, is_sep);
|
|
tokens.emplace_back(it, next_it);
|
|
it = next_it;
|
|
if (it != end) {
|
|
++it;
|
|
}
|
|
}
|
|
|
|
for (const auto &token : tokens) {
|
|
// we are interested in ignore_noninstrumented_modules to print a
|
|
// warning
|
|
if (sscanf(token.c_str(), "ignore_noninstrumented_modules=%d",
|
|
&ignore_noninstrumented_modules))
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
#if (LLVM_VERSION) >= 40
|
|
extern "C" {
|
|
int __attribute__((weak)) __archer_get_omp_status();
|
|
void __attribute__((weak)) __tsan_flush_memory() {}
|
|
}
|
|
#endif
|
|
ArcherFlags *archer_flags;
|
|
|
|
// The following definitions are pasted from "llvm/Support/Compiler.h" to allow
|
|
// the code
|
|
// to be compiled with other compilers like gcc:
|
|
|
|
#ifndef TsanHappensBefore
|
|
// Thread Sanitizer is a tool that finds races in code.
|
|
// See http://code.google.com/p/data-race-test/wiki/DynamicAnnotations .
|
|
// tsan detects these exact functions by name.
|
|
extern "C" {
|
|
#if (defined __APPLE__ && defined __MACH__)
|
|
static void AnnotateHappensAfter(const char *file, int line,
|
|
const volatile void *cv) {
|
|
void (*fptr)(const char *, int, const volatile void *);
|
|
|
|
fptr = (void (*)(const char *, int, const volatile void *))dlsym(
|
|
RTLD_DEFAULT, "AnnotateHappensAfter");
|
|
(*fptr)(file, line, cv);
|
|
}
|
|
static void AnnotateHappensBefore(const char *file, int line,
|
|
const volatile void *cv) {
|
|
void (*fptr)(const char *, int, const volatile void *);
|
|
|
|
fptr = (void (*)(const char *, int, const volatile void *))dlsym(
|
|
RTLD_DEFAULT, "AnnotateHappensBefore");
|
|
(*fptr)(file, line, cv);
|
|
}
|
|
static void AnnotateIgnoreWritesBegin(const char *file, int line) {
|
|
void (*fptr)(const char *, int);
|
|
|
|
fptr = (void (*)(const char *, int))dlsym(RTLD_DEFAULT,
|
|
"AnnotateIgnoreWritesBegin");
|
|
(*fptr)(file, line);
|
|
}
|
|
static void AnnotateIgnoreWritesEnd(const char *file, int line) {
|
|
void (*fptr)(const char *, int);
|
|
|
|
fptr = (void (*)(const char *, int))dlsym(RTLD_DEFAULT,
|
|
"AnnotateIgnoreWritesEnd");
|
|
(*fptr)(file, line);
|
|
}
|
|
static void AnnotateNewMemory(const char *file, int line,
|
|
const volatile void *cv, size_t size) {
|
|
void (*fptr)(const char *, int, const volatile void *, size_t);
|
|
|
|
fptr = (void (*)(const char *, int, const volatile void *, size_t))dlsym(
|
|
RTLD_DEFAULT, "AnnotateNewMemory");
|
|
(*fptr)(file, line, cv, size);
|
|
}
|
|
static int RunningOnValgrind() {
|
|
int (*fptr)();
|
|
|
|
fptr = (int (*)())dlsym(RTLD_DEFAULT, "RunningOnValgrind");
|
|
if (fptr && fptr != RunningOnValgrind)
|
|
runOnTsan = 0;
|
|
return 0;
|
|
}
|
|
#else
|
|
void __attribute__((weak))
|
|
AnnotateHappensAfter(const char *file, int line, const volatile void *cv) {}
|
|
void __attribute__((weak))
|
|
AnnotateHappensBefore(const char *file, int line, const volatile void *cv) {}
|
|
void __attribute__((weak))
|
|
AnnotateIgnoreWritesBegin(const char *file, int line) {}
|
|
void __attribute__((weak)) AnnotateIgnoreWritesEnd(const char *file, int line) {
|
|
}
|
|
void __attribute__((weak))
|
|
AnnotateNewMemory(const char *file, int line, const volatile void *cv,
|
|
size_t size) {}
|
|
int __attribute__((weak)) RunningOnValgrind() {
|
|
runOnTsan = 0;
|
|
return 0;
|
|
}
|
|
void __attribute__((weak)) __tsan_func_entry(const void *call_pc) {}
|
|
void __attribute__((weak)) __tsan_func_exit(void) {}
|
|
#endif
|
|
}
|
|
|
|
// This marker is used to define a happens-before arc. The race detector will
|
|
// infer an arc from the begin to the end when they share the same pointer
|
|
// argument.
|
|
#define TsanHappensBefore(cv) AnnotateHappensBefore(__FILE__, __LINE__, cv)
|
|
|
|
// This marker defines the destination of a happens-before arc.
|
|
#define TsanHappensAfter(cv) AnnotateHappensAfter(__FILE__, __LINE__, cv)
|
|
|
|
// Ignore any races on writes between here and the next TsanIgnoreWritesEnd.
|
|
#define TsanIgnoreWritesBegin() AnnotateIgnoreWritesBegin(__FILE__, __LINE__)
|
|
|
|
// Resume checking for racy writes.
|
|
#define TsanIgnoreWritesEnd() AnnotateIgnoreWritesEnd(__FILE__, __LINE__)
|
|
|
|
// We don't really delete the clock for now
|
|
#define TsanDeleteClock(cv)
|
|
|
|
// newMemory
|
|
#define TsanNewMemory(addr, size) \
|
|
AnnotateNewMemory(__FILE__, __LINE__, addr, size)
|
|
#define TsanFreeMemory(addr, size) \
|
|
AnnotateNewMemory(__FILE__, __LINE__, addr, size)
|
|
#endif
|
|
|
|
// Function entry/exit
|
|
#define TsanFuncEntry(pc) __tsan_func_entry(pc)
|
|
#define TsanFuncExit() __tsan_func_exit()
|
|
|
|
/// Required OMPT inquiry functions.
|
|
static ompt_get_parallel_info_t ompt_get_parallel_info;
|
|
static ompt_get_thread_data_t ompt_get_thread_data;
|
|
|
|
typedef uint64_t ompt_tsan_clockid;
|
|
|
|
static uint64_t my_next_id() {
|
|
static uint64_t ID = 0;
|
|
uint64_t ret = __sync_fetch_and_add(&ID, 1);
|
|
return ret;
|
|
}
|
|
|
|
// Data structure to provide a threadsafe pool of reusable objects.
|
|
// DataPool<Type of objects, Size of blockalloc>
|
|
template <typename T, int N> struct DataPool {
|
|
std::mutex DPMutex;
|
|
std::stack<T *> DataPointer;
|
|
std::list<void *> memory;
|
|
int total;
|
|
|
|
void newDatas() {
|
|
// prefix the Data with a pointer to 'this', allows to return memory to
|
|
// 'this',
|
|
// without explicitly knowing the source.
|
|
//
|
|
// To reduce lock contention, we use thread local DataPools, but Data
|
|
// objects move to other threads.
|
|
// The strategy is to get objects from local pool. Only if the object moved
|
|
// to another
|
|
// thread, we might see a penalty on release (returnData).
|
|
// For "single producer" pattern, a single thread creates tasks, these are
|
|
// executed by other threads.
|
|
// The master will have a high demand on TaskData, so return after use.
|
|
struct pooldata {
|
|
DataPool<T, N> *dp;
|
|
T data;
|
|
};
|
|
// We alloc without initialize the memory. We cannot call constructors.
|
|
// Therefore use malloc!
|
|
pooldata *datas = (pooldata *)malloc(sizeof(pooldata) * N);
|
|
memory.push_back(datas);
|
|
for (int i = 0; i < N; i++) {
|
|
datas[i].dp = this;
|
|
DataPointer.push(&(datas[i].data));
|
|
}
|
|
total += N;
|
|
}
|
|
|
|
T *getData() {
|
|
T *ret;
|
|
DPMutex.lock();
|
|
if (DataPointer.empty())
|
|
newDatas();
|
|
ret = DataPointer.top();
|
|
DataPointer.pop();
|
|
DPMutex.unlock();
|
|
return ret;
|
|
}
|
|
|
|
void returnData(T *data) {
|
|
DPMutex.lock();
|
|
DataPointer.push(data);
|
|
DPMutex.unlock();
|
|
}
|
|
|
|
void getDatas(int n, T **datas) {
|
|
DPMutex.lock();
|
|
for (int i = 0; i < n; i++) {
|
|
if (DataPointer.empty())
|
|
newDatas();
|
|
datas[i] = DataPointer.top();
|
|
DataPointer.pop();
|
|
}
|
|
DPMutex.unlock();
|
|
}
|
|
|
|
void returnDatas(int n, T **datas) {
|
|
DPMutex.lock();
|
|
for (int i = 0; i < n; i++) {
|
|
DataPointer.push(datas[i]);
|
|
}
|
|
DPMutex.unlock();
|
|
}
|
|
|
|
DataPool() : DPMutex(), DataPointer(), total(0) {}
|
|
|
|
~DataPool() {
|
|
// we assume all memory is returned when the thread finished / destructor is
|
|
// called
|
|
for (auto i : memory)
|
|
if (i)
|
|
free(i);
|
|
}
|
|
};
|
|
|
|
// This function takes care to return the data to the originating DataPool
|
|
// A pointer to the originating DataPool is stored just before the actual data.
|
|
template <typename T, int N> static void retData(void *data) {
|
|
((DataPool<T, N> **)data)[-1]->returnData((T *)data);
|
|
}
|
|
|
|
struct ParallelData;
|
|
__thread DataPool<ParallelData, 4> *pdp;
|
|
|
|
/// Data structure to store additional information for parallel regions.
|
|
struct ParallelData {
|
|
|
|
// Parallel fork is just another barrier, use Barrier[1]
|
|
|
|
/// Two addresses for relationships with barriers.
|
|
ompt_tsan_clockid Barrier[2];
|
|
|
|
const void *codePtr;
|
|
|
|
void *GetParallelPtr() { return &(Barrier[1]); }
|
|
|
|
void *GetBarrierPtr(unsigned Index) { return &(Barrier[Index]); }
|
|
|
|
ParallelData(const void *codeptr) : codePtr(codeptr) {}
|
|
~ParallelData() {
|
|
TsanDeleteClock(&(Barrier[0]));
|
|
TsanDeleteClock(&(Barrier[1]));
|
|
}
|
|
// overload new/delete to use DataPool for memory management.
|
|
void *operator new(size_t size) { return pdp->getData(); }
|
|
void operator delete(void *p, size_t) { retData<ParallelData, 4>(p); }
|
|
};
|
|
|
|
static inline ParallelData *ToParallelData(ompt_data_t *parallel_data) {
|
|
return reinterpret_cast<ParallelData *>(parallel_data->ptr);
|
|
}
|
|
|
|
struct Taskgroup;
|
|
__thread DataPool<Taskgroup, 4> *tgp;
|
|
|
|
/// Data structure to support stacking of taskgroups and allow synchronization.
|
|
struct Taskgroup {
|
|
/// Its address is used for relationships of the taskgroup's task set.
|
|
ompt_tsan_clockid Ptr;
|
|
|
|
/// Reference to the parent taskgroup.
|
|
Taskgroup *Parent;
|
|
|
|
Taskgroup(Taskgroup *Parent) : Parent(Parent) {}
|
|
~Taskgroup() { TsanDeleteClock(&Ptr); }
|
|
|
|
void *GetPtr() { return &Ptr; }
|
|
// overload new/delete to use DataPool for memory management.
|
|
void *operator new(size_t size) { return tgp->getData(); }
|
|
void operator delete(void *p, size_t) { retData<Taskgroup, 4>(p); }
|
|
};
|
|
|
|
struct TaskData;
|
|
__thread DataPool<TaskData, 4> *tdp;
|
|
|
|
/// Data structure to store additional information for tasks.
|
|
struct TaskData {
|
|
/// Its address is used for relationships of this task.
|
|
ompt_tsan_clockid Task;
|
|
|
|
/// Child tasks use its address to declare a relationship to a taskwait in
|
|
/// this task.
|
|
ompt_tsan_clockid Taskwait;
|
|
|
|
/// Whether this task is currently executing a barrier.
|
|
bool InBarrier;
|
|
|
|
/// Whether this task is an included task.
|
|
int TaskType{0};
|
|
|
|
/// Index of which barrier to use next.
|
|
char BarrierIndex;
|
|
|
|
/// Count how often this structure has been put into child tasks + 1.
|
|
std::atomic_int RefCount;
|
|
|
|
/// Reference to the parent that created this task.
|
|
TaskData *Parent;
|
|
|
|
/// Reference to the implicit task in the stack above this task.
|
|
TaskData *ImplicitTask;
|
|
|
|
/// Reference to the team of this task.
|
|
ParallelData *Team;
|
|
|
|
/// Reference to the current taskgroup that this task either belongs to or
|
|
/// that it just created.
|
|
Taskgroup *TaskGroup;
|
|
|
|
/// Dependency information for this task.
|
|
ompt_dependence_t *Dependencies;
|
|
|
|
/// Number of dependency entries.
|
|
unsigned DependencyCount;
|
|
|
|
void *PrivateData;
|
|
size_t PrivateDataSize;
|
|
|
|
int execution;
|
|
int freed;
|
|
|
|
TaskData(TaskData *Parent, int taskType)
|
|
: InBarrier(false), TaskType(taskType), BarrierIndex(0), RefCount(1),
|
|
Parent(Parent), ImplicitTask(nullptr), Team(Parent->Team),
|
|
TaskGroup(nullptr), DependencyCount(0), execution(0), freed(0) {
|
|
if (Parent != nullptr) {
|
|
Parent->RefCount++;
|
|
// Copy over pointer to taskgroup. This task may set up its own stack
|
|
// but for now belongs to its parent's taskgroup.
|
|
TaskGroup = Parent->TaskGroup;
|
|
}
|
|
}
|
|
|
|
TaskData(ParallelData *Team, int taskType)
|
|
: InBarrier(false), TaskType(taskType), BarrierIndex(0), RefCount(1),
|
|
Parent(nullptr), ImplicitTask(this), Team(Team), TaskGroup(nullptr),
|
|
DependencyCount(0), execution(1), freed(0) {}
|
|
|
|
~TaskData() {
|
|
TsanDeleteClock(&Task);
|
|
TsanDeleteClock(&Taskwait);
|
|
}
|
|
|
|
bool isIncluded() { return TaskType & ompt_task_undeferred; }
|
|
bool isUntied() { return TaskType & ompt_task_untied; }
|
|
bool isFinal() { return TaskType & ompt_task_final; }
|
|
bool isMergable() { return TaskType & ompt_task_mergeable; }
|
|
bool isMerged() { return TaskType & ompt_task_merged; }
|
|
|
|
bool isExplicit() { return TaskType & ompt_task_explicit; }
|
|
bool isImplicit() { return TaskType & ompt_task_implicit; }
|
|
bool isInitial() { return TaskType & ompt_task_initial; }
|
|
bool isTarget() { return TaskType & ompt_task_target; }
|
|
|
|
void *GetTaskPtr() { return &Task; }
|
|
|
|
void *GetTaskwaitPtr() { return &Taskwait; }
|
|
// overload new/delete to use DataPool for memory management.
|
|
void *operator new(size_t size) { return tdp->getData(); }
|
|
void operator delete(void *p, size_t) { retData<TaskData, 4>(p); }
|
|
};
|
|
|
|
static inline TaskData *ToTaskData(ompt_data_t *task_data) {
|
|
return reinterpret_cast<TaskData *>(task_data->ptr);
|
|
}
|
|
|
|
static inline void *ToInAddr(void *OutAddr) {
|
|
// FIXME: This will give false negatives when a second variable lays directly
|
|
// behind a variable that only has a width of 1 byte.
|
|
// Another approach would be to "negate" the address or to flip the
|
|
// first bit...
|
|
return reinterpret_cast<char *>(OutAddr) + 1;
|
|
}
|
|
|
|
/// Store a mutex for each wait_id to resolve race condition with callbacks.
|
|
std::unordered_map<ompt_wait_id_t, std::mutex> Locks;
|
|
std::mutex LocksMutex;
|
|
|
|
static void ompt_tsan_thread_begin(ompt_thread_t thread_type,
|
|
ompt_data_t *thread_data) {
|
|
pdp = new DataPool<ParallelData, 4>;
|
|
TsanNewMemory(pdp, sizeof(pdp));
|
|
tgp = new DataPool<Taskgroup, 4>;
|
|
TsanNewMemory(tgp, sizeof(tgp));
|
|
tdp = new DataPool<TaskData, 4>;
|
|
TsanNewMemory(tdp, sizeof(tdp));
|
|
thread_data->value = my_next_id();
|
|
}
|
|
|
|
static void ompt_tsan_thread_end(ompt_data_t *thread_data) {
|
|
delete pdp;
|
|
delete tgp;
|
|
delete tdp;
|
|
}
|
|
|
|
/// OMPT event callbacks for handling parallel regions.
|
|
|
|
static void ompt_tsan_parallel_begin(ompt_data_t *parent_task_data,
|
|
const ompt_frame_t *parent_task_frame,
|
|
ompt_data_t *parallel_data,
|
|
uint32_t requested_team_size, int flag,
|
|
const void *codeptr_ra) {
|
|
ParallelData *Data = new ParallelData(codeptr_ra);
|
|
parallel_data->ptr = Data;
|
|
|
|
TsanHappensBefore(Data->GetParallelPtr());
|
|
if (archer_flags->ignore_serial && ToTaskData(parent_task_data)->isInitial())
|
|
TsanIgnoreWritesEnd();
|
|
}
|
|
|
|
static void ompt_tsan_parallel_end(ompt_data_t *parallel_data,
|
|
ompt_data_t *task_data, int flag,
|
|
const void *codeptr_ra) {
|
|
if (archer_flags->ignore_serial && ToTaskData(task_data)->isInitial())
|
|
TsanIgnoreWritesBegin();
|
|
ParallelData *Data = ToParallelData(parallel_data);
|
|
TsanHappensAfter(Data->GetBarrierPtr(0));
|
|
TsanHappensAfter(Data->GetBarrierPtr(1));
|
|
|
|
delete Data;
|
|
|
|
#if (LLVM_VERSION >= 40)
|
|
if (&__archer_get_omp_status) {
|
|
if (__archer_get_omp_status() == 0 && archer_flags->flush_shadow)
|
|
__tsan_flush_memory();
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static void ompt_tsan_implicit_task(ompt_scope_endpoint_t endpoint,
|
|
ompt_data_t *parallel_data,
|
|
ompt_data_t *task_data,
|
|
unsigned int team_size,
|
|
unsigned int thread_num, int type) {
|
|
switch (endpoint) {
|
|
case ompt_scope_begin:
|
|
if (type & ompt_task_initial) {
|
|
parallel_data->ptr = new ParallelData(nullptr);
|
|
}
|
|
task_data->ptr = new TaskData(ToParallelData(parallel_data), type);
|
|
TsanHappensAfter(ToParallelData(parallel_data)->GetParallelPtr());
|
|
TsanFuncEntry(ToParallelData(parallel_data)->codePtr);
|
|
break;
|
|
case ompt_scope_end: {
|
|
TaskData *Data = ToTaskData(task_data);
|
|
assert(Data->freed == 0 && "Implicit task end should only be called once!");
|
|
Data->freed = 1;
|
|
assert(Data->RefCount == 1 &&
|
|
"All tasks should have finished at the implicit barrier!");
|
|
delete Data;
|
|
TsanFuncExit();
|
|
break;
|
|
}
|
|
case ompt_scope_beginend:
|
|
// Should not occur according to OpenMP 5.1
|
|
// Tested in OMPT tests
|
|
break;
|
|
}
|
|
}
|
|
|
|
static void ompt_tsan_sync_region(ompt_sync_region_t kind,
|
|
ompt_scope_endpoint_t endpoint,
|
|
ompt_data_t *parallel_data,
|
|
ompt_data_t *task_data,
|
|
const void *codeptr_ra) {
|
|
TaskData *Data = ToTaskData(task_data);
|
|
switch (endpoint) {
|
|
case ompt_scope_begin:
|
|
case ompt_scope_beginend:
|
|
TsanFuncEntry(codeptr_ra);
|
|
switch (kind) {
|
|
case ompt_sync_region_barrier_implementation:
|
|
case ompt_sync_region_barrier_implicit:
|
|
case ompt_sync_region_barrier_explicit:
|
|
case ompt_sync_region_barrier_implicit_parallel:
|
|
case ompt_sync_region_barrier_implicit_workshare:
|
|
case ompt_sync_region_barrier_teams:
|
|
case ompt_sync_region_barrier: {
|
|
char BarrierIndex = Data->BarrierIndex;
|
|
TsanHappensBefore(Data->Team->GetBarrierPtr(BarrierIndex));
|
|
|
|
if (hasReductionCallback < ompt_set_always) {
|
|
// We ignore writes inside the barrier. These would either occur during
|
|
// 1. reductions performed by the runtime which are guaranteed to be
|
|
// race-free.
|
|
// 2. execution of another task.
|
|
// For the latter case we will re-enable tracking in task_switch.
|
|
Data->InBarrier = true;
|
|
TsanIgnoreWritesBegin();
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case ompt_sync_region_taskwait:
|
|
break;
|
|
|
|
case ompt_sync_region_taskgroup:
|
|
Data->TaskGroup = new Taskgroup(Data->TaskGroup);
|
|
break;
|
|
|
|
case ompt_sync_region_reduction:
|
|
// should never be reached
|
|
break;
|
|
|
|
}
|
|
if (endpoint == ompt_scope_begin)
|
|
break;
|
|
KMP_FALLTHROUGH();
|
|
case ompt_scope_end:
|
|
TsanFuncExit();
|
|
switch (kind) {
|
|
case ompt_sync_region_barrier_implementation:
|
|
case ompt_sync_region_barrier_implicit:
|
|
case ompt_sync_region_barrier_explicit:
|
|
case ompt_sync_region_barrier_implicit_parallel:
|
|
case ompt_sync_region_barrier_implicit_workshare:
|
|
case ompt_sync_region_barrier_teams:
|
|
case ompt_sync_region_barrier: {
|
|
if (hasReductionCallback < ompt_set_always) {
|
|
// We want to track writes after the barrier again.
|
|
Data->InBarrier = false;
|
|
TsanIgnoreWritesEnd();
|
|
}
|
|
|
|
char BarrierIndex = Data->BarrierIndex;
|
|
// Barrier will end after it has been entered by all threads.
|
|
if (parallel_data)
|
|
TsanHappensAfter(Data->Team->GetBarrierPtr(BarrierIndex));
|
|
|
|
// It is not guaranteed that all threads have exited this barrier before
|
|
// we enter the next one. So we will use a different address.
|
|
// We are however guaranteed that this current barrier is finished
|
|
// by the time we exit the next one. So we can then reuse the first
|
|
// address.
|
|
Data->BarrierIndex = (BarrierIndex + 1) % 2;
|
|
break;
|
|
}
|
|
|
|
case ompt_sync_region_taskwait: {
|
|
if (Data->execution > 1)
|
|
TsanHappensAfter(Data->GetTaskwaitPtr());
|
|
break;
|
|
}
|
|
|
|
case ompt_sync_region_taskgroup: {
|
|
assert(Data->TaskGroup != nullptr &&
|
|
"Should have at least one taskgroup!");
|
|
|
|
TsanHappensAfter(Data->TaskGroup->GetPtr());
|
|
|
|
// Delete this allocated taskgroup, all descendent task are finished by
|
|
// now.
|
|
Taskgroup *Parent = Data->TaskGroup->Parent;
|
|
delete Data->TaskGroup;
|
|
Data->TaskGroup = Parent;
|
|
break;
|
|
}
|
|
|
|
case ompt_sync_region_reduction:
|
|
// Should not occur according to OpenMP 5.1
|
|
// Tested in OMPT tests
|
|
break;
|
|
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
static void ompt_tsan_reduction(ompt_sync_region_t kind,
|
|
ompt_scope_endpoint_t endpoint,
|
|
ompt_data_t *parallel_data,
|
|
ompt_data_t *task_data,
|
|
const void *codeptr_ra) {
|
|
switch (endpoint) {
|
|
case ompt_scope_begin:
|
|
switch (kind) {
|
|
case ompt_sync_region_reduction:
|
|
TsanIgnoreWritesBegin();
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
case ompt_scope_end:
|
|
switch (kind) {
|
|
case ompt_sync_region_reduction:
|
|
TsanIgnoreWritesEnd();
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
case ompt_scope_beginend:
|
|
// Should not occur according to OpenMP 5.1
|
|
// Tested in OMPT tests
|
|
// Would have no implications for DR detection
|
|
break;
|
|
}
|
|
}
|
|
|
|
/// OMPT event callbacks for handling tasks.
|
|
|
|
static void ompt_tsan_task_create(
|
|
ompt_data_t *parent_task_data, /* id of parent task */
|
|
const ompt_frame_t *parent_frame, /* frame data for parent task */
|
|
ompt_data_t *new_task_data, /* id of created task */
|
|
int type, int has_dependences,
|
|
const void *codeptr_ra) /* pointer to outlined function */
|
|
{
|
|
TaskData *Data;
|
|
assert(new_task_data->ptr == NULL &&
|
|
"Task data should be initialized to NULL");
|
|
if (type & ompt_task_initial) {
|
|
ompt_data_t *parallel_data;
|
|
int team_size = 1;
|
|
ompt_get_parallel_info(0, ¶llel_data, &team_size);
|
|
ParallelData *PData = new ParallelData(nullptr);
|
|
parallel_data->ptr = PData;
|
|
|
|
Data = new TaskData(PData, type);
|
|
new_task_data->ptr = Data;
|
|
} else if (type & ompt_task_undeferred) {
|
|
Data = new TaskData(ToTaskData(parent_task_data), type);
|
|
new_task_data->ptr = Data;
|
|
} else if (type & ompt_task_explicit || type & ompt_task_target) {
|
|
Data = new TaskData(ToTaskData(parent_task_data), type);
|
|
new_task_data->ptr = Data;
|
|
|
|
// Use the newly created address. We cannot use a single address from the
|
|
// parent because that would declare wrong relationships with other
|
|
// sibling tasks that may be created before this task is started!
|
|
TsanHappensBefore(Data->GetTaskPtr());
|
|
ToTaskData(parent_task_data)->execution++;
|
|
}
|
|
}
|
|
|
|
static void __ompt_tsan_release_task(TaskData *task) {
|
|
while (task != nullptr && --task->RefCount == 0) {
|
|
TaskData *Parent = task->Parent;
|
|
if (task->DependencyCount > 0) {
|
|
delete[] task->Dependencies;
|
|
}
|
|
delete task;
|
|
task = Parent;
|
|
}
|
|
}
|
|
|
|
static void ompt_tsan_task_schedule(ompt_data_t *first_task_data,
|
|
ompt_task_status_t prior_task_status,
|
|
ompt_data_t *second_task_data) {
|
|
|
|
//
|
|
// The necessary action depends on prior_task_status:
|
|
//
|
|
// ompt_task_early_fulfill = 5,
|
|
// -> ignored
|
|
//
|
|
// ompt_task_late_fulfill = 6,
|
|
// -> first completed, first freed, second ignored
|
|
//
|
|
// ompt_task_complete = 1,
|
|
// ompt_task_cancel = 3,
|
|
// -> first completed, first freed, second starts
|
|
//
|
|
// ompt_task_detach = 4,
|
|
// ompt_task_yield = 2,
|
|
// ompt_task_switch = 7
|
|
// -> first suspended, second starts
|
|
//
|
|
|
|
if (prior_task_status == ompt_task_early_fulfill)
|
|
return;
|
|
|
|
TaskData *FromTask = ToTaskData(first_task_data);
|
|
|
|
// Legacy handling for missing reduction callback
|
|
if (hasReductionCallback < ompt_set_always && FromTask->InBarrier) {
|
|
// We want to ignore writes in the runtime code during barriers,
|
|
// but not when executing tasks with user code!
|
|
TsanIgnoreWritesEnd();
|
|
}
|
|
|
|
// The late fulfill happens after the detached task finished execution
|
|
if (prior_task_status == ompt_task_late_fulfill)
|
|
TsanHappensAfter(FromTask->GetTaskPtr());
|
|
|
|
// task completed execution
|
|
if (prior_task_status == ompt_task_complete ||
|
|
prior_task_status == ompt_task_cancel ||
|
|
prior_task_status == ompt_task_late_fulfill) {
|
|
// Included tasks are executed sequentially, no need to track
|
|
// synchronization
|
|
if (!FromTask->isIncluded()) {
|
|
// Task will finish before a barrier in the surrounding parallel region
|
|
// ...
|
|
ParallelData *PData = FromTask->Team;
|
|
TsanHappensBefore(
|
|
PData->GetBarrierPtr(FromTask->ImplicitTask->BarrierIndex));
|
|
|
|
// ... and before an eventual taskwait by the parent thread.
|
|
TsanHappensBefore(FromTask->Parent->GetTaskwaitPtr());
|
|
|
|
if (FromTask->TaskGroup != nullptr) {
|
|
// This task is part of a taskgroup, so it will finish before the
|
|
// corresponding taskgroup_end.
|
|
TsanHappensBefore(FromTask->TaskGroup->GetPtr());
|
|
}
|
|
}
|
|
|
|
// release dependencies
|
|
for (unsigned i = 0; i < FromTask->DependencyCount; i++) {
|
|
ompt_dependence_t *Dependency = &FromTask->Dependencies[i];
|
|
|
|
// in dependencies block following inout and out dependencies!
|
|
TsanHappensBefore(ToInAddr(Dependency->variable.ptr));
|
|
if (Dependency->dependence_type == ompt_dependence_type_out ||
|
|
Dependency->dependence_type == ompt_dependence_type_inout) {
|
|
TsanHappensBefore(Dependency->variable.ptr);
|
|
}
|
|
}
|
|
// free the previously running task
|
|
__ompt_tsan_release_task(FromTask);
|
|
}
|
|
|
|
// For late fulfill of detached task, there is no task to schedule to
|
|
if (prior_task_status == ompt_task_late_fulfill) {
|
|
return;
|
|
}
|
|
|
|
TaskData *ToTask = ToTaskData(second_task_data);
|
|
// Legacy handling for missing reduction callback
|
|
if (hasReductionCallback < ompt_set_always && ToTask->InBarrier) {
|
|
// We re-enter runtime code which currently performs a barrier.
|
|
TsanIgnoreWritesBegin();
|
|
}
|
|
|
|
// task suspended
|
|
if (prior_task_status == ompt_task_switch ||
|
|
prior_task_status == ompt_task_yield ||
|
|
prior_task_status == ompt_task_detach) {
|
|
// Task may be resumed at a later point in time.
|
|
TsanHappensBefore(FromTask->GetTaskPtr());
|
|
ToTask->ImplicitTask = FromTask->ImplicitTask;
|
|
assert(ToTask->ImplicitTask != NULL &&
|
|
"A task belongs to a team and has an implicit task on the stack");
|
|
}
|
|
|
|
// Handle dependencies on first execution of the task
|
|
if (ToTask->execution == 0) {
|
|
ToTask->execution++;
|
|
for (unsigned i = 0; i < ToTask->DependencyCount; i++) {
|
|
ompt_dependence_t *Dependency = &ToTask->Dependencies[i];
|
|
|
|
TsanHappensAfter(Dependency->variable.ptr);
|
|
// in and inout dependencies are also blocked by prior in dependencies!
|
|
if (Dependency->dependence_type == ompt_dependence_type_out ||
|
|
Dependency->dependence_type == ompt_dependence_type_inout) {
|
|
TsanHappensAfter(ToInAddr(Dependency->variable.ptr));
|
|
}
|
|
}
|
|
}
|
|
// 1. Task will begin execution after it has been created.
|
|
// 2. Task will resume after it has been switched away.
|
|
TsanHappensAfter(ToTask->GetTaskPtr());
|
|
}
|
|
|
|
static void ompt_tsan_dependences(ompt_data_t *task_data,
|
|
const ompt_dependence_t *deps, int ndeps) {
|
|
if (ndeps > 0) {
|
|
// Copy the data to use it in task_switch and task_end.
|
|
TaskData *Data = ToTaskData(task_data);
|
|
Data->Dependencies = new ompt_dependence_t[ndeps];
|
|
std::memcpy(Data->Dependencies, deps, sizeof(ompt_dependence_t) * ndeps);
|
|
Data->DependencyCount = ndeps;
|
|
|
|
// This callback is executed before this task is first started.
|
|
TsanHappensBefore(Data->GetTaskPtr());
|
|
}
|
|
}
|
|
|
|
/// OMPT event callbacks for handling locking.
|
|
static void ompt_tsan_mutex_acquired(ompt_mutex_t kind, ompt_wait_id_t wait_id,
|
|
const void *codeptr_ra) {
|
|
|
|
// Acquire our own lock to make sure that
|
|
// 1. the previous release has finished.
|
|
// 2. the next acquire doesn't start before we have finished our release.
|
|
LocksMutex.lock();
|
|
std::mutex &Lock = Locks[wait_id];
|
|
LocksMutex.unlock();
|
|
|
|
Lock.lock();
|
|
TsanHappensAfter(&Lock);
|
|
}
|
|
|
|
static void ompt_tsan_mutex_released(ompt_mutex_t kind, ompt_wait_id_t wait_id,
|
|
const void *codeptr_ra) {
|
|
LocksMutex.lock();
|
|
std::mutex &Lock = Locks[wait_id];
|
|
LocksMutex.unlock();
|
|
TsanHappensBefore(&Lock);
|
|
|
|
Lock.unlock();
|
|
}
|
|
|
|
// callback , signature , variable to store result , required support level
|
|
#define SET_OPTIONAL_CALLBACK_T(event, type, result, level) \
|
|
do { \
|
|
ompt_callback_##type##_t tsan_##event = &ompt_tsan_##event; \
|
|
result = ompt_set_callback(ompt_callback_##event, \
|
|
(ompt_callback_t)tsan_##event); \
|
|
if (result < level) \
|
|
printf("Registered callback '" #event "' is not supported at " #level \
|
|
" (%i)\n", \
|
|
result); \
|
|
} while (0)
|
|
|
|
#define SET_CALLBACK_T(event, type) \
|
|
do { \
|
|
int res; \
|
|
SET_OPTIONAL_CALLBACK_T(event, type, res, ompt_set_always); \
|
|
} while (0)
|
|
|
|
#define SET_CALLBACK(event) SET_CALLBACK_T(event, event)
|
|
|
|
static int ompt_tsan_initialize(ompt_function_lookup_t lookup, int device_num,
|
|
ompt_data_t *tool_data) {
|
|
const char *options = getenv("TSAN_OPTIONS");
|
|
TsanFlags tsan_flags(options);
|
|
|
|
ompt_set_callback_t ompt_set_callback =
|
|
(ompt_set_callback_t)lookup("ompt_set_callback");
|
|
if (ompt_set_callback == NULL) {
|
|
std::cerr << "Could not set callback, exiting..." << std::endl;
|
|
std::exit(1);
|
|
}
|
|
ompt_get_parallel_info =
|
|
(ompt_get_parallel_info_t)lookup("ompt_get_parallel_info");
|
|
ompt_get_thread_data = (ompt_get_thread_data_t)lookup("ompt_get_thread_data");
|
|
|
|
if (ompt_get_parallel_info == NULL) {
|
|
fprintf(stderr, "Could not get inquiry function 'ompt_get_parallel_info', "
|
|
"exiting...\n");
|
|
exit(1);
|
|
}
|
|
|
|
SET_CALLBACK(thread_begin);
|
|
SET_CALLBACK(thread_end);
|
|
SET_CALLBACK(parallel_begin);
|
|
SET_CALLBACK(implicit_task);
|
|
SET_CALLBACK(sync_region);
|
|
SET_CALLBACK(parallel_end);
|
|
|
|
SET_CALLBACK(task_create);
|
|
SET_CALLBACK(task_schedule);
|
|
SET_CALLBACK(dependences);
|
|
|
|
SET_CALLBACK_T(mutex_acquired, mutex);
|
|
SET_CALLBACK_T(mutex_released, mutex);
|
|
SET_OPTIONAL_CALLBACK_T(reduction, sync_region, hasReductionCallback,
|
|
ompt_set_never);
|
|
|
|
if (!tsan_flags.ignore_noninstrumented_modules)
|
|
fprintf(stderr,
|
|
"Warning: please export "
|
|
"TSAN_OPTIONS='ignore_noninstrumented_modules=1' "
|
|
"to avoid false positive reports from the OpenMP runtime!\n");
|
|
if (archer_flags->ignore_serial)
|
|
TsanIgnoreWritesBegin();
|
|
return 1; // success
|
|
}
|
|
|
|
static void ompt_tsan_finalize(ompt_data_t *tool_data) {
|
|
if (archer_flags->ignore_serial)
|
|
TsanIgnoreWritesEnd();
|
|
if (archer_flags->print_max_rss) {
|
|
struct rusage end;
|
|
getrusage(RUSAGE_SELF, &end);
|
|
printf("MAX RSS[KBytes] during execution: %ld\n", end.ru_maxrss);
|
|
}
|
|
|
|
if (archer_flags)
|
|
delete archer_flags;
|
|
}
|
|
|
|
extern "C" ompt_start_tool_result_t *
|
|
ompt_start_tool(unsigned int omp_version, const char *runtime_version) {
|
|
const char *options = getenv("ARCHER_OPTIONS");
|
|
archer_flags = new ArcherFlags(options);
|
|
if (!archer_flags->enabled) {
|
|
if (archer_flags->verbose)
|
|
std::cout << "Archer disabled, stopping operation" << std::endl;
|
|
delete archer_flags;
|
|
return NULL;
|
|
}
|
|
|
|
static ompt_start_tool_result_t ompt_start_tool_result = {
|
|
&ompt_tsan_initialize, &ompt_tsan_finalize, {0}};
|
|
runOnTsan = 1;
|
|
RunningOnValgrind();
|
|
if (!runOnTsan) // if we are not running on TSAN, give a different tool the
|
|
// chance to be loaded
|
|
{
|
|
if (archer_flags->verbose)
|
|
std::cout << "Archer detected OpenMP application without TSan "
|
|
"stopping operation"
|
|
<< std::endl;
|
|
delete archer_flags;
|
|
return NULL;
|
|
}
|
|
|
|
if (archer_flags->verbose)
|
|
std::cout << "Archer detected OpenMP application with TSan, supplying "
|
|
"OpenMP synchronization semantics"
|
|
<< std::endl;
|
|
return &ompt_start_tool_result;
|
|
}
|