/*
 * ompt-tsan.cpp -- Archer runtime library, TSan annotations for Archer
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for details.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif

#include <algorithm>
#include <atomic>
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <inttypes.h>
#include <iostream>
#include <list>
#include <mutex>
#include <sstream>
#include <stack>
#include <string>
#include <unordered_map>
#include <vector>

#if (defined __APPLE__ && defined __MACH__)
#include <dlfcn.h>
#endif

#include "omp-tools.h"
#include <sys/resource.h>

// Define attribute that indicates that the fall through from the previous
// case label is intentional and should not be diagnosed by a compiler
// Code from libcxx/include/__config
// Use a function like macro to imply that it must be followed by a semicolon
#if __cplusplus > 201402L && __has_cpp_attribute(fallthrough)
#define KMP_FALLTHROUGH() [[fallthrough]]
#elif __has_cpp_attribute(clang::fallthrough)
#define KMP_FALLTHROUGH() [[clang::fallthrough]]
#elif __has_attribute(fallthrough) || __GNUC__ >= 7
#define KMP_FALLTHROUGH() __attribute__((__fallthrough__))
#else
#define KMP_FALLTHROUGH() ((void)0)
#endif

static int runOnTsan;
static int hasReductionCallback;

class ArcherFlags {
public:
#if (LLVM_VERSION) >= 40
  int flush_shadow{0};
#endif
  int print_max_rss{0};
  int verbose{0};
  int enabled{1};
  int ignore_serial{0};

  ArcherFlags(const char *env) {
    if (env) {
      std::vector<std::string> tokens;
      std::string token;
      std::string str(env);
      std::istringstream iss(str);
      while (std::getline(iss, token, ' '))
        tokens.push_back(token);

      for (std::vector<std::string>::iterator it = tokens.begin();
           it != tokens.end(); ++it) {
#if (LLVM_VERSION) >= 40
        if (sscanf(it->c_str(), "flush_shadow=%d", &flush_shadow))
          continue;
#endif
        if (sscanf(it->c_str(), "print_max_rss=%d", &print_max_rss))
          continue;
        if (sscanf(it->c_str(), "verbose=%d", &verbose))
          continue;
        if (sscanf(it->c_str(), "enable=%d", &enabled))
          continue;
        if (sscanf(it->c_str(), "ignore_serial=%d", &ignore_serial))
          continue;
        std::cerr << "Illegal values for ARCHER_OPTIONS variable: " << *it
                  << std::endl;
      }
    }
  }
};

class TsanFlags {
public:
  int ignore_noninstrumented_modules;

  TsanFlags(const char *env) : ignore_noninstrumented_modules(0) {
    if (env) {
      std::vector<std::string> tokens;
      std::string str(env);
      auto end = str.end();
      auto it = str.begin();
      auto is_sep = [](char c) {
        return c == ' ' || c == ',' || c == ':' || c == '\n' || c == '\t' ||
               c == '\r';
      };
      while (it != end) {
        auto next_it = std::find_if(it, end, is_sep);
        tokens.emplace_back(it, next_it);
        it = next_it;
        if (it != end) {
          ++it;
        }
      }

      for (const auto &token : tokens) {
        // we are interested in ignore_noninstrumented_modules to print a
        // warning
        if (sscanf(token.c_str(), "ignore_noninstrumented_modules=%d",
                   &ignore_noninstrumented_modules))
          continue;
      }
    }
  }
};

#if (LLVM_VERSION) >= 40
extern "C" {
int __attribute__((weak)) __archer_get_omp_status();
void __attribute__((weak)) __tsan_flush_memory() {}
}
#endif
ArcherFlags *archer_flags;
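
// Illustrative usage (hypothetical values, not part of this file): given the
// sscanf patterns parsed above, the environment could be set as e.g.
//
//   ARCHER_OPTIONS="verbose=1 print_max_rss=1 ignore_serial=1" ./app
//
// Tokens are split on single spaces; unrecognized tokens are reported on
// stderr.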
extern "C" { #if (defined __APPLE__ && defined __MACH__) static void AnnotateHappensAfter(const char *file, int line, const volatile void *cv) { void (*fptr)(const char *, int, const volatile void *); fptr = (void (*)(const char *, int, const volatile void *))dlsym( RTLD_DEFAULT, "AnnotateHappensAfter"); (*fptr)(file, line, cv); } static void AnnotateHappensBefore(const char *file, int line, const volatile void *cv) { void (*fptr)(const char *, int, const volatile void *); fptr = (void (*)(const char *, int, const volatile void *))dlsym( RTLD_DEFAULT, "AnnotateHappensBefore"); (*fptr)(file, line, cv); } static void AnnotateIgnoreWritesBegin(const char *file, int line) { void (*fptr)(const char *, int); fptr = (void (*)(const char *, int))dlsym(RTLD_DEFAULT, "AnnotateIgnoreWritesBegin"); (*fptr)(file, line); } static void AnnotateIgnoreWritesEnd(const char *file, int line) { void (*fptr)(const char *, int); fptr = (void (*)(const char *, int))dlsym(RTLD_DEFAULT, "AnnotateIgnoreWritesEnd"); (*fptr)(file, line); } static void AnnotateNewMemory(const char *file, int line, const volatile void *cv, size_t size) { void (*fptr)(const char *, int, const volatile void *, size_t); fptr = (void (*)(const char *, int, const volatile void *, size_t))dlsym( RTLD_DEFAULT, "AnnotateNewMemory"); (*fptr)(file, line, cv, size); } static int RunningOnValgrind() { int (*fptr)(); fptr = (int (*)())dlsym(RTLD_DEFAULT, "RunningOnValgrind"); if (fptr && fptr != RunningOnValgrind) runOnTsan = 0; return 0; } #else void __attribute__((weak)) AnnotateHappensAfter(const char *file, int line, const volatile void *cv) {} void __attribute__((weak)) AnnotateHappensBefore(const char *file, int line, const volatile void *cv) {} void __attribute__((weak)) AnnotateIgnoreWritesBegin(const char *file, int line) {} void __attribute__((weak)) AnnotateIgnoreWritesEnd(const char *file, int line) { } void __attribute__((weak)) AnnotateNewMemory(const char *file, int line, const volatile void *cv, size_t size) {} int __attribute__((weak)) RunningOnValgrind() { runOnTsan = 0; return 0; } void __attribute__((weak)) __tsan_func_entry(const void *call_pc) {} void __attribute__((weak)) __tsan_func_exit(void) {} #endif } // This marker is used to define a happens-before arc. The race detector will // infer an arc from the begin to the end when they share the same pointer // argument. #define TsanHappensBefore(cv) AnnotateHappensBefore(__FILE__, __LINE__, cv) // This marker defines the destination of a happens-before arc. #define TsanHappensAfter(cv) AnnotateHappensAfter(__FILE__, __LINE__, cv) // Ignore any races on writes between here and the next TsanIgnoreWritesEnd. #define TsanIgnoreWritesBegin() AnnotateIgnoreWritesBegin(__FILE__, __LINE__) // Resume checking for racy writes. #define TsanIgnoreWritesEnd() AnnotateIgnoreWritesEnd(__FILE__, __LINE__) // We don't really delete the clock for now #define TsanDeleteClock(cv) // newMemory #define TsanNewMemory(addr, size) \ AnnotateNewMemory(__FILE__, __LINE__, addr, size) #define TsanFreeMemory(addr, size) \ AnnotateNewMemory(__FILE__, __LINE__, addr, size) #endif // Function entry/exit #define TsanFuncEntry(pc) __tsan_func_entry(pc) #define TsanFuncExit() __tsan_func_exit() /// Required OMPT inquiry functions. 
/// Required OMPT inquiry functions.
static ompt_get_parallel_info_t ompt_get_parallel_info;
static ompt_get_thread_data_t ompt_get_thread_data;

typedef uint64_t ompt_tsan_clockid;

static uint64_t my_next_id() {
  static uint64_t ID = 0;
  uint64_t ret = __sync_fetch_and_add(&ID, 1);
  return ret;
}

// Data structure to provide a threadsafe pool of reusable objects.
// DataPool<Type of objects, Size of blockalloc>
template <typename T, int N> struct DataPool {
  std::mutex DPMutex;
  std::stack<T *> DataPointer;
  std::list<void *> memory;
  int total;

  void newDatas() {
    // Prefix the data with a pointer to 'this', which allows to return memory
    // to 'this' without explicitly knowing the source.
    //
    // To reduce lock contention, we use thread-local DataPools, but Data
    // objects move to other threads. The strategy is to get objects from the
    // local pool. Only if the object moved to another thread, we might see a
    // penalty on release (returnData). For the "single producer" pattern, a
    // single thread creates tasks that are executed by other threads; the
    // master will have a high demand on TaskData, so return after use.
    struct pooldata {
      DataPool<T, N> *dp;
      T data;
    };
    // We alloc without initializing the memory. We cannot call constructors.
    // Therefore use malloc!
    pooldata *datas = (pooldata *)malloc(sizeof(pooldata) * N);
    memory.push_back(datas);
    for (int i = 0; i < N; i++) {
      datas[i].dp = this;
      DataPointer.push(&(datas[i].data));
    }
    total += N;
  }

  T *getData() {
    T *ret;
    DPMutex.lock();
    if (DataPointer.empty())
      newDatas();
    ret = DataPointer.top();
    DataPointer.pop();
    DPMutex.unlock();
    return ret;
  }

  void returnData(T *data) {
    DPMutex.lock();
    DataPointer.push(data);
    DPMutex.unlock();
  }

  void getDatas(int n, T **datas) {
    DPMutex.lock();
    for (int i = 0; i < n; i++) {
      if (DataPointer.empty())
        newDatas();
      datas[i] = DataPointer.top();
      DataPointer.pop();
    }
    DPMutex.unlock();
  }

  void returnDatas(int n, T **datas) {
    DPMutex.lock();
    for (int i = 0; i < n; i++) {
      DataPointer.push(datas[i]);
    }
    DPMutex.unlock();
  }

  DataPool() : DPMutex(), DataPointer(), total(0) {}

  ~DataPool() {
    // We assume all memory is returned when the thread finished / the
    // destructor is called.
    for (auto i : memory)
      if (i)
        free(i);
  }
};

// This function takes care to return the data to the originating DataPool.
// A pointer to the originating DataPool is stored just before the actual data.
template <typename T, int N> static void retData(void *data) {
  ((DataPool<T, N> **)data)[-1]->returnData((T *)data);
}

struct ParallelData;
__thread DataPool<ParallelData, 4> *pdp;

/// Data structure to store additional information for parallel regions.
struct ParallelData {
  // Parallel fork is just another barrier, use Barrier[1]

  /// Two addresses for relationships with barriers.
  ompt_tsan_clockid Barrier[2];

  const void *codePtr;

  void *GetParallelPtr() { return &(Barrier[1]); }

  void *GetBarrierPtr(unsigned Index) { return &(Barrier[Index]); }

  ParallelData(const void *codeptr) : codePtr(codeptr) {}

  ~ParallelData() {
    TsanDeleteClock(&(Barrier[0]));
    TsanDeleteClock(&(Barrier[1]));
  }

  // overload new/delete to use DataPool for memory management.
  void *operator new(size_t size) { return pdp->getData(); }
  void operator delete(void *p, size_t) { retData<ParallelData, 4>(p); }
};

static inline ParallelData *ToParallelData(ompt_data_t *parallel_data) {
  return reinterpret_cast<ParallelData *>(parallel_data->ptr);
}
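
// Illustrative memory layout (restating the scheme above): every T handed out
// by getData() is embedded in a pooldata record, so the owning pool can be
// recovered from the object address alone:
//
//   [ DataPool<T, N> *dp | T data ]
//                          ^------ pointer returned by getData()
//
// retData<T, N>(p) reads ((DataPool<T, N> **)p)[-1] to find the pool and
// returns 'p' to it, no matter which thread performs the release.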
struct Taskgroup;
__thread DataPool<Taskgroup, 4> *tgp;

/// Data structure to support stacking of taskgroups and allow synchronization.
struct Taskgroup {
  /// Its address is used for relationships of the taskgroup's task set.
  ompt_tsan_clockid Ptr;

  /// Reference to the parent taskgroup.
  Taskgroup *Parent;

  Taskgroup(Taskgroup *Parent) : Parent(Parent) {}

  ~Taskgroup() { TsanDeleteClock(&Ptr); }

  void *GetPtr() { return &Ptr; }

  // overload new/delete to use DataPool for memory management.
  void *operator new(size_t size) { return tgp->getData(); }
  void operator delete(void *p, size_t) { retData<Taskgroup, 4>(p); }
};

struct TaskData;
__thread DataPool<TaskData, 4> *tdp;

/// Data structure to store additional information for tasks.
struct TaskData {
  /// Its address is used for relationships of this task.
  ompt_tsan_clockid Task;

  /// Child tasks use its address to declare a relationship to a taskwait in
  /// this task.
  ompt_tsan_clockid Taskwait;

  /// Whether this task is currently executing a barrier.
  bool InBarrier;

  /// The task type as reported by the task-create callback (bitwise OR of
  /// ompt_task_* flags).
  int TaskType{0};

  /// Index of which barrier to use next.
  char BarrierIndex;

  /// Count how often this structure has been put into child tasks + 1.
  std::atomic_int RefCount;

  /// Reference to the parent that created this task.
  TaskData *Parent;

  /// Reference to the implicit task in the stack above this task.
  TaskData *ImplicitTask;

  /// Reference to the team of this task.
  ParallelData *Team;

  /// Reference to the current taskgroup that this task either belongs to or
  /// that it just created.
  Taskgroup *TaskGroup;

  /// Dependency information for this task.
  ompt_dependence_t *Dependencies;

  /// Number of dependency entries.
  unsigned DependencyCount;

  void *PrivateData;
  size_t PrivateDataSize;

  int execution;
  int freed;

  TaskData(TaskData *Parent, int taskType)
      : InBarrier(false), TaskType(taskType), BarrierIndex(0), RefCount(1),
        Parent(Parent), ImplicitTask(nullptr), Team(Parent->Team),
        TaskGroup(nullptr), DependencyCount(0), execution(0), freed(0) {
    if (Parent != nullptr) {
      Parent->RefCount++;
      // Copy over pointer to taskgroup. This task may set up its own stack
      // but for now belongs to its parent's taskgroup.
      TaskGroup = Parent->TaskGroup;
    }
  }

  TaskData(ParallelData *Team, int taskType)
      : InBarrier(false), TaskType(taskType), BarrierIndex(0), RefCount(1),
        Parent(nullptr), ImplicitTask(this), Team(Team), TaskGroup(nullptr),
        DependencyCount(0), execution(1), freed(0) {}

  ~TaskData() {
    TsanDeleteClock(&Task);
    TsanDeleteClock(&Taskwait);
  }

  bool isIncluded() { return TaskType & ompt_task_undeferred; }
  bool isUntied() { return TaskType & ompt_task_untied; }
  bool isFinal() { return TaskType & ompt_task_final; }
  bool isMergable() { return TaskType & ompt_task_mergeable; }
  bool isMerged() { return TaskType & ompt_task_merged; }
  bool isExplicit() { return TaskType & ompt_task_explicit; }
  bool isImplicit() { return TaskType & ompt_task_implicit; }
  bool isInitial() { return TaskType & ompt_task_initial; }
  bool isTarget() { return TaskType & ompt_task_target; }

  void *GetTaskPtr() { return &Task; }

  void *GetTaskwaitPtr() { return &Taskwait; }

  // overload new/delete to use DataPool for memory management.
  void *operator new(size_t size) { return tdp->getData(); }
  void operator delete(void *p, size_t) { retData<TaskData, 4>(p); }
};

static inline TaskData *ToTaskData(ompt_data_t *task_data) {
  return reinterpret_cast<TaskData *>(task_data->ptr);
}

static inline void *ToInAddr(void *OutAddr) {
  // FIXME: This will give false negatives when a second variable lies directly
  // behind a variable that only has a width of 1 byte.
  // Another approach would be to "negate" the address or to flip the
  // first bit...
  return reinterpret_cast<char *>(OutAddr) + 1;
}
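
// Illustrative sketch (hypothetical variable 'x'): each dependence variable
// gets two annotation addresses, &x for out/inout arcs and ToInAddr(&x) for
// in arcs. E.g.
//
//   #pragma omp task depend(out: x)  // end: HappensBefore(&x) + ToInAddr(&x)
//   #pragma omp task depend(in: x)   // start: HappensAfter(&x)
//
// orders the writer before its readers while independent readers stay
// unordered among themselves; ompt_tsan_task_schedule below emits the calls.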
/// Store a mutex for each wait_id to resolve race condition with callbacks.
std::unordered_map<ompt_wait_id_t, std::mutex> Locks;
std::mutex LocksMutex;

static void ompt_tsan_thread_begin(ompt_thread_t thread_type,
                                   ompt_data_t *thread_data) {
  pdp = new DataPool<ParallelData, 4>;
  TsanNewMemory(pdp, sizeof(pdp));
  tgp = new DataPool<Taskgroup, 4>;
  TsanNewMemory(tgp, sizeof(tgp));
  tdp = new DataPool<TaskData, 4>;
  TsanNewMemory(tdp, sizeof(tdp));
  thread_data->value = my_next_id();
}

static void ompt_tsan_thread_end(ompt_data_t *thread_data) {
  delete pdp;
  delete tgp;
  delete tdp;
}

/// OMPT event callbacks for handling parallel regions.

static void ompt_tsan_parallel_begin(ompt_data_t *parent_task_data,
                                     const ompt_frame_t *parent_task_frame,
                                     ompt_data_t *parallel_data,
                                     uint32_t requested_team_size, int flag,
                                     const void *codeptr_ra) {
  ParallelData *Data = new ParallelData(codeptr_ra);
  parallel_data->ptr = Data;

  TsanHappensBefore(Data->GetParallelPtr());
  if (archer_flags->ignore_serial && ToTaskData(parent_task_data)->isInitial())
    TsanIgnoreWritesEnd();
}

static void ompt_tsan_parallel_end(ompt_data_t *parallel_data,
                                   ompt_data_t *task_data, int flag,
                                   const void *codeptr_ra) {
  if (archer_flags->ignore_serial && ToTaskData(task_data)->isInitial())
    TsanIgnoreWritesBegin();
  ParallelData *Data = ToParallelData(parallel_data);
  TsanHappensAfter(Data->GetBarrierPtr(0));
  TsanHappensAfter(Data->GetBarrierPtr(1));

  delete Data;

#if (LLVM_VERSION >= 40)
  if (&__archer_get_omp_status) {
    if (__archer_get_omp_status() == 0 && archer_flags->flush_shadow)
      __tsan_flush_memory();
  }
#endif
}

static void ompt_tsan_implicit_task(ompt_scope_endpoint_t endpoint,
                                    ompt_data_t *parallel_data,
                                    ompt_data_t *task_data,
                                    unsigned int team_size,
                                    unsigned int thread_num, int type) {
  switch (endpoint) {
  case ompt_scope_begin:
    if (type & ompt_task_initial) {
      parallel_data->ptr = new ParallelData(nullptr);
    }
    task_data->ptr = new TaskData(ToParallelData(parallel_data), type);
    TsanHappensAfter(ToParallelData(parallel_data)->GetParallelPtr());
    TsanFuncEntry(ToParallelData(parallel_data)->codePtr);
    break;
  case ompt_scope_end: {
    TaskData *Data = ToTaskData(task_data);
    assert(Data->freed == 0 &&
           "Implicit task end should only be called once!");
    Data->freed = 1;
    assert(Data->RefCount == 1 &&
           "All tasks should have finished at the implicit barrier!");
    delete Data;
    TsanFuncExit();
    break;
  }
  case ompt_scope_beginend:
    // Should not occur according to OpenMP 5.1
    // Tested in OMPT tests
    break;
  }
}
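
// Illustrative trace of the two-address barrier scheme used below: entering
// barrier k annotates HappensBefore(Barrier[k % 2]); leaving it annotates
// HappensAfter(Barrier[k % 2]) and advances the index, so barrier k+1 uses
// the other address while slow threads may still be inside barrier k.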
static void ompt_tsan_sync_region(ompt_sync_region_t kind,
                                  ompt_scope_endpoint_t endpoint,
                                  ompt_data_t *parallel_data,
                                  ompt_data_t *task_data,
                                  const void *codeptr_ra) {
  TaskData *Data = ToTaskData(task_data);
  switch (endpoint) {
  case ompt_scope_begin:
  case ompt_scope_beginend:
    TsanFuncEntry(codeptr_ra);
    switch (kind) {
    case ompt_sync_region_barrier_implementation:
    case ompt_sync_region_barrier_implicit:
    case ompt_sync_region_barrier_explicit:
    case ompt_sync_region_barrier_implicit_parallel:
    case ompt_sync_region_barrier_implicit_workshare:
    case ompt_sync_region_barrier_teams:
    case ompt_sync_region_barrier: {
      char BarrierIndex = Data->BarrierIndex;
      TsanHappensBefore(Data->Team->GetBarrierPtr(BarrierIndex));

      if (hasReductionCallback < ompt_set_always) {
        // We ignore writes inside the barrier. These would either occur during
        // 1. reductions performed by the runtime which are guaranteed to be
        //    race-free.
        // 2. execution of another task.
        // For the latter case we will re-enable tracking in task_switch.
        Data->InBarrier = true;
        TsanIgnoreWritesBegin();
      }

      break;
    }

    case ompt_sync_region_taskwait:
      break;

    case ompt_sync_region_taskgroup:
      Data->TaskGroup = new Taskgroup(Data->TaskGroup);
      break;

    case ompt_sync_region_reduction:
      // should never be reached
      break;
    }
    if (endpoint == ompt_scope_begin)
      break;
    KMP_FALLTHROUGH();
  case ompt_scope_end:
    TsanFuncExit();
    switch (kind) {
    case ompt_sync_region_barrier_implementation:
    case ompt_sync_region_barrier_implicit:
    case ompt_sync_region_barrier_explicit:
    case ompt_sync_region_barrier_implicit_parallel:
    case ompt_sync_region_barrier_implicit_workshare:
    case ompt_sync_region_barrier_teams:
    case ompt_sync_region_barrier: {
      if (hasReductionCallback < ompt_set_always) {
        // We want to track writes after the barrier again.
        Data->InBarrier = false;
        TsanIgnoreWritesEnd();
      }

      char BarrierIndex = Data->BarrierIndex;
      // Barrier will end after it has been entered by all threads.
      if (parallel_data)
        TsanHappensAfter(Data->Team->GetBarrierPtr(BarrierIndex));

      // It is not guaranteed that all threads have exited this barrier before
      // we enter the next one. So we will use a different address.
      // We are however guaranteed that this current barrier is finished
      // by the time we exit the next one. So we can then reuse the first
      // address.
      Data->BarrierIndex = (BarrierIndex + 1) % 2;
      break;
    }

    case ompt_sync_region_taskwait: {
      if (Data->execution > 1)
        TsanHappensAfter(Data->GetTaskwaitPtr());
      break;
    }

    case ompt_sync_region_taskgroup: {
      assert(Data->TaskGroup != nullptr &&
             "Should have at least one taskgroup!");

      TsanHappensAfter(Data->TaskGroup->GetPtr());

      // Delete this allocated taskgroup; all descendent tasks are finished by
      // now.
      Taskgroup *Parent = Data->TaskGroup->Parent;
      delete Data->TaskGroup;
      Data->TaskGroup = Parent;
      break;
    }

    case ompt_sync_region_reduction:
      // Should not occur according to OpenMP 5.1
      // Tested in OMPT tests
      break;
    }
    break;
  }
}

static void ompt_tsan_reduction(ompt_sync_region_t kind,
                                ompt_scope_endpoint_t endpoint,
                                ompt_data_t *parallel_data,
                                ompt_data_t *task_data,
                                const void *codeptr_ra) {
  switch (endpoint) {
  case ompt_scope_begin:
    switch (kind) {
    case ompt_sync_region_reduction:
      TsanIgnoreWritesBegin();
      break;
    default:
      break;
    }
    break;
  case ompt_scope_end:
    switch (kind) {
    case ompt_sync_region_reduction:
      TsanIgnoreWritesEnd();
      break;
    default:
      break;
    }
    break;
  case ompt_scope_beginend:
    // Should not occur according to OpenMP 5.1
    // Tested in OMPT tests
    // Would have no implications for DR detection
    break;
  }
}
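
// Illustrative flow for '#pragma omp taskgroup' (derived from the handlers
// above): sync_region(begin) pushes a fresh Taskgroup onto Data->TaskGroup;
// every member task that completes annotates HappensBefore on its GetPtr();
// sync_region(end) annotates HappensAfter on the same address and pops the
// stack, ordering all member tasks before the code after the taskgroup.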
/// OMPT event callbacks for handling tasks.

static void ompt_tsan_task_create(
    ompt_data_t *parent_task_data,    /* id of parent task */
    const ompt_frame_t *parent_frame, /* frame data for parent task */
    ompt_data_t *new_task_data,       /* id of created task */
    int type, int has_dependences,
    const void *codeptr_ra) /* pointer to outlined function */
{
  TaskData *Data;
  assert(new_task_data->ptr == NULL &&
         "Task data should be initialized to NULL");
  if (type & ompt_task_initial) {
    ompt_data_t *parallel_data;
    int team_size = 1;
    ompt_get_parallel_info(0, &parallel_data, &team_size);
    ParallelData *PData = new ParallelData(nullptr);
    parallel_data->ptr = PData;

    Data = new TaskData(PData, type);
    new_task_data->ptr = Data;
  } else if (type & ompt_task_undeferred) {
    Data = new TaskData(ToTaskData(parent_task_data), type);
    new_task_data->ptr = Data;
  } else if (type & ompt_task_explicit || type & ompt_task_target) {
    Data = new TaskData(ToTaskData(parent_task_data), type);
    new_task_data->ptr = Data;

    // Use the newly created address. We cannot use a single address from the
    // parent because that would declare wrong relationships with other
    // sibling tasks that may be created before this task is started!
    TsanHappensBefore(Data->GetTaskPtr());
    ToTaskData(parent_task_data)->execution++;
  }
}

static void __ompt_tsan_release_task(TaskData *task) {
  while (task != nullptr && --task->RefCount == 0) {
    TaskData *Parent = task->Parent;
    if (task->DependencyCount > 0) {
      delete[] task->Dependencies;
    }
    delete task;
    task = Parent;
  }
}
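
// Note on TaskData lifetime (restating the logic above): RefCount starts at 1
// for the task itself and grows by 1 per child, so __ompt_tsan_release_task
// frees a task only once it has completed and all children have dropped their
// parent reference, walking up the ancestor chain as counts reach zero.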
static void ompt_tsan_task_schedule(ompt_data_t *first_task_data,
                                    ompt_task_status_t prior_task_status,
                                    ompt_data_t *second_task_data) {

  //
  //  The necessary action depends on prior_task_status:
  //
  //    ompt_task_early_fulfill = 5,
  //     -> ignored
  //
  //    ompt_task_late_fulfill  = 6,
  //     -> first completed, first freed, second ignored
  //
  //    ompt_task_complete      = 1,
  //    ompt_task_cancel        = 3,
  //     -> first completed, first freed, second starts
  //
  //    ompt_task_detach        = 4,
  //    ompt_task_yield         = 2,
  //    ompt_task_switch        = 7
  //     -> first suspended, second starts
  //

  if (prior_task_status == ompt_task_early_fulfill)
    return;

  TaskData *FromTask = ToTaskData(first_task_data);

  // Legacy handling for missing reduction callback
  if (hasReductionCallback < ompt_set_always && FromTask->InBarrier) {
    // We want to ignore writes in the runtime code during barriers,
    // but not when executing tasks with user code!
    TsanIgnoreWritesEnd();
  }

  // The late fulfill happens after the detached task finished execution
  if (prior_task_status == ompt_task_late_fulfill)
    TsanHappensAfter(FromTask->GetTaskPtr());

  // task completed execution
  if (prior_task_status == ompt_task_complete ||
      prior_task_status == ompt_task_cancel ||
      prior_task_status == ompt_task_late_fulfill) {
    // Included tasks are executed sequentially, no need to track
    // synchronization
    if (!FromTask->isIncluded()) {
      // Task will finish before a barrier in the surrounding parallel region
      // ...
      ParallelData *PData = FromTask->Team;
      TsanHappensBefore(
          PData->GetBarrierPtr(FromTask->ImplicitTask->BarrierIndex));

      // ... and before an eventual taskwait by the parent thread.
      TsanHappensBefore(FromTask->Parent->GetTaskwaitPtr());

      if (FromTask->TaskGroup != nullptr) {
        // This task is part of a taskgroup, so it will finish before the
        // corresponding taskgroup_end.
        TsanHappensBefore(FromTask->TaskGroup->GetPtr());
      }
    }

    // release dependencies
    for (unsigned i = 0; i < FromTask->DependencyCount; i++) {
      ompt_dependence_t *Dependency = &FromTask->Dependencies[i];

      // in dependencies block following inout and out dependencies!
      TsanHappensBefore(ToInAddr(Dependency->variable.ptr));
      if (Dependency->dependence_type == ompt_dependence_type_out ||
          Dependency->dependence_type == ompt_dependence_type_inout) {
        TsanHappensBefore(Dependency->variable.ptr);
      }
    }
    // free the previously running task
    __ompt_tsan_release_task(FromTask);
  }

  // For late fulfill of detached task, there is no task to schedule to
  if (prior_task_status == ompt_task_late_fulfill) {
    return;
  }

  TaskData *ToTask = ToTaskData(second_task_data);

  // Legacy handling for missing reduction callback
  if (hasReductionCallback < ompt_set_always && ToTask->InBarrier) {
    // We re-enter runtime code which currently performs a barrier.
    TsanIgnoreWritesBegin();
  }

  // task suspended
  if (prior_task_status == ompt_task_switch ||
      prior_task_status == ompt_task_yield ||
      prior_task_status == ompt_task_detach) {
    // Task may be resumed at a later point in time.
    TsanHappensBefore(FromTask->GetTaskPtr());
    ToTask->ImplicitTask = FromTask->ImplicitTask;
    assert(ToTask->ImplicitTask != NULL &&
           "A task belongs to a team and has an implicit task on the stack");
  }

  // Handle dependencies on first execution of the task
  if (ToTask->execution == 0) {
    ToTask->execution++;
    for (unsigned i = 0; i < ToTask->DependencyCount; i++) {
      ompt_dependence_t *Dependency = &ToTask->Dependencies[i];

      TsanHappensAfter(Dependency->variable.ptr);
      // in and inout dependencies are also blocked by prior in dependencies!
      if (Dependency->dependence_type == ompt_dependence_type_out ||
          Dependency->dependence_type == ompt_dependence_type_inout) {
        TsanHappensAfter(ToInAddr(Dependency->variable.ptr));
      }
    }
  }

  // 1. Task will begin execution after it has been created.
  // 2. Task will resume after it has been switched away.
  TsanHappensAfter(ToTask->GetTaskPtr());
}

static void ompt_tsan_dependences(ompt_data_t *task_data,
                                  const ompt_dependence_t *deps, int ndeps) {
  if (ndeps > 0) {
    // Copy the data to use it in task_switch and task_end.
    TaskData *Data = ToTaskData(task_data);
    Data->Dependencies = new ompt_dependence_t[ndeps];
    std::memcpy(Data->Dependencies, deps, sizeof(ompt_dependence_t) * ndeps);
    Data->DependencyCount = ndeps;

    // This callback is executed before this task is first started.
    TsanHappensBefore(Data->GetTaskPtr());
  }
}

/// OMPT event callbacks for handling locking.
static void ompt_tsan_mutex_acquired(ompt_mutex_t kind, ompt_wait_id_t wait_id,
                                     const void *codeptr_ra) {
  // Acquire our own lock to make sure that
  // 1. the previous release has finished.
  // 2. the next acquire doesn't start before we have finished our release.
  LocksMutex.lock();
  std::mutex &Lock = Locks[wait_id];
  LocksMutex.unlock();

  Lock.lock();
  TsanHappensAfter(&Lock);
}

static void ompt_tsan_mutex_released(ompt_mutex_t kind, ompt_wait_id_t wait_id,
                                     const void *codeptr_ra) {
  LocksMutex.lock();
  std::mutex &Lock = Locks[wait_id];
  LocksMutex.unlock();
  TsanHappensBefore(&Lock);

  Lock.unlock();
}

// callback, signature, variable to store result, required support level
#define SET_OPTIONAL_CALLBACK_T(event, type, result, level)                   \
  do {                                                                        \
    ompt_callback_##type##_t tsan_##event = &ompt_tsan_##event;               \
    result = ompt_set_callback(ompt_callback_##event,                         \
                               (ompt_callback_t)tsan_##event);                \
    if (result < level)                                                       \
      printf("Registered callback '" #event "' is not supported at " #level   \
             " (%i)\n",                                                       \
             result);                                                         \
  } while (0)

#define SET_CALLBACK_T(event, type)                                           \
  do {                                                                        \
    int res;                                                                  \
    SET_OPTIONAL_CALLBACK_T(event, type, res, ompt_set_always);               \
  } while (0)

#define SET_CALLBACK(event) SET_CALLBACK_T(event, event)
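
// Illustrative expansion (hand-expanded, whitespace adjusted):
//
//   SET_CALLBACK(parallel_begin);
//
// becomes roughly
//
//   do {
//     int res;
//     do {
//       ompt_callback_parallel_begin_t tsan_parallel_begin =
//           &ompt_tsan_parallel_begin;
//       res = ompt_set_callback(ompt_callback_parallel_begin,
//                               (ompt_callback_t)tsan_parallel_begin);
//       if (res < ompt_set_always)
//         printf("Registered callback 'parallel_begin' is not supported at "
//                "ompt_set_always (%i)\n", res);
//     } while (0);
//   } while (0);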
static int ompt_tsan_initialize(ompt_function_lookup_t lookup, int device_num,
                                ompt_data_t *tool_data) {
  const char *options = getenv("TSAN_OPTIONS");
  TsanFlags tsan_flags(options);

  ompt_set_callback_t ompt_set_callback =
      (ompt_set_callback_t)lookup("ompt_set_callback");
  if (ompt_set_callback == NULL) {
    std::cerr << "Could not set callback, exiting..." << std::endl;
    std::exit(1);
  }
  ompt_get_parallel_info =
      (ompt_get_parallel_info_t)lookup("ompt_get_parallel_info");
  ompt_get_thread_data =
      (ompt_get_thread_data_t)lookup("ompt_get_thread_data");

  if (ompt_get_parallel_info == NULL) {
    fprintf(stderr, "Could not get inquiry function 'ompt_get_parallel_info', "
                    "exiting...\n");
    exit(1);
  }

  SET_CALLBACK(thread_begin);
  SET_CALLBACK(thread_end);
  SET_CALLBACK(parallel_begin);
  SET_CALLBACK(implicit_task);
  SET_CALLBACK(sync_region);
  SET_CALLBACK(parallel_end);

  SET_CALLBACK(task_create);
  SET_CALLBACK(task_schedule);
  SET_CALLBACK(dependences);

  SET_CALLBACK_T(mutex_acquired, mutex);
  SET_CALLBACK_T(mutex_released, mutex);
  SET_OPTIONAL_CALLBACK_T(reduction, sync_region, hasReductionCallback,
                          ompt_set_never);

  if (!tsan_flags.ignore_noninstrumented_modules)
    fprintf(stderr,
            "Warning: please export "
            "TSAN_OPTIONS='ignore_noninstrumented_modules=1' "
            "to avoid false positive reports from the OpenMP runtime!\n");
  if (archer_flags->ignore_serial)
    TsanIgnoreWritesBegin();

  return 1; // success
}

static void ompt_tsan_finalize(ompt_data_t *tool_data) {
  if (archer_flags->ignore_serial)
    TsanIgnoreWritesEnd();
  if (archer_flags->print_max_rss) {
    struct rusage end;
    getrusage(RUSAGE_SELF, &end);
    printf("MAX RSS[KBytes] during execution: %ld\n", end.ru_maxrss);
  }

  if (archer_flags)
    delete archer_flags;
}

extern "C" ompt_start_tool_result_t *
ompt_start_tool(unsigned int omp_version, const char *runtime_version) {
  const char *options = getenv("ARCHER_OPTIONS");
  archer_flags = new ArcherFlags(options);
  if (!archer_flags->enabled) {
    if (archer_flags->verbose)
      std::cout << "Archer disabled, stopping operation" << std::endl;
    delete archer_flags;
    return NULL;
  }

  static ompt_start_tool_result_t ompt_start_tool_result = {
      &ompt_tsan_initialize, &ompt_tsan_finalize, {0}};
  runOnTsan = 1;
  RunningOnValgrind();
  if (!runOnTsan) // if we are not running on TSAN, give a different tool the
                  // chance to be loaded
  {
    if (archer_flags->verbose)
      std::cout << "Archer detected OpenMP application without TSan; "
                   "stopping operation"
                << std::endl;
    delete archer_flags;
    return NULL;
  }

  if (archer_flags->verbose)
    std::cout << "Archer detected OpenMP application with TSan, supplying "
                 "OpenMP synchronization semantics"
              << std::endl;
  return &ompt_start_tool_result;
}
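
// Usage sketch (assumptions about the surrounding build, not part of this
// file): with an OMPT-capable OpenMP runtime, a TSan-instrumented program
// can activate this tool through the standard OMPT mechanism, e.g.
//
//   clang -fsanitize=thread -fopenmp app.c
//   OMP_TOOL_LIBRARIES=/path/to/libarcher.so ./a.out
//
// ompt_start_tool above then checks for a live TSan runtime (via the
// RunningOnValgrind interception) before registering any callbacks.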