//===-- xray_basic_logging.cpp ----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// Implementation of a simple in-memory log of XRay events. This defines a
// logging function that's compatible with the XRay handler interface, and
// routines for exporting data to files.
//
//===----------------------------------------------------------------------===//

#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <sys/stat.h>
#if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_MAC
#include <sys/syscall.h>
#endif
#include <sys/types.h>
#include <time.h>
#include <unistd.h>

#include "sanitizer_common/sanitizer_allocator_internal.h"
#include "sanitizer_common/sanitizer_libc.h"
#include "xray/xray_records.h"
#include "xray_recursion_guard.h"
#include "xray_basic_flags.h"
#include "xray_basic_logging.h"
#include "xray_defs.h"
#include "xray_flags.h"
#include "xray_interface_internal.h"
#include "xray_tsc.h"
#include "xray_utils.h"

namespace __xray {

static SpinMutex LogMutex;

namespace {
// We use elements of this type to record the entry TSC of every function ID we
// see as we're tracing a particular thread's execution.
struct alignas(16) StackEntry {
  int32_t FuncId;
  uint16_t Type;
  uint8_t CPU;
  uint8_t Padding;
  uint64_t TSC;
};

static_assert(sizeof(StackEntry) == 16, "Wrong size for StackEntry");

struct XRAY_TLS_ALIGNAS(64) ThreadLocalData {
  void *InMemoryBuffer = nullptr;
  size_t BufferSize = 0;
  size_t BufferOffset = 0;
  void *ShadowStack = nullptr;
  size_t StackSize = 0;
  size_t StackEntries = 0;
  __xray::LogWriter *LogWriter = nullptr;
};

struct BasicLoggingOptions {
  int DurationFilterMicros = 0;
  size_t MaxStackDepth = 0;
  size_t ThreadBufferSize = 0;
};
} // namespace

static pthread_key_t PThreadKey;

static atomic_uint8_t BasicInitialized{0};

struct BasicLoggingOptions GlobalOptions;

thread_local atomic_uint8_t Guard{0};

static atomic_uint8_t UseRealTSC{0};
static atomic_uint64_t ThresholdTicks{0};
static atomic_uint64_t TicksPerSec{0};
static atomic_uint64_t CycleFrequency{NanosecondsPerSecond};

static LogWriter *getLog() XRAY_NEVER_INSTRUMENT {
  LogWriter *LW = LogWriter::Open();
  if (LW == nullptr)
    return LW;

  static pthread_once_t DetectOnce = PTHREAD_ONCE_INIT;
  pthread_once(&DetectOnce, +[] {
    if (atomic_load(&UseRealTSC, memory_order_acquire))
      atomic_store(&CycleFrequency, getTSCFrequency(), memory_order_release);
  });

  // Since we're here, we get to write the header. We set it up so that the
  // header will only be written once, at the start, and let the threads
  // logging do writes which just append.
  XRayFileHeader Header;

  // Version 2 includes tail exit records.
  // Version 3 includes pid inside records.
  Header.Version = 3;
  Header.Type = FileTypes::NAIVE_LOG;
  Header.CycleFrequency = atomic_load(&CycleFrequency, memory_order_acquire);

  // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc'
  // before setting the values in the header.
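  // Note: 'constant_tsc' means the TSC ticks at a constant rate regardless of
  // CPU frequency scaling, and 'nonstop_tsc' means it keeps ticking in deep
  // sleep states; both are assumed here rather than detected.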
  Header.ConstantTSC = 1;
  Header.NonstopTSC = 1;
  LW->WriteAll(reinterpret_cast<char *>(&Header),
               reinterpret_cast<char *>(&Header) + sizeof(Header));
  return LW;
}

static LogWriter *getGlobalLog() XRAY_NEVER_INSTRUMENT {
  static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
  static LogWriter *LW = nullptr;
  pthread_once(&OnceInit, +[] { LW = getLog(); });
  return LW;
}

static ThreadLocalData &getThreadLocalData() XRAY_NEVER_INSTRUMENT {
  thread_local ThreadLocalData TLD;
  thread_local bool UNUSED TOnce = [] {
    if (GlobalOptions.ThreadBufferSize == 0) {
      if (Verbosity())
        Report("Not initializing TLD since ThreadBufferSize == 0.\n");
      return false;
    }
    pthread_setspecific(PThreadKey, &TLD);
    TLD.LogWriter = getGlobalLog();
    TLD.InMemoryBuffer = reinterpret_cast<XRayRecord *>(
        InternalAlloc(sizeof(XRayRecord) * GlobalOptions.ThreadBufferSize,
                      nullptr, alignof(XRayRecord)));
    TLD.BufferSize = GlobalOptions.ThreadBufferSize;
    TLD.BufferOffset = 0;
    if (GlobalOptions.MaxStackDepth == 0) {
      if (Verbosity())
        Report("Not initializing the ShadowStack since MaxStackDepth == 0.\n");
      TLD.StackSize = 0;
      TLD.StackEntries = 0;
      TLD.ShadowStack = nullptr;
      return false;
    }
    TLD.ShadowStack = reinterpret_cast<StackEntry *>(
        InternalAlloc(sizeof(StackEntry) * GlobalOptions.MaxStackDepth, nullptr,
                      alignof(StackEntry)));
    TLD.StackSize = GlobalOptions.MaxStackDepth;
    TLD.StackEntries = 0;
    return false;
  }();
  return TLD;
}

template <class RDTSC>
void InMemoryRawLog(int32_t FuncId, XRayEntryType Type,
                    RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT {
  auto &TLD = getThreadLocalData();
  LogWriter *LW = getGlobalLog();
  if (LW == nullptr)
    return;

  // Use a simple recursion guard, to handle cases where we're already logging
  // and for one reason or another, this function gets called again in the same
  // thread.
  RecursionGuard G(Guard);
  if (!G)
    return;

  uint8_t CPU = 0;
  uint64_t TSC = ReadTSC(CPU);

  switch (Type) {
  case XRayEntryType::ENTRY:
  case XRayEntryType::LOG_ARGS_ENTRY: {
    // Short circuit if we've reached the maximum depth of the stack.
    if (TLD.StackEntries++ >= TLD.StackSize)
      return;

    // When we encounter an entry event, we keep track of the TSC and the CPU,
    // and put it in the stack.
    StackEntry E;
    E.FuncId = FuncId;
    E.CPU = CPU;
    E.Type = Type;
    E.TSC = TSC;
    auto StackEntryPtr = static_cast<char *>(TLD.ShadowStack) +
                         (sizeof(StackEntry) * (TLD.StackEntries - 1));
    internal_memcpy(StackEntryPtr, &E, sizeof(StackEntry));
    break;
  }
  case XRayEntryType::EXIT:
  case XRayEntryType::TAIL: {
    if (TLD.StackEntries == 0)
      break;

    if (--TLD.StackEntries >= TLD.StackSize)
      return;

    // When we encounter an exit event, we check whether all the following are
    // true:
    //
    // - The Function ID is the same as the most recent entry in the stack.
    // - The CPU is the same as the most recent entry in the stack.
    // - The Delta of the TSCs is less than the threshold amount of time we're
    //   looking to record.
    //
    // If all of these conditions are true, we pop the stack, don't write a
    // record, and move the record offset back.
    StackEntry StackTop;
    auto StackEntryPtr = static_cast<char *>(TLD.ShadowStack) +
                         (sizeof(StackEntry) * TLD.StackEntries);
    internal_memcpy(&StackTop, StackEntryPtr, sizeof(StackEntry));
    if (StackTop.FuncId == FuncId && StackTop.CPU == CPU &&
        StackTop.TSC < TSC) {
      auto Delta = TSC - StackTop.TSC;
      if (Delta < atomic_load(&ThresholdTicks, memory_order_relaxed)) {
        DCHECK(TLD.BufferOffset > 0);
        TLD.BufferOffset -= StackTop.Type == XRayEntryType::ENTRY ? 1 : 2;
        return;
      }
    }
    break;
  }
  default:
    // Should be unreachable.
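    // Only ENTRY, LOG_ARGS_ENTRY, EXIT and TAIL are expected here; custom and
    // typed events are not supported in Basic Mode (their handlers are removed
    // in basicLoggingInit), so any other entry type indicates a bug.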
    DCHECK(false && "Unsupported XRayEntryType encountered.");
    break;
  }

  // First determine whether the delta between the function's enter record and
  // the exit record is higher than the threshold.
  XRayRecord R;
  R.RecordType = RecordTypes::NORMAL;
  R.CPU = CPU;
  R.TSC = TSC;
  R.TId = GetTid();
  R.PId = internal_getpid();
  R.Type = Type;
  R.FuncId = FuncId;
  auto FirstEntry = reinterpret_cast<XRayRecord *>(TLD.InMemoryBuffer);
  internal_memcpy(FirstEntry + TLD.BufferOffset, &R, sizeof(R));
  if (++TLD.BufferOffset == TLD.BufferSize) {
    SpinMutexLock Lock(&LogMutex);
    LW->WriteAll(reinterpret_cast<char *>(FirstEntry),
                 reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset));
    TLD.BufferOffset = 0;
    TLD.StackEntries = 0;
  }
}

template <class RDTSC>
void InMemoryRawLogWithArg(int32_t FuncId, XRayEntryType Type, uint64_t Arg1,
                           RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT {
  auto &TLD = getThreadLocalData();
  auto FirstEntry = reinterpret_cast<XRayArgPayload *>(TLD.InMemoryBuffer);
  const auto &BuffLen = TLD.BufferSize;
  LogWriter *LW = getGlobalLog();
  if (LW == nullptr)
    return;

  // First we check whether there's enough space to write the data
  // consecutively in the thread-local buffer. If not, we flush the buffer
  // before attempting to write the two records that must be consecutive.
  if (TLD.BufferOffset + 2 > BuffLen) {
    SpinMutexLock Lock(&LogMutex);
    LW->WriteAll(reinterpret_cast<char *>(FirstEntry),
                 reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset));
    TLD.BufferOffset = 0;
    TLD.StackEntries = 0;
  }

  // Then we write the "we have an argument" record.
  InMemoryRawLog(FuncId, Type, ReadTSC);

  RecursionGuard G(Guard);
  if (!G)
    return;

  // And, from here on, write the arg payload.
  XRayArgPayload R;
  R.RecordType = RecordTypes::ARG_PAYLOAD;
  R.FuncId = FuncId;
  R.TId = GetTid();
  R.PId = internal_getpid();
  R.Arg = Arg1;
  internal_memcpy(FirstEntry + TLD.BufferOffset, &R, sizeof(R));
  if (++TLD.BufferOffset == BuffLen) {
    SpinMutexLock Lock(&LogMutex);
    LW->WriteAll(reinterpret_cast<char *>(FirstEntry),
                 reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset));
    TLD.BufferOffset = 0;
    TLD.StackEntries = 0;
  }
}

void basicLoggingHandleArg0RealTSC(int32_t FuncId,
                                   XRayEntryType Type) XRAY_NEVER_INSTRUMENT {
  InMemoryRawLog(FuncId, Type, readTSC);
}

void basicLoggingHandleArg0EmulateTSC(int32_t FuncId, XRayEntryType Type)
    XRAY_NEVER_INSTRUMENT {
  InMemoryRawLog(FuncId, Type, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
    timespec TS;
    int result = clock_gettime(CLOCK_REALTIME, &TS);
    if (result != 0) {
      Report("clock_gettime(2) returned %d, errno=%d.", result, int(errno));
      TS = {0, 0};
    }
    CPU = 0;
    return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
  });
}

void basicLoggingHandleArg1RealTSC(int32_t FuncId, XRayEntryType Type,
                                   uint64_t Arg1) XRAY_NEVER_INSTRUMENT {
  InMemoryRawLogWithArg(FuncId, Type, Arg1, readTSC);
}

void basicLoggingHandleArg1EmulateTSC(int32_t FuncId, XRayEntryType Type,
                                      uint64_t Arg1) XRAY_NEVER_INSTRUMENT {
  InMemoryRawLogWithArg(
      FuncId, Type, Arg1, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
        timespec TS;
        int result = clock_gettime(CLOCK_REALTIME, &TS);
        if (result != 0) {
          Report("clock_gettime(2) returned %d, errno=%d.", result,
                 int(errno));
          TS = {0, 0};
        }
        CPU = 0;
        return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
      });
}

static void TLDDestructor(void *P) XRAY_NEVER_INSTRUMENT {
  ThreadLocalData &TLD = *reinterpret_cast<ThreadLocalData *>(P);
  auto ExitGuard = at_scope_exit([&TLD] {
    // Clean up dynamic resources.
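    // Both buffers were obtained via InternalAlloc in getThreadLocalData(),
    // so they must be released with InternalFree.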
    if (TLD.InMemoryBuffer)
      InternalFree(TLD.InMemoryBuffer);
    if (TLD.ShadowStack)
      InternalFree(TLD.ShadowStack);
    if (Verbosity())
      Report("Cleaned up log for TID: %d\n", GetTid());
  });

  if (TLD.LogWriter == nullptr || TLD.BufferOffset == 0) {
    if (Verbosity())
      Report("Skipping buffer for TID: %d; Offset = %llu\n", GetTid(),
             TLD.BufferOffset);
    return;
  }

  {
    SpinMutexLock L(&LogMutex);
    TLD.LogWriter->WriteAll(reinterpret_cast<char *>(TLD.InMemoryBuffer),
                            reinterpret_cast<char *>(TLD.InMemoryBuffer) +
                                (sizeof(XRayRecord) * TLD.BufferOffset));
  }

  // Because this thread's exit could be the last one trying to write to the
  // file, and we're not able to close out the file properly, we sync instead
  // and hope that the pending writes are flushed as the thread exits.
  TLD.LogWriter->Flush();
}

XRayLogInitStatus basicLoggingInit(UNUSED size_t BufferSize,
                                   UNUSED size_t BufferMax, void *Options,
                                   size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
  uint8_t Expected = 0;
  if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 1,
                                      memory_order_acq_rel)) {
    if (Verbosity())
      Report("Basic logging already initialized.\n");
    return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
  }

  static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
  pthread_once(&OnceInit, +[] {
    pthread_key_create(&PThreadKey, TLDDestructor);
    atomic_store(&UseRealTSC, probeRequiredCPUFeatures(),
                 memory_order_release);
    // Initialize the global TicksPerSec value.
    atomic_store(&TicksPerSec,
                 probeRequiredCPUFeatures() ? getTSCFrequency()
                                            : NanosecondsPerSecond,
                 memory_order_release);
    if (!atomic_load(&UseRealTSC, memory_order_relaxed) && Verbosity())
      Report("WARNING: Required CPU features missing for XRay instrumentation,"
             " using emulation instead.\n");
  });

  FlagParser P;
  BasicFlags F;
  F.setDefaults();
  registerXRayBasicFlags(&P, &F);
  P.ParseString(useCompilerDefinedBasicFlags());
  auto *EnvOpts = GetEnv("XRAY_BASIC_OPTIONS");
  if (EnvOpts == nullptr)
    EnvOpts = "";
  P.ParseString(EnvOpts);

  // If XRAY_BASIC_OPTIONS was not defined, then we use the deprecated options
  // set through XRAY_OPTIONS instead.
  if (internal_strlen(EnvOpts) == 0) {
    F.func_duration_threshold_us =
        flags()->xray_naive_log_func_duration_threshold_us;
    F.max_stack_depth = flags()->xray_naive_log_max_stack_depth;
    F.thread_buffer_size = flags()->xray_naive_log_thread_buffer_size;
  }

  P.ParseString(static_cast<const char *>(Options));
  GlobalOptions.ThreadBufferSize = F.thread_buffer_size;
  GlobalOptions.DurationFilterMicros = F.func_duration_threshold_us;
  GlobalOptions.MaxStackDepth = F.max_stack_depth;
  *basicFlags() = F;

  atomic_store(&ThresholdTicks,
               atomic_load(&TicksPerSec, memory_order_acquire) *
                   GlobalOptions.DurationFilterMicros / 1000000,
               memory_order_release);
  __xray_set_handler_arg1(atomic_load(&UseRealTSC, memory_order_acquire)
                              ? basicLoggingHandleArg1RealTSC
                              : basicLoggingHandleArg1EmulateTSC);
  __xray_set_handler(atomic_load(&UseRealTSC, memory_order_acquire)
                         ? basicLoggingHandleArg0RealTSC
                         : basicLoggingHandleArg0EmulateTSC);

  // TODO: Implement custom event and typed event handling support in Basic
  // Mode.
  __xray_remove_customevent_handler();
  __xray_remove_typedevent_handler();

  return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
}

XRayLogInitStatus basicLoggingFinalize() XRAY_NEVER_INSTRUMENT {
  uint8_t Expected = 0;
  if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 0,
                                      memory_order_acq_rel) &&
      Verbosity())
    Report("Basic logging already finalized.\n");

  // Nothing really to do aside from marking the state of the global as
  // uninitialized.
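  // Per-thread buffers are still flushed by TLDDestructor, either when each
  // thread exits or through the Atexit hook registered in
  // basicLogDynamicInitializer().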
  return XRayLogInitStatus::XRAY_LOG_FINALIZED;
}

XRayLogFlushStatus basicLoggingFlush() XRAY_NEVER_INSTRUMENT {
  // This really does nothing, since flushing the logs happens at the end of a
  // thread's lifetime, or when the buffers are full.
  return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
}

// This is a handler that, effectively, does nothing.
void basicLoggingHandleArg0Empty(int32_t, XRayEntryType) XRAY_NEVER_INSTRUMENT {
}

bool basicLogDynamicInitializer() XRAY_NEVER_INSTRUMENT {
  XRayLogImpl Impl{
      basicLoggingInit,
      basicLoggingFinalize,
      basicLoggingHandleArg0Empty,
      basicLoggingFlush,
  };
  auto RegistrationResult = __xray_log_register_mode("xray-basic", Impl);
  if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK &&
      Verbosity())
    Report("Cannot register XRay Basic Mode to 'xray-basic'; error = %d\n",
           RegistrationResult);
  if (flags()->xray_naive_log ||
      !internal_strcmp(flags()->xray_mode, "xray-basic")) {
    auto SelectResult = __xray_log_select_mode("xray-basic");
    if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
      if (Verbosity())
        Report("Failed selecting XRay Basic Mode; error = %d\n", SelectResult);
      return false;
    }

    // We initialize the implementation using the data we get from the
    // XRAY_BASIC_OPTIONS environment variable, at this point of the
    // implementation.
    auto *Env = GetEnv("XRAY_BASIC_OPTIONS");
    auto InitResult =
        __xray_log_init_mode("xray-basic", Env == nullptr ? "" : Env);
    if (InitResult != XRayLogInitStatus::XRAY_LOG_INITIALIZED) {
      if (Verbosity())
        Report("Failed initializing XRay Basic Mode; error = %d\n", InitResult);
      return false;
    }

    // At this point we know that we've successfully initialized Basic mode
    // tracing, and the only chance we're going to get for the current thread
    // to clean up may be at thread/program exit. To ensure that we're going to
    // get the cleanup even without calling the finalization routines, we're
    // registering a program exit function that will do the cleanup.
    static pthread_once_t DynamicOnce = PTHREAD_ONCE_INIT;
    pthread_once(&DynamicOnce, +[] {
      static void *FakeTLD = nullptr;
      FakeTLD = &getThreadLocalData();
      Atexit(+[] { TLDDestructor(FakeTLD); });
    });
  }
  return true;
}

} // namespace __xray

static auto UNUSED Unused = __xray::basicLogDynamicInitializer();
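// Usage sketch: one typical way to exercise this mode (assuming a binary built
// with clang's -fxray-instrument) is to select Basic Mode through the
// environment, e.g.
//
//   XRAY_OPTIONS="patch_premain=true xray_mode=xray-basic" ./instrumented_bin
//
// where ./instrumented_bin stands in for any XRay-instrumented program. This
// drives the registration/selection path in basicLogDynamicInitializer()
// above and writes a naive-format log that the llvm-xray tools can read;
// tuning knobs such as func_duration_threshold_us are taken from
// XRAY_BASIC_OPTIONS.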