// Copyright 2014 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include #include #include #include "base/base_switches.h" #include "base/bind.h" #include "base/command_line.h" #include "base/location.h" #include "base/memory/ptr_util.h" #include "base/message_loop/message_loop.h" #include "base/single_thread_task_runner.h" #include "base/strings/stringprintf.h" #include "base/synchronization/condition_variable.h" #include "base/synchronization/lock.h" #include "base/synchronization/waitable_event.h" #include "base/threading/thread.h" #include "base/time/time.h" #include "build/build_config.h" #include "testing/gtest/include/gtest/gtest.h" #include "testing/perf/perf_test.h" #if defined(OS_POSIX) #include #endif namespace base { namespace { const int kNumRuns = 100000; // Base class for a threading perf-test. This sets up some threads for the // test and measures the clock-time in addition to time spent on each thread. class ThreadPerfTest : public testing::Test { public: ThreadPerfTest() : done_(WaitableEvent::ResetPolicy::AUTOMATIC, WaitableEvent::InitialState::NOT_SIGNALED) {} // To be implemented by each test. Subclass must uses threads_ such that // their cpu-time can be measured. Test must return from PingPong() _and_ // call FinishMeasurement from any thread to complete the test. virtual void Init() { if (ThreadTicks::IsSupported()) ThreadTicks::WaitUntilInitialized(); } virtual void PingPong(int hops) = 0; virtual void Reset() {} void TimeOnThread(base::ThreadTicks* ticks, base::WaitableEvent* done) { *ticks = base::ThreadTicks::Now(); done->Signal(); } base::ThreadTicks ThreadNow(const base::Thread& thread) { base::WaitableEvent done(WaitableEvent::ResetPolicy::AUTOMATIC, WaitableEvent::InitialState::NOT_SIGNALED); base::ThreadTicks ticks; thread.task_runner()->PostTask( FROM_HERE, base::BindOnce(&ThreadPerfTest::TimeOnThread, base::Unretained(this), &ticks, &done)); done.Wait(); return ticks; } void RunPingPongTest(const std::string& name, unsigned num_threads) { // Create threads and collect starting cpu-time for each thread. std::vector thread_starts; while (threads_.size() < num_threads) { threads_.push_back(std::make_unique("PingPonger")); threads_.back()->Start(); if (base::ThreadTicks::IsSupported()) thread_starts.push_back(ThreadNow(*threads_.back())); } Init(); base::TimeTicks start = base::TimeTicks::Now(); PingPong(kNumRuns); done_.Wait(); base::TimeTicks end = base::TimeTicks::Now(); // Gather the cpu-time spent on each thread. This does one extra tasks, // but that should be in the noise given enough runs. base::TimeDelta thread_time; while (threads_.size()) { if (base::ThreadTicks::IsSupported()) { thread_time += ThreadNow(*threads_.back()) - thread_starts.back(); thread_starts.pop_back(); } threads_.pop_back(); } Reset(); double num_runs = static_cast(kNumRuns); double us_per_task_clock = (end - start).InMicroseconds() / num_runs; double us_per_task_cpu = thread_time.InMicroseconds() / num_runs; // Clock time per task. perf_test::PrintResult( "task", "", name + "_time ", us_per_task_clock, "us/hop", true); // Total utilization across threads if available (likely higher). if (base::ThreadTicks::IsSupported()) { perf_test::PrintResult( "task", "", name + "_cpu ", us_per_task_cpu, "us/hop", true); } } protected: void FinishMeasurement() { done_.Signal(); } std::vector> threads_; private: base::WaitableEvent done_; }; // Class to test task performance by posting empty tasks back and forth. class TaskPerfTest : public ThreadPerfTest { base::Thread* NextThread(int count) { return threads_[count % threads_.size()].get(); } void PingPong(int hops) override { if (!hops) { FinishMeasurement(); return; } NextThread(hops)->task_runner()->PostTask( FROM_HERE, base::BindOnce(&ThreadPerfTest::PingPong, base::Unretained(this), hops - 1)); } }; // This tries to test the 'best-case' as well as the 'worst-case' task posting // performance. The best-case keeps one thread alive such that it never yeilds, // while the worse-case forces a context switch for every task. Four threads are // used to ensure the threads do yeild (with just two it might be possible for // both threads to stay awake if they can signal each other fast enough). TEST_F(TaskPerfTest, TaskPingPong) { RunPingPongTest("1_Task_Threads", 1); RunPingPongTest("4_Task_Threads", 4); } // Same as above, but add observers to test their perf impact. class MessageLoopObserver : public base::MessageLoop::TaskObserver { public: void WillProcessTask(const base::PendingTask& pending_task) override {} void DidProcessTask(const base::PendingTask& pending_task) override {} }; MessageLoopObserver message_loop_observer; class TaskObserverPerfTest : public TaskPerfTest { public: void Init() override { TaskPerfTest::Init(); for (size_t i = 0; i < threads_.size(); i++) { threads_[i]->message_loop()->task_runner()->PostTask( FROM_HERE, BindOnce(&MessageLoop::AddTaskObserver, Unretained(threads_[i]->message_loop()), Unretained(&message_loop_observer))); } } }; TEST_F(TaskObserverPerfTest, TaskPingPong) { RunPingPongTest("1_Task_Threads_With_Observer", 1); RunPingPongTest("4_Task_Threads_With_Observer", 4); } // Class to test our WaitableEvent performance by signaling back and fort. // WaitableEvent is templated so we can also compare with other versions. template class EventPerfTest : public ThreadPerfTest { public: void Init() override { for (size_t i = 0; i < threads_.size(); i++) { events_.push_back(std::make_unique( WaitableEvent::ResetPolicy::AUTOMATIC, WaitableEvent::InitialState::NOT_SIGNALED)); } } void Reset() override { events_.clear(); } void WaitAndSignalOnThread(size_t event) { size_t next_event = (event + 1) % events_.size(); int my_hops = 0; do { events_[event]->Wait(); my_hops = --remaining_hops_; // We own 'hops' between Wait and Signal. events_[next_event]->Signal(); } while (my_hops > 0); // Once we are done, all threads will signal as hops passes zero. // We only signal completion once, on the thread that reaches zero. if (!my_hops) FinishMeasurement(); } void PingPong(int hops) override { remaining_hops_ = hops; for (size_t i = 0; i < threads_.size(); i++) { threads_[i]->task_runner()->PostTask( FROM_HERE, base::BindOnce(&EventPerfTest::WaitAndSignalOnThread, base::Unretained(this), i)); } // Kick off the Signal ping-ponging. events_.front()->Signal(); } int remaining_hops_; std::vector> events_; }; // Similar to the task posting test, this just tests similar functionality // using WaitableEvents. We only test four threads (worst-case), but we // might want to craft a way to test the best-case (where the thread doesn't // end up blocking because the event is already signalled). typedef EventPerfTest WaitableEventThreadPerfTest; TEST_F(WaitableEventThreadPerfTest, EventPingPong) { RunPingPongTest("4_WaitableEvent_Threads", 4); } // Build a minimal event using ConditionVariable. class ConditionVariableEvent { public: ConditionVariableEvent(WaitableEvent::ResetPolicy reset_policy, WaitableEvent::InitialState initial_state) : cond_(&lock_), signaled_(false) { DCHECK_EQ(WaitableEvent::ResetPolicy::AUTOMATIC, reset_policy); DCHECK_EQ(WaitableEvent::InitialState::NOT_SIGNALED, initial_state); } void Signal() { { base::AutoLock scoped_lock(lock_); signaled_ = true; } cond_.Signal(); } void Wait() { base::AutoLock scoped_lock(lock_); while (!signaled_) cond_.Wait(); signaled_ = false; } private: base::Lock lock_; base::ConditionVariable cond_; bool signaled_; }; // This is meant to test the absolute minimal context switching time // using our own base synchronization code. typedef EventPerfTest ConditionVariablePerfTest; TEST_F(ConditionVariablePerfTest, EventPingPong) { RunPingPongTest("4_ConditionVariable_Threads", 4); } #if defined(OS_POSIX) // Absolutely 100% minimal posix waitable event. If there is a better/faster // way to force a context switch, we should use that instead. class PthreadEvent { public: PthreadEvent(WaitableEvent::ResetPolicy reset_policy, WaitableEvent::InitialState initial_state) { DCHECK_EQ(WaitableEvent::ResetPolicy::AUTOMATIC, reset_policy); DCHECK_EQ(WaitableEvent::InitialState::NOT_SIGNALED, initial_state); pthread_mutex_init(&mutex_, nullptr); pthread_cond_init(&cond_, nullptr); signaled_ = false; } ~PthreadEvent() { pthread_cond_destroy(&cond_); pthread_mutex_destroy(&mutex_); } void Signal() { pthread_mutex_lock(&mutex_); signaled_ = true; pthread_mutex_unlock(&mutex_); pthread_cond_signal(&cond_); } void Wait() { pthread_mutex_lock(&mutex_); while (!signaled_) pthread_cond_wait(&cond_, &mutex_); signaled_ = false; pthread_mutex_unlock(&mutex_); } private: bool signaled_; pthread_mutex_t mutex_; pthread_cond_t cond_; }; // This is meant to test the absolute minimal context switching time. // If there is any faster way to do this we should substitute it in. typedef EventPerfTest PthreadEventPerfTest; TEST_F(PthreadEventPerfTest, EventPingPong) { RunPingPongTest("4_PthreadCondVar_Threads", 4); } #endif } // namespace } // namespace base