/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "scheduler.h"

#include <fstream>
#include <sstream>
#include <string>

#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "data_type-inl.h"
#include "optimizing/load_store_analysis.h"
#include "prepare_for_register_allocation.h"

#ifdef ART_ENABLE_CODEGEN_arm64
#include "scheduler_arm64.h"
#endif

#ifdef ART_ENABLE_CODEGEN_arm
#include "scheduler_arm.h"
#endif

namespace art {

void SchedulingGraph::AddDependency(SchedulingNode* node,
                                    SchedulingNode* dependency,
                                    bool is_data_dependency) {
  if (node == nullptr || dependency == nullptr) {
    // A `nullptr` node indicates an instruction out of scheduling range (e.g. in
    // another block), so we do not need to add a dependency edge to the graph.
    return;
  }

  if (is_data_dependency) {
    node->AddDataPredecessor(dependency);
  } else {
    node->AddOtherPredecessor(dependency);
  }
}

bool SideEffectDependencyAnalysis::HasReorderingDependency(const HInstruction* instr1,
                                                           const HInstruction* instr2) {
  SideEffects instr1_side_effects = instr1->GetSideEffects();
  SideEffects instr2_side_effects = instr2->GetSideEffects();

  // Read after write.
  if (instr1_side_effects.MayDependOn(instr2_side_effects)) {
    return true;
  }

  // Write after read.
  if (instr2_side_effects.MayDependOn(instr1_side_effects)) {
    return true;
  }

  // Memory write after write.
  if (instr1_side_effects.DoesAnyWrite() && instr2_side_effects.DoesAnyWrite()) {
    return true;
  }

  return false;
}

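// Illustrative example (assumption, not from the upstream file): an ArraySet
// (which writes array memory) and an ArrayGet (which may read that memory)
// have a read/write conflict, so HasReorderingDependency() returns true for
// them in either argument order; the read-after-write, write-after-read and
// write-after-write checks above cover both directions.
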
size_t SideEffectDependencyAnalysis::MemoryDependencyAnalysis::ArrayAccessHeapLocation(
    HInstruction* instruction) const {
  DCHECK(heap_location_collector_ != nullptr);
  size_t heap_loc = heap_location_collector_->GetArrayHeapLocation(instruction);
  // This array access should have been analyzed and added to the
  // HeapLocationCollector beforehand.
  DCHECK(heap_loc != HeapLocationCollector::kHeapLocationNotFound);
  return heap_loc;
}

bool SideEffectDependencyAnalysis::MemoryDependencyAnalysis::ArrayAccessMayAlias(
    HInstruction* instr1, HInstruction* instr2) const {
  DCHECK(heap_location_collector_ != nullptr);
  size_t instr1_heap_loc = ArrayAccessHeapLocation(instr1);
  size_t instr2_heap_loc = ArrayAccessHeapLocation(instr2);

  // For example: arr[0] and arr[0]
  if (instr1_heap_loc == instr2_heap_loc) {
    return true;
  }

  // For example: arr[0] and arr[i]
  if (heap_location_collector_->MayAlias(instr1_heap_loc, instr2_heap_loc)) {
    return true;
  }

  return false;
}

static bool IsArrayAccess(const HInstruction* instruction) {
  return instruction->IsArrayGet() || instruction->IsArraySet();
}

static bool IsInstanceFieldAccess(const HInstruction* instruction) {
  return instruction->IsInstanceFieldGet() ||
         instruction->IsInstanceFieldSet() ||
         instruction->IsPredicatedInstanceFieldGet() ||
         instruction->IsUnresolvedInstanceFieldGet() ||
         instruction->IsUnresolvedInstanceFieldSet();
}

static bool IsStaticFieldAccess(const HInstruction* instruction) {
  return instruction->IsStaticFieldGet() ||
         instruction->IsStaticFieldSet() ||
         instruction->IsUnresolvedStaticFieldGet() ||
         instruction->IsUnresolvedStaticFieldSet();
}

static bool IsResolvedFieldAccess(const HInstruction* instruction) {
  return instruction->IsInstanceFieldGet() ||
         instruction->IsInstanceFieldSet() ||
         instruction->IsPredicatedInstanceFieldGet() ||
         instruction->IsStaticFieldGet() ||
         instruction->IsStaticFieldSet();
}

static bool IsUnresolvedFieldAccess(const HInstruction* instruction) {
  return instruction->IsUnresolvedInstanceFieldGet() ||
         instruction->IsUnresolvedInstanceFieldSet() ||
         instruction->IsUnresolvedStaticFieldGet() ||
         instruction->IsUnresolvedStaticFieldSet();
}

static bool IsFieldAccess(const HInstruction* instruction) {
  return IsResolvedFieldAccess(instruction) || IsUnresolvedFieldAccess(instruction);
}

static const FieldInfo* GetFieldInfo(const HInstruction* instruction) {
  return &instruction->GetFieldInfo();
}

size_t SideEffectDependencyAnalysis::MemoryDependencyAnalysis::FieldAccessHeapLocation(
    const HInstruction* instr) const {
  DCHECK(instr != nullptr);
  DCHECK(GetFieldInfo(instr) != nullptr);
  DCHECK(heap_location_collector_ != nullptr);

  HInstruction* ref = instr->IsPredicatedInstanceFieldGet()
      ? instr->AsPredicatedInstanceFieldGet()->GetTarget()
      : instr->InputAt(0);
  size_t heap_loc = heap_location_collector_->GetFieldHeapLocation(ref, GetFieldInfo(instr));
  // This field access should have been analyzed and added to the
  // HeapLocationCollector beforehand.
  DCHECK(heap_loc != HeapLocationCollector::kHeapLocationNotFound);

  return heap_loc;
}

bool SideEffectDependencyAnalysis::MemoryDependencyAnalysis::FieldAccessMayAlias(
    const HInstruction* instr1, const HInstruction* instr2) const {
  DCHECK(heap_location_collector_ != nullptr);

  // Static and instance field accesses should not alias.
  if ((IsInstanceFieldAccess(instr1) && IsStaticFieldAccess(instr2)) ||
      (IsStaticFieldAccess(instr1) && IsInstanceFieldAccess(instr2))) {
    return false;
  }

  // If either of the field accesses is unresolved, conservatively assume that
  // the two accesses may alias.
  if (IsUnresolvedFieldAccess(instr1) || IsUnresolvedFieldAccess(instr2)) {
    return true;
  }

  // Both field accesses are resolved.
  size_t instr1_field_access_heap_loc = FieldAccessHeapLocation(instr1);
  size_t instr2_field_access_heap_loc = FieldAccessHeapLocation(instr2);

  if (instr1_field_access_heap_loc == instr2_field_access_heap_loc) {
    return true;
  }

  return heap_location_collector_->MayAlias(instr1_field_access_heap_loc,
                                            instr2_field_access_heap_loc);
}

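// Illustrative example (assumption, not from the upstream file): accesses to
// the same field `f` through references `x` and `y` map to distinct heap
// locations; they may still alias unless LSA can prove that `x` and `y` are
// different objects. An access to `x.f` and a StaticFieldGet, by contrast,
// are rejected by the static-vs-instance check above and never alias.
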
bool SideEffectDependencyAnalysis::MemoryDependencyAnalysis::HasMemoryDependency(
    HInstruction* instr1, HInstruction* instr2) const {
  if (!HasReorderingDependency(instr1, instr2)) {
    return false;
  }

  if (heap_location_collector_ == nullptr ||
      heap_location_collector_->GetNumberOfHeapLocations() == 0) {
    // Without HeapLocation information from load-store analysis, we cannot
    // further disambiguate the two instructions, so conservatively report a
    // memory dependency between them.
    return true;
  }

  if (IsArrayAccess(instr1) && IsArrayAccess(instr2)) {
    return ArrayAccessMayAlias(instr1, instr2);
  }
  if (IsFieldAccess(instr1) && IsFieldAccess(instr2)) {
    return FieldAccessMayAlias(instr1, instr2);
  }

  // TODO(xueliang): LSA to support alias analysis among HVecLoad, HVecStore and ArrayAccess
  if (instr1->IsVecMemoryOperation() && instr2->IsVecMemoryOperation()) {
    return true;
  }
  if (instr1->IsVecMemoryOperation() && IsArrayAccess(instr2)) {
    return true;
  }
  if (IsArrayAccess(instr1) && instr2->IsVecMemoryOperation()) {
    return true;
  }

  // Heap accesses of different kinds should not alias.
  if (IsArrayAccess(instr1) && IsFieldAccess(instr2)) {
    return false;
  }
  if (IsFieldAccess(instr1) && IsArrayAccess(instr2)) {
    return false;
  }
  if (instr1->IsVecMemoryOperation() && IsFieldAccess(instr2)) {
    return false;
  }
  if (IsFieldAccess(instr1) && instr2->IsVecMemoryOperation()) {
    return false;
  }

  // We conservatively treat all other cases as having a dependency,
  // for example, Invoke and ArrayGet.
  return true;
}

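// Illustrative example (assumption, not from the upstream file): with heap
// location information available, an ArraySet to `arr[i]` and a non-volatile
// InstanceFieldSet to `obj.f` are heap accesses of different kinds, so
// HasMemoryDependency() returns false and the scheduler is free to reorder
// them even though both write memory.
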
bool SideEffectDependencyAnalysis::HasExceptionDependency(const HInstruction* instr1,
                                                          const HInstruction* instr2) {
  if (instr2->CanThrow() && instr1->GetSideEffects().DoesAnyWrite()) {
    return true;
  }
  if (instr2->GetSideEffects().DoesAnyWrite() && instr1->CanThrow()) {
    return true;
  }
  if (instr2->CanThrow() && instr1->CanThrow()) {
    return true;
  }

  // The above checks should cover all cases where we cannot reorder two
  // instructions that may throw exceptions.
  return false;
}

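// Illustrative example (assumption, not from the upstream file): a
// DivZeroCheck (which can throw) must not be reordered with an
// InstanceFieldSet (which writes memory); otherwise the store could become
// visible on the exceptional path, which is observable from a catch handler.
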
// Check whether the specified instruction is a better candidate, i.e. one
// that other instructions are more likely to depend on.
static bool IsBetterCandidateWithMoreLikelyDependencies(HInstruction* new_candidate,
                                                        HInstruction* old_candidate) {
  if (!new_candidate->GetSideEffects().Includes(old_candidate->GetSideEffects())) {
    // Weaker side effects.
    return false;
  }
  if (old_candidate->GetSideEffects().Includes(new_candidate->GetSideEffects())) {
    // Same side effects, check if `new_candidate` has stronger `CanThrow()`.
    return new_candidate->CanThrow() && !old_candidate->CanThrow();
  } else {
    // Stronger side effects, check if `new_candidate` has at least as strong `CanThrow()`.
    return new_candidate->CanThrow() || !old_candidate->CanThrow();
  }
}

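// Illustrative example (assumption, not from the upstream file): an
// InvokeVirtual typically both writes memory and can throw, so its side
// effects include those of a plain ArraySet; it therefore wins as the
// dependency-chain candidate, and later conflicting accesses can rely on a
// transitive dependency through it instead of extra explicit edges.
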
void SchedulingGraph::AddCrossIterationDependencies(SchedulingNode* node) {
  for (HInstruction* instruction : node->GetInstruction()->GetInputs()) {
    // Having a phi-function from a loop header as an input means the current node of the
    // scheduling graph has a cross-iteration dependency, because such phi-functions bring
    // values from the previous iteration to the current iteration.
    if (!instruction->IsLoopHeaderPhi()) {
      continue;
    }
    for (HInstruction* phi_input : instruction->GetInputs()) {
      // As the scheduling graph of the current basic block is built by
      // processing instructions bottom-up, a nullptr returned by GetNode means
      // the instruction defining a value for the phi is either before the
      // instruction represented by `node` or in a different basic block.
      SchedulingNode* def_node = GetNode(phi_input);

      // We don't create a dependency if there are uses besides the use in the phi.
      // In such cases a register to hold `phi_input` is usually allocated and
      // a MOV instruction is generated. In cases with multiple uses and no MOV
      // instruction, a reordering that creates a MOV instruction can improve
      // performance more than an attempt to avoid a MOV instruction.
      if (def_node != nullptr && def_node != node && phi_input->GetUses().HasExactlyOneElement()) {
        // We have an implicit data dependency between `node` and `def_node`.
        // `AddDataDependency` cannot be used because it is for explicit data
        // dependencies, so `AddOtherDependency` is used instead.
        AddOtherDependency(def_node, node);
      }
    }
  }
}

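// Illustrative example (assumption, not from the upstream file): in a loop
// with header phi `i = Phi [i0, i_next]` and body instruction
// `i_next = Add [i, 1]`, a node using `i` becomes an "other" predecessor of
// the node defining `i_next`, so the next iteration's value is not computed
// before the current iteration's use of the old value.
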
void SchedulingGraph::AddDependencies(SchedulingNode* instruction_node,
                                      bool is_scheduling_barrier) {
  HInstruction* instruction = instruction_node->GetInstruction();

  // Define-use dependencies.
  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
    AddDataDependency(GetNode(use.GetUser()), instruction_node);
  }

  // Scheduling barrier dependencies.
  DCHECK(!is_scheduling_barrier || contains_scheduling_barrier_);
  if (contains_scheduling_barrier_) {
    // A barrier depends on instructions after it, and instructions before the
    // barrier depend on it.
    for (HInstruction* other = instruction->GetNext(); other != nullptr; other = other->GetNext()) {
      SchedulingNode* other_node = GetNode(other);
      CHECK(other_node != nullptr)
          << other->DebugName()
          << " is in block " << other->GetBlock()->GetBlockId()
          << ", and expected in block " << instruction->GetBlock()->GetBlockId();
      bool other_is_barrier = other_node->IsSchedulingBarrier();
      if (is_scheduling_barrier || other_is_barrier) {
        AddOtherDependency(other_node, instruction_node);
      }
      if (other_is_barrier) {
        // This other scheduling barrier guarantees ordering of instructions after
        // it, so avoid creating additional useless dependencies in the graph.
        // For example if we have
        //   instr_1
        //   barrier_2
        //   instr_3
        //   barrier_4
        //   instr_5
        // we only create the following non-data dependencies
        //   1 -> 2
        //   2 -> 3
        //   2 -> 4
        //   3 -> 4
        //   4 -> 5
        // and do not create
        //   1 -> 4
        //   2 -> 5
        // Note that in this example we could also avoid creating the dependency
        // `2 -> 4`. But if we remove `instr_3` that dependency is required to
        // order the barriers. So we generate it to avoid a special case.
        break;
      }
    }
  }

  // Side effect dependencies.
  if (!instruction->GetSideEffects().DoesNothing() || instruction->CanThrow()) {
    HInstruction* dep_chain_candidate = nullptr;
    for (HInstruction* other = instruction->GetNext(); other != nullptr; other = other->GetNext()) {
      SchedulingNode* other_node = GetNode(other);
      if (other_node->IsSchedulingBarrier()) {
        // We have reached a scheduling barrier so we can stop further
        // processing.
        //
        // As an "other" dependency is not set up if a data dependency exists,
        // check that one of the two exists.
        DCHECK(other_node->HasOtherDependency(instruction_node)
               || other_node->HasDataDependency(instruction_node));
        break;
      }
      if (side_effect_dependency_analysis_.HasSideEffectDependency(other, instruction)) {
        if (dep_chain_candidate != nullptr &&
            side_effect_dependency_analysis_.HasSideEffectDependency(other, dep_chain_candidate)) {
          // Skip an explicit dependency to reduce memory usage, rely on the transitive dependency.
        } else {
          AddOtherDependency(other_node, instruction_node);
        }
        // Check if `other` is a better candidate which more likely will have other instructions
        // depending on it.
        if (dep_chain_candidate == nullptr ||
            IsBetterCandidateWithMoreLikelyDependencies(other, dep_chain_candidate)) {
          dep_chain_candidate = other;
        }
      }
    }
  }

  // Environment dependencies.
  // We do not need to process those if the instruction is a scheduling barrier,
  // since the barrier already has non-data dependencies on all following
  // instructions.
  if (!is_scheduling_barrier) {
    for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
      // Note that here we could stop processing if the environment holder is
      // across a scheduling barrier. But checking this would likely require
      // more work than simply iterating through environment uses.
      AddOtherDependency(GetNode(use.GetUser()->GetHolder()), instruction_node);
    }
  }

  AddCrossIterationDependencies(instruction_node);
}

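// Illustrative example (assumption, not from the upstream file): for
// `i5: Add [i2, i3]`, every user of i5 receives i5 as a data predecessor via
// the define-use loop above, which guarantees that i5 is scheduled before all
// of its uses; the barrier, side-effect and environment edges cover the
// remaining ordering constraints.
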
static const std::string InstructionTypeId(const HInstruction* instruction) {
  return DataType::TypeId(instruction->GetType()) + std::to_string(instruction->GetId());
}

// Ideally we would reuse the graph visualizer code, but it is not available
// from here and it is not worth moving all that code only for our use.
static void DumpAsDotNode(std::ostream& output, const SchedulingNode* node) {
  const HInstruction* instruction = node->GetInstruction();
  // Use the instruction typed id as the node identifier.
  std::string instruction_id = InstructionTypeId(instruction);
  output << instruction_id << "[shape=record, label=\""
         << instruction_id << ' ' << instruction->DebugName() << " [";
  // List the instruction's inputs in its description. When visualizing the
  // graph this helps differentiate data inputs from other dependencies.
  const char* separator = "";
  for (const HInstruction* input : instruction->GetInputs()) {
    output << separator << InstructionTypeId(input);
    separator = ",";
  }
  output << "]";
  // Other properties of the node.
  output << "\\ninternal_latency: " << node->GetInternalLatency();
  output << "\\ncritical_path: " << node->GetCriticalPath();
  if (node->IsSchedulingBarrier()) {
    output << "\\n(barrier)";
  }
  output << "\"];\n";
  // We want program order to go from top to bottom in the graph output, so we
  // reverse the edges and specify `dir=back`.
  for (const SchedulingNode* predecessor : node->GetDataPredecessors()) {
    const HInstruction* predecessor_instruction = predecessor->GetInstruction();
    output << InstructionTypeId(predecessor_instruction) << ":s -> " << instruction_id << ":n "
           << "[label=\"" << predecessor->GetLatency() << "\",dir=back]\n";
  }
  for (const SchedulingNode* predecessor : node->GetOtherPredecessors()) {
    const HInstruction* predecessor_instruction = predecessor->GetInstruction();
    output << InstructionTypeId(predecessor_instruction) << ":s -> " << instruction_id << ":n "
           << "[dir=back,color=blue]\n";
  }
}

void SchedulingGraph::DumpAsDotGraph(const std::string& description,
                                     const ScopedArenaVector<SchedulingNode*>& initial_candidates) {
  // TODO(xueliang): ideally we should move scheduling information into HInstruction, after that
  // we should move this dotty graph dump feature to visualizer, and have a compiler option for it.
  std::ofstream output("scheduling_graphs.dot", std::ofstream::out | std::ofstream::app);
  // Description of this graph, as a comment.
  output << "// " << description << "\n";
  // Start the dot graph.
  output << "digraph G {\n";
  for (const auto& entry : nodes_map_) {
    SchedulingNode* node = entry.second.get();
    DumpAsDotNode(output, node);
  }
  // Create a fake 'end_of_scheduling' node to help visualization of critical_paths.
  for (SchedulingNode* node : initial_candidates) {
    const HInstruction* instruction = node->GetInstruction();
    output << InstructionTypeId(instruction) << ":s -> end_of_scheduling:n "
           << "[label=\"" << node->GetLatency() << "\",dir=back]\n";
  }
  // End of the dot graph.
  output << "}\n";
  output.close();
}

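// Illustrative output sketch (assumption, not from the upstream file): a dump
// might look like
//   // void Foo.bar() B2
//   digraph G {
//     i20[shape=record, label="i20 Add [i18,i19]\ninternal_latency: 0\ncritical_path: 5"];
//     i18:s -> i20:n [label="2",dir=back]
//     ...
//   }
// which renders with program order running from top to bottom.
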
SchedulingNode* CriticalPathSchedulingNodeSelector::SelectMaterializedCondition(
    ScopedArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) const {
  // Schedule condition inputs that can be materialized immediately before their use.
  // In the following example, after we have scheduled the HSelect, we want the HLessThan
  // to be scheduled immediately, because it is a materialized condition and will be
  // emitted right before the HSelect in the codegen phase.
  //
  // i20 HLessThan [...]                  HLessThan    HAdd      HAdd
  // i21 HAdd [...]               ===>      |           |         |
  // i22 HAdd [...]                         +-----------+---------+
  // i23 HSelect [i21, i22, i20]                     HSelect

  if (prev_select_ == nullptr) {
    return nullptr;
  }

  const HInstruction* instruction = prev_select_->GetInstruction();
  const HCondition* condition = nullptr;
  DCHECK(instruction != nullptr);

  if (instruction->IsIf()) {
    condition = instruction->AsIf()->InputAt(0)->AsCondition();
  } else if (instruction->IsSelect()) {
    condition = instruction->AsSelect()->GetCondition()->AsCondition();
  }

  SchedulingNode* condition_node = (condition != nullptr) ? graph.GetNode(condition) : nullptr;

  if ((condition_node != nullptr) &&
      condition->HasOnlyOneNonEnvironmentUse() &&
      ContainsElement(*nodes, condition_node)) {
    DCHECK(!condition_node->HasUnscheduledSuccessors());
    // Remove the condition from the list of candidates and schedule it.
    RemoveElement(*nodes, condition_node);
    return condition_node;
  }

  return nullptr;
}

SchedulingNode* CriticalPathSchedulingNodeSelector::PopHighestPriorityNode(
    ScopedArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) {
  DCHECK(!nodes->empty());
  SchedulingNode* select_node = nullptr;

  // Optimize for the materialized-condition, emit-before-use scenario.
  select_node = SelectMaterializedCondition(nodes, graph);

  if (select_node == nullptr) {
    // Get the highest priority node based on critical path information.
    select_node = (*nodes)[0];
    size_t select = 0;
    for (size_t i = 1, e = nodes->size(); i < e; i++) {
      SchedulingNode* check = (*nodes)[i];
      SchedulingNode* candidate = (*nodes)[select];
      select_node = GetHigherPrioritySchedulingNode(candidate, check);
      if (select_node == check) {
        select = i;
      }
    }
    DeleteNodeAtIndex(nodes, select);
  }

  prev_select_ = select_node;
  return select_node;
}

SchedulingNode* CriticalPathSchedulingNodeSelector::GetHigherPrioritySchedulingNode(
    SchedulingNode* candidate, SchedulingNode* check) const {
  uint32_t candidate_path = candidate->GetCriticalPath();
  uint32_t check_path = check->GetCriticalPath();
  // First look at the critical_path.
  if (check_path != candidate_path) {
    return check_path < candidate_path ? check : candidate;
  }
  // If both critical paths are equal, schedule instructions with a higher latency
  // first in program order.
  return check->GetLatency() < candidate->GetLatency() ? check : candidate;
}

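// Illustrative example (assumption, not from the upstream file): for
// candidates A (critical_path 10) and B (critical_path 7), B is returned as
// the higher priority node. Because nodes are selected bottom-up, B is then
// placed later in program order, letting the longer critical path through A
// start earlier.
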
void HScheduler::Schedule(HGraph* graph) {
  // We run LSA here instead of in a separate pass to better control whether
  // the analysis should run or not.
  const HeapLocationCollector* heap_location_collector = nullptr;
  ScopedArenaAllocator allocator(graph->GetArenaStack());
  LoadStoreAnalysis lsa(graph, /*stats=*/nullptr, &allocator, LoadStoreAnalysisType::kBasic);
  if (!only_optimize_loop_blocks_ || graph->HasLoops()) {
    lsa.Run();
    heap_location_collector = &lsa.GetHeapLocationCollector();
  }

  for (HBasicBlock* block : graph->GetReversePostOrder()) {
    if (IsSchedulable(block)) {
      Schedule(block, heap_location_collector);
    }
  }
}

void HScheduler::Schedule(HBasicBlock* block,
                          const HeapLocationCollector* heap_location_collector) {
  ScopedArenaAllocator allocator(block->GetGraph()->GetArenaStack());
  ScopedArenaVector<SchedulingNode*> scheduling_nodes(allocator.Adapter(kArenaAllocScheduler));

  // Build the scheduling graph.
  SchedulingGraph scheduling_graph(&allocator, heap_location_collector);
  for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
    HInstruction* instruction = it.Current();
    CHECK_EQ(instruction->GetBlock(), block)
        << instruction->DebugName()
        << " is in block " << instruction->GetBlock()->GetBlockId()
        << ", and expected in block " << block->GetBlockId();
    SchedulingNode* node = scheduling_graph.AddNode(instruction, IsSchedulingBarrier(instruction));
    CalculateLatency(node);
    scheduling_nodes.push_back(node);
  }

  if (scheduling_graph.Size() <= 1) {
    return;
  }

  cursor_ = block->GetLastInstruction();

  // The list of candidates for scheduling. A node becomes a candidate when all
  // its successors have been scheduled.
  ScopedArenaVector<SchedulingNode*> candidates(allocator.Adapter(kArenaAllocScheduler));

  // Find the initial candidates for scheduling.
  for (SchedulingNode* node : scheduling_nodes) {
    if (!node->HasUnscheduledSuccessors()) {
      node->MaybeUpdateCriticalPath(node->GetLatency());
      candidates.push_back(node);
    }
  }

  ScopedArenaVector<SchedulingNode*> initial_candidates(allocator.Adapter(kArenaAllocScheduler));
  if (kDumpDotSchedulingGraphs) {
    // Remember the list of initial candidates for debug output purposes.
    initial_candidates.assign(candidates.begin(), candidates.end());
  }

  // Schedule all nodes.
  selector_->Reset();
  while (!candidates.empty()) {
    SchedulingNode* node = selector_->PopHighestPriorityNode(&candidates, scheduling_graph);
    Schedule(node, &candidates);
  }

  if (kDumpDotSchedulingGraphs) {
    // Dump the graph in `dot` format.
    HGraph* graph = block->GetGraph();
    std::stringstream description;
    description << graph->GetDexFile().PrettyMethod(graph->GetMethodIdx())
        << " B" << block->GetBlockId();
    scheduling_graph.DumpAsDotGraph(description.str(), initial_candidates);
  }
}

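// Illustrative note (assumption, not from the upstream file): because the
// graph is built by the backward iterator above, the initial candidates are
// the nodes with no unscheduled successors; typically this is the block's
// last instruction, a control-flow instruction that acts as a scheduling
// barrier.
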
void HScheduler::Schedule(SchedulingNode* scheduling_node,
                          /*inout*/ ScopedArenaVector<SchedulingNode*>* candidates) {
  // Check whether any of the node's predecessors will be valid candidates after
  // this node is scheduled.
  uint32_t path_to_node = scheduling_node->GetCriticalPath();
  for (SchedulingNode* predecessor : scheduling_node->GetDataPredecessors()) {
    predecessor->MaybeUpdateCriticalPath(
        path_to_node + predecessor->GetInternalLatency() + predecessor->GetLatency());
    predecessor->DecrementNumberOfUnscheduledSuccessors();
    if (!predecessor->HasUnscheduledSuccessors()) {
      candidates->push_back(predecessor);
    }
  }
  for (SchedulingNode* predecessor : scheduling_node->GetOtherPredecessors()) {
    // Do not update the critical path. The "other" (non-data) dependencies
    // usually do not represent a "material" dependency of one node on another;
    // they exist to preserve program correctness, so we do not use them when
    // computing the critical path.
    predecessor->DecrementNumberOfUnscheduledSuccessors();
    if (!predecessor->HasUnscheduledSuccessors()) {
      candidates->push_back(predecessor);
    }
  }

  Schedule(scheduling_node->GetInstruction());
}

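// Illustrative arithmetic (assumption, not from the upstream file): if the
// node just scheduled has critical path 6 and a data predecessor with
// internal latency 1 and latency 3, the predecessor's critical path is
// updated to max(its current value, 6 + 1 + 3) = 10.
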
// Move `instruction` to just after the `cursor` instruction within the same
// basic block.
static void MoveAfterInBlock(HInstruction* instruction, HInstruction* cursor) {
  DCHECK_EQ(instruction->GetBlock(), cursor->GetBlock());
  DCHECK_NE(cursor, cursor->GetBlock()->GetLastInstruction());
  DCHECK(!instruction->IsControlFlow());
  DCHECK(!cursor->IsControlFlow());
  instruction->MoveBefore(cursor->GetNext(), /* do_checks= */ false);
}

void HScheduler::Schedule(HInstruction* instruction) {
  if (instruction == cursor_) {
    cursor_ = cursor_->GetPrevious();
  } else {
    MoveAfterInBlock(instruction, cursor_);
  }
}

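// Illustrative walk-through (assumption, not from the upstream file): for a
// block `i1, i2, i3, goto`, the goto is popped first (it is the only node
// without unscheduled successors), so `cursor_` moves up to i3. If the
// selector then pops i2, MoveAfterInBlock() moves it right after the cursor,
// producing `i1, i3, i2, goto`.
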
bool HScheduler::IsSchedulable(const HInstruction* instruction) const {
  // We want to avoid exhaustively listing all instructions, so we first check
  // for instruction categories that we know are safe.
  if (instruction->IsControlFlow() ||
      instruction->IsConstant()) {
    return true;
  }
  // Currently all unary and binary operations are safe to schedule, so avoid
  // checking for each of them individually.
  // Since nothing prevents a new scheduling-unsafe HInstruction from
  // subclassing HUnaryOperation (or HBinaryOperation), check in debug mode
  // that we have the exhaustive lists here.
  if (instruction->IsUnaryOperation()) {
    DCHECK(instruction->IsAbs() ||
           instruction->IsBooleanNot() ||
           instruction->IsNot() ||
           instruction->IsNeg()) << "unexpected instruction " << instruction->DebugName();
    return true;
  }
  if (instruction->IsBinaryOperation()) {
    DCHECK(instruction->IsAdd() ||
           instruction->IsAnd() ||
           instruction->IsCompare() ||
           instruction->IsCondition() ||
           instruction->IsDiv() ||
           instruction->IsMin() ||
           instruction->IsMax() ||
           instruction->IsMul() ||
           instruction->IsOr() ||
           instruction->IsRem() ||
           instruction->IsRor() ||
           instruction->IsShl() ||
           instruction->IsShr() ||
           instruction->IsSub() ||
           instruction->IsUShr() ||
           instruction->IsXor()) << "unexpected instruction " << instruction->DebugName();
    return true;
  }
  // The scheduler should not see any of these.
  DCHECK(!instruction->IsParallelMove()) << "unexpected instruction " << instruction->DebugName();
  // List of instructions explicitly excluded:
  //   HClearException
  //   HClinitCheck
  //   HDeoptimize
  //   HLoadClass
  //   HLoadException
  //   HMemoryBarrier
  //   HMonitorOperation
  //   HNativeDebugInfo
  //   HThrow
  //   HTryBoundary
  // TODO: Some of the instructions above may be safe to schedule (maybe as
  // scheduling barriers).
  return instruction->IsArrayGet() ||
         instruction->IsArraySet() ||
         instruction->IsArrayLength() ||
         instruction->IsBoundType() ||
         instruction->IsBoundsCheck() ||
         instruction->IsCheckCast() ||
         instruction->IsClassTableGet() ||
         instruction->IsCurrentMethod() ||
         instruction->IsDivZeroCheck() ||
         (instruction->IsInstanceFieldGet() && !instruction->AsInstanceFieldGet()->IsVolatile()) ||
         (instruction->IsPredicatedInstanceFieldGet() &&
          !instruction->AsPredicatedInstanceFieldGet()->IsVolatile()) ||
         (instruction->IsInstanceFieldSet() && !instruction->AsInstanceFieldSet()->IsVolatile()) ||
         instruction->IsInstanceOf() ||
         instruction->IsInvokeInterface() ||
         instruction->IsInvokeStaticOrDirect() ||
         instruction->IsInvokeUnresolved() ||
         instruction->IsInvokeVirtual() ||
         instruction->IsLoadString() ||
         instruction->IsNewArray() ||
         instruction->IsNewInstance() ||
         instruction->IsNullCheck() ||
         instruction->IsPackedSwitch() ||
         instruction->IsParameterValue() ||
         instruction->IsPhi() ||
         instruction->IsReturn() ||
         instruction->IsReturnVoid() ||
         instruction->IsSelect() ||
         (instruction->IsStaticFieldGet() && !instruction->AsStaticFieldGet()->IsVolatile()) ||
         (instruction->IsStaticFieldSet() && !instruction->AsStaticFieldSet()->IsVolatile()) ||
         instruction->IsSuspendCheck() ||
         instruction->IsTypeConversion();
}

bool HScheduler::IsSchedulable(const HBasicBlock* block) const {
  // We may be only interested in loop blocks.
  if (only_optimize_loop_blocks_ && !block->IsInLoop()) {
    return false;
  }
  if (block->GetTryCatchInformation() != nullptr) {
    // Do not schedule blocks that are part of try-catch, because the scheduler
    // cannot see whether the catch block has assumptions about the instruction
    // order in the try block. In the following example, if we enabled the
    // scheduler for the try block, the MultiplyAccumulate could be scheduled
    // before the DivZeroCheck, which could result in an incorrect value of `c`
    // being observed in the catch block.
    //   try {
    //     a = a / b;      // DivZeroCheck
    //                     // Div
    //     c = c * d + e;  // MultiplyAccumulate
    //   } catch { System.out.print(c); }
    return false;
  }
  // Check whether all instructions in this block are schedulable.
  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
    if (!IsSchedulable(it.Current())) {
      return false;
    }
  }
  return true;
}

bool HScheduler::IsSchedulingBarrier(const HInstruction* instr) const {
  return instr->IsControlFlow() ||
         // Don't break calling convention.
         instr->IsParameterValue() ||
         // Code generation of goto relies on SuspendCheck's position.
         instr->IsSuspendCheck();
}

bool HInstructionScheduling::Run(bool only_optimize_loop_blocks,
                                 bool schedule_randomly) {
#if defined(ART_ENABLE_CODEGEN_arm64) || defined(ART_ENABLE_CODEGEN_arm)
  // Phase-local allocators allocate the scheduler's internal data structures
  // (scheduling nodes, internal nodes map, dependencies, etc.).
  CriticalPathSchedulingNodeSelector critical_path_selector;
  RandomSchedulingNodeSelector random_selector;
  SchedulingNodeSelector* selector = schedule_randomly
      ? static_cast<SchedulingNodeSelector*>(&random_selector)
      : static_cast<SchedulingNodeSelector*>(&critical_path_selector);
#else
  // Avoid compilation errors when compiling for an unsupported instruction set.
  UNUSED(only_optimize_loop_blocks);
  UNUSED(schedule_randomly);
  UNUSED(codegen_);
#endif

  switch (instruction_set_) {
#ifdef ART_ENABLE_CODEGEN_arm64
    case InstructionSet::kArm64: {
      arm64::HSchedulerARM64 scheduler(selector);
      scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
      scheduler.Schedule(graph_);
      break;
    }
#endif
#if defined(ART_ENABLE_CODEGEN_arm)
    case InstructionSet::kThumb2:
    case InstructionSet::kArm: {
      arm::SchedulingLatencyVisitorARM arm_latency_visitor(codegen_);
      arm::HSchedulerARM scheduler(selector, &arm_latency_visitor);
      scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
      scheduler.Schedule(graph_);
      break;
    }
#endif
    default:
      break;
  }
  return true;
}

}  // namespace art