You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
365 lines
14 KiB
365 lines
14 KiB
/*
|
|
* Copyright (C) 2020 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#ifndef ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_
|
|
#define ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_
|
|
|
|
#include <algorithm>
|
|
#include <sstream>
|
|
|
|
#include "base/arena_allocator.h"
|
|
#include "base/arena_bit_vector.h"
|
|
#include "base/arena_containers.h"
|
|
#include "base/array_ref.h"
|
|
#include "base/bit_vector-inl.h"
|
|
#include "base/globals.h"
|
|
#include "base/iteration_range.h"
|
|
#include "base/mutex.h"
|
|
#include "base/scoped_arena_allocator.h"
|
|
#include "base/scoped_arena_containers.h"
|
|
#include "base/stl_util.h"
|
|
#include "base/transform_iterator.h"
|
|
#include "nodes.h"
|
|
|
|
namespace art {
|
|
|
|
// Helper for transforming blocks to block_ids.
|
|
class BlockToBlockIdTransformer {
|
|
public:
|
|
BlockToBlockIdTransformer(BlockToBlockIdTransformer&&) = default;
|
|
BlockToBlockIdTransformer(const BlockToBlockIdTransformer&) = default;
|
|
BlockToBlockIdTransformer() {}
|
|
|
|
inline uint32_t operator()(const HBasicBlock* b) const {
|
|
return b->GetBlockId();
|
|
}
|
|
};
|
|
|
|
// Helper for transforming block ids to blocks.
|
|
class BlockIdToBlockTransformer {
|
|
public:
|
|
BlockIdToBlockTransformer(BlockIdToBlockTransformer&&) = default;
|
|
BlockIdToBlockTransformer(const BlockIdToBlockTransformer&) = default;
|
|
explicit BlockIdToBlockTransformer(const HGraph* graph) : graph_(graph) {}
|
|
|
|
inline const HGraph* GetGraph() const {
|
|
return graph_;
|
|
}
|
|
|
|
inline HBasicBlock* GetBlock(uint32_t id) const {
|
|
DCHECK_LT(id, graph_->GetBlocks().size()) << graph_->PrettyMethod();
|
|
HBasicBlock* blk = graph_->GetBlocks()[id];
|
|
DCHECK(blk != nullptr);
|
|
return blk;
|
|
}
|
|
|
|
inline HBasicBlock* operator()(uint32_t id) const {
|
|
return GetBlock(id);
|
|
}
|
|
|
|
private:
|
|
const HGraph* const graph_;
|
|
};
|
|
|
|
class BlockIdFilterThunk {
|
|
public:
|
|
explicit BlockIdFilterThunk(const BitVector& i) : inner_(i) {}
|
|
BlockIdFilterThunk(BlockIdFilterThunk&& other) noexcept = default;
|
|
BlockIdFilterThunk(const BlockIdFilterThunk&) = default;
|
|
|
|
bool operator()(const HBasicBlock* b) const {
|
|
return inner_.IsBitSet(b->GetBlockId());
|
|
}
|
|
|
|
private:
|
|
const BitVector& inner_;
|
|
};
|
|
|
|
// A representation of a particular section of the graph. The graph is split
|
|
// into an excluded and included area and is used to track escapes.
|
|
//
|
|
// This object is a view of the graph and is not updated as the graph is
|
|
// changed.
|
|
//
|
|
// This is implemented by removing various escape points from the subgraph using
|
|
// the 'RemoveBlock' function. Once all required blocks are removed one will
|
|
// 'Finalize' the subgraph. This will extend the removed area to include:
|
|
// (1) Any block which inevitably leads to (post-dominates) a removed block
|
|
// (2) any block which is between 2 removed blocks
|
|
//
|
|
// This allows us to create a set of 'ExcludedCohorts' which are the
|
|
// well-connected subsets of the graph made up of removed blocks. These cohorts
|
|
// have a set of entry and exit blocks which act as the boundary of the cohort.
|
|
// Since we removed blocks between 2 excluded blocks it is impossible for any
|
|
// cohort-exit block to reach any cohort-entry block. This means we can use the
|
|
// boundary between the cohort and the rest of the graph to insert
|
|
// materialization blocks for partial LSE.
|
|
//
|
|
// TODO We really should expand this to take into account where the object
|
|
// allocation takes place directly. Currently we always act as though it were
|
|
// allocated in the entry block. This is a massively simplifying assumption but
|
|
// means we can't partially remove objects that are repeatedly allocated in a
|
|
// loop.
|
|
class ExecutionSubgraph : public DeletableArenaObject<kArenaAllocLSA> {
|
|
public:
|
|
using BitVecBlockRange =
|
|
IterationRange<TransformIterator<BitVector::IndexIterator, BlockIdToBlockTransformer>>;
|
|
using FilteredBitVecBlockRange = IterationRange<
|
|
FilterIterator<ArenaVector<HBasicBlock*>::const_iterator, BlockIdFilterThunk>>;
|
|
|
|
// A set of connected blocks which are connected and removed from the
|
|
// ExecutionSubgraph. See above comment for explanation.
|
|
class ExcludedCohort : public ArenaObject<kArenaAllocLSA> {
|
|
public:
|
|
ExcludedCohort(ExcludedCohort&&) = default;
|
|
ExcludedCohort(const ExcludedCohort&) = delete;
|
|
explicit ExcludedCohort(ScopedArenaAllocator* allocator, HGraph* graph)
|
|
: graph_(graph),
|
|
entry_blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA),
|
|
exit_blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA),
|
|
blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA) {}
|
|
|
|
~ExcludedCohort() = default;
|
|
|
|
// All blocks in the cohort.
|
|
BitVecBlockRange Blocks() const {
|
|
return BlockIterRange(blocks_);
|
|
}
|
|
|
|
// Blocks that have predecessors outside of the cohort. These blocks will
|
|
// need to have PHIs/control-flow added to create the escaping value.
|
|
BitVecBlockRange EntryBlocks() const {
|
|
return BlockIterRange(entry_blocks_);
|
|
}
|
|
|
|
FilteredBitVecBlockRange EntryBlocksReversePostOrder() const {
|
|
return Filter(MakeIterationRange(graph_->GetReversePostOrder()),
|
|
BlockIdFilterThunk(entry_blocks_));
|
|
}
|
|
|
|
bool IsEntryBlock(const HBasicBlock* blk) const {
|
|
return entry_blocks_.IsBitSet(blk->GetBlockId());
|
|
}
|
|
|
|
// Blocks that have successors outside of the cohort. The successors of
|
|
// these blocks will need to have PHI's to restore state.
|
|
BitVecBlockRange ExitBlocks() const {
|
|
return BlockIterRange(exit_blocks_);
|
|
}
|
|
|
|
bool operator==(const ExcludedCohort& other) const {
|
|
return blocks_.Equal(&other.blocks_);
|
|
}
|
|
|
|
bool ContainsBlock(const HBasicBlock* blk) const {
|
|
return blocks_.IsBitSet(blk->GetBlockId());
|
|
}
|
|
|
|
// Returns true if there is a path from 'blk' to any block in this cohort.
|
|
// NB blocks contained within the cohort are not considered to be succeeded
|
|
// by the cohort (i.e. this function will return false).
|
|
bool SucceedsBlock(const HBasicBlock* blk) const {
|
|
if (ContainsBlock(blk)) {
|
|
return false;
|
|
}
|
|
auto idxs = entry_blocks_.Indexes();
|
|
return std::any_of(idxs.begin(), idxs.end(), [&](uint32_t entry) -> bool {
|
|
return blk->GetGraph()->PathBetween(blk->GetBlockId(), entry);
|
|
});
|
|
}
|
|
|
|
// Returns true if there is a path from any block in this cohort to 'blk'.
|
|
// NB blocks contained within the cohort are not considered to be preceded
|
|
// by the cohort (i.e. this function will return false).
|
|
bool PrecedesBlock(const HBasicBlock* blk) const {
|
|
if (ContainsBlock(blk)) {
|
|
return false;
|
|
}
|
|
auto idxs = exit_blocks_.Indexes();
|
|
return std::any_of(idxs.begin(), idxs.end(), [&](uint32_t exit) -> bool {
|
|
return blk->GetGraph()->PathBetween(exit, blk->GetBlockId());
|
|
});
|
|
}
|
|
|
|
void Dump(std::ostream& os) const;
|
|
|
|
private:
|
|
BitVecBlockRange BlockIterRange(const ArenaBitVector& bv) const {
|
|
auto indexes = bv.Indexes();
|
|
BitVecBlockRange res = MakeTransformRange(indexes, BlockIdToBlockTransformer(graph_));
|
|
return res;
|
|
}
|
|
|
|
ExcludedCohort() = delete;
|
|
|
|
HGraph* graph_;
|
|
ArenaBitVector entry_blocks_;
|
|
ArenaBitVector exit_blocks_;
|
|
ArenaBitVector blocks_;
|
|
|
|
friend class ExecutionSubgraph;
|
|
friend class LoadStoreAnalysisTest;
|
|
};
|
|
|
|
// The number of successors we can track on a single block. Graphs which
|
|
// contain a block with a branching factor greater than this will not be
|
|
// analysed. This is used to both limit the memory usage of analysis to
|
|
// reasonable levels and ensure that the analysis will complete in a
|
|
// reasonable amount of time. It also simplifies the implementation somewhat
|
|
// to have a constant branching factor.
|
|
static constexpr uint32_t kMaxFilterableSuccessors = 8;
|
|
|
|
// Instantiate a subgraph. The subgraph can be instantiated only if partial-escape
|
|
// analysis is desired (eg not when being used for instruction scheduling) and
|
|
// when the branching factor in the graph is not too high. These conditions
|
|
// are determined once and passed down for performance reasons.
|
|
ExecutionSubgraph(HGraph* graph, ScopedArenaAllocator* allocator);
|
|
|
|
void Invalidate() {
|
|
valid_ = false;
|
|
}
|
|
|
|
// A block is contained by the ExecutionSubgraph if it is reachable. This
|
|
// means it has not been removed explicitly or via pruning/concavity removal.
|
|
// Finalization is needed to call this function.
|
|
// See RemoveConcavity and Prune for more information.
|
|
bool ContainsBlock(const HBasicBlock* blk) const {
|
|
DCHECK(!finalized_ || !needs_prune_) << "finalized: " << finalized_;
|
|
if (!valid_) {
|
|
return false;
|
|
}
|
|
return !unreachable_blocks_.IsBitSet(blk->GetBlockId());
|
|
}
|
|
|
|
// Mark the block as removed from the subgraph.
|
|
void RemoveBlock(const HBasicBlock* to_remove);
|
|
|
|
// Called when no more updates will be done to the subgraph. Calculate the
|
|
// final subgraph
|
|
void Finalize() {
|
|
Prune();
|
|
RemoveConcavity();
|
|
finalized_ = true;
|
|
}
|
|
|
|
BitVecBlockRange UnreachableBlocks() const {
|
|
auto idxs = unreachable_blocks_.Indexes();
|
|
return MakeTransformRange(idxs, BlockIdToBlockTransformer(graph_));
|
|
}
|
|
|
|
// Returns true if all allowed execution paths from start eventually reach the
|
|
// graph's exit block (or diverge).
|
|
bool IsValid() const {
|
|
return valid_;
|
|
}
|
|
|
|
ArrayRef<const ExcludedCohort> GetExcludedCohorts() const {
|
|
DCHECK(!valid_ || !needs_prune_);
|
|
if (!valid_ || !unreachable_blocks_.IsAnyBitSet()) {
|
|
return ArrayRef<const ExcludedCohort>();
|
|
} else {
|
|
return ArrayRef<const ExcludedCohort>(*excluded_list_);
|
|
}
|
|
}
|
|
|
|
// Helper class to create reachable blocks iterator.
|
|
class ContainsFunctor {
|
|
public:
|
|
bool operator()(HBasicBlock* blk) const {
|
|
return subgraph_->ContainsBlock(blk);
|
|
}
|
|
|
|
private:
|
|
explicit ContainsFunctor(const ExecutionSubgraph* subgraph) : subgraph_(subgraph) {}
|
|
const ExecutionSubgraph* const subgraph_;
|
|
friend class ExecutionSubgraph;
|
|
};
|
|
// Returns an iterator over reachable blocks (filtered as we go). This is primarilly for testing.
|
|
IterationRange<
|
|
FilterIterator<typename ArenaVector<HBasicBlock*>::const_iterator, ContainsFunctor>>
|
|
ReachableBlocks() const {
|
|
return Filter(MakeIterationRange(graph_->GetBlocks()), ContainsFunctor(this));
|
|
}
|
|
|
|
static bool CanAnalyse(HGraph* graph) {
|
|
// If there are any blocks with more than kMaxFilterableSuccessors we can't
|
|
// analyse the graph. We avoid this case to prevent excessive memory and
|
|
// time usage while allowing a simpler algorithm with a fixed-width
|
|
// branching factor.
|
|
return std::all_of(graph->GetBlocks().begin(), graph->GetBlocks().end(), [](HBasicBlock* blk) {
|
|
return blk == nullptr || blk->GetSuccessors().size() <= kMaxFilterableSuccessors;
|
|
});
|
|
}
|
|
|
|
private:
|
|
std::bitset<kMaxFilterableSuccessors> GetAllowedSuccessors(const HBasicBlock* blk) const {
|
|
DCHECK(valid_);
|
|
return allowed_successors_[blk->GetBlockId()];
|
|
}
|
|
|
|
void LimitBlockSuccessors(const HBasicBlock* block,
|
|
std::bitset<kMaxFilterableSuccessors> allowed) {
|
|
needs_prune_ = true;
|
|
allowed_successors_[block->GetBlockId()] &= allowed;
|
|
}
|
|
|
|
// Remove nodes which both precede and follow any exclusions. This ensures we don't need to deal
|
|
// with only conditionally materializing objects depending on if we already materialized them
|
|
// Ensure that for all blocks A, B, C: Unreachable(A) && Unreachable(C) && PathBetween(A, B) &&
|
|
// PathBetween(A, C) implies Unreachable(B). This simplifies later transforms since it ensures
|
|
// that no execution can leave and then re-enter any exclusion.
|
|
void RemoveConcavity();
|
|
|
|
// Removes sink nodes. Sink nodes are nodes where there is no execution which
|
|
// avoids all removed nodes.
|
|
void Prune();
|
|
|
|
void RecalculateExcludedCohort();
|
|
|
|
HGraph* graph_;
|
|
ScopedArenaAllocator* allocator_;
|
|
// The map from block_id -> allowed-successors.
|
|
// This is the canonical representation of this subgraph. If a bit in the
|
|
// bitset is not set then the corresponding outgoing edge of that block is not
|
|
// considered traversable.
|
|
ScopedArenaVector<std::bitset<kMaxFilterableSuccessors>> allowed_successors_;
|
|
// Helper that holds which blocks we are able to reach. Only valid if
|
|
// 'needs_prune_ == false'.
|
|
ArenaBitVector unreachable_blocks_;
|
|
// A list of the excluded-cohorts of this subgraph. This is only valid when
|
|
// 'needs_prune_ == false'
|
|
std::optional<ScopedArenaVector<ExcludedCohort>> excluded_list_;
|
|
// Bool to hold if there is at least one known path from the start block to
|
|
// the end in this graph. Used to short-circuit computation.
|
|
bool valid_;
|
|
// True if the subgraph is consistent and can be queried. Modifying the
|
|
// subgraph clears this and requires a prune to restore.
|
|
bool needs_prune_;
|
|
// True if no more modification of the subgraph is permitted.
|
|
bool finalized_;
|
|
|
|
friend class ExecutionSubgraphTest;
|
|
friend class LoadStoreAnalysisTest;
|
|
|
|
DISALLOW_COPY_AND_ASSIGN(ExecutionSubgraph);
|
|
};
|
|
|
|
std::ostream& operator<<(std::ostream& os, const ExecutionSubgraph::ExcludedCohort& ex);
|
|
|
|
} // namespace art
|
|
|
|
#endif // ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_
|