/*
 * Copyright (C) 2020 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_
#define ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_

#include <algorithm>
#include <sstream>

#include "base/arena_allocator.h"
#include "base/arena_bit_vector.h"
#include "base/arena_containers.h"
#include "base/array_ref.h"
#include "base/bit_vector-inl.h"
#include "base/globals.h"
#include "base/iteration_range.h"
#include "base/mutex.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "base/stl_util.h"
#include "base/transform_iterator.h"
#include "nodes.h"

namespace art {

// Helper for transforming blocks to block_ids.
class BlockToBlockIdTransformer {
 public:
  BlockToBlockIdTransformer(BlockToBlockIdTransformer&&) = default;
  BlockToBlockIdTransformer(const BlockToBlockIdTransformer&) = default;
  BlockToBlockIdTransformer() {}

  inline uint32_t operator()(const HBasicBlock* b) const {
    return b->GetBlockId();
  }
};

// Helper for transforming block ids to blocks.
class BlockIdToBlockTransformer {
 public:
  BlockIdToBlockTransformer(BlockIdToBlockTransformer&&) = default;
  BlockIdToBlockTransformer(const BlockIdToBlockTransformer&) = default;
  explicit BlockIdToBlockTransformer(const HGraph* graph) : graph_(graph) {}

  inline const HGraph* GetGraph() const {
    return graph_;
  }

  inline HBasicBlock* GetBlock(uint32_t id) const {
    DCHECK_LT(id, graph_->GetBlocks().size()) << graph_->PrettyMethod();
    HBasicBlock* blk = graph_->GetBlocks()[id];
    DCHECK(blk != nullptr);
    return blk;
  }

  inline HBasicBlock* operator()(uint32_t id) const {
    return GetBlock(id);
  }

 private:
  const HGraph* const graph_;
};

class BlockIdFilterThunk {
 public:
  explicit BlockIdFilterThunk(const BitVector& i) : inner_(i) {}
  BlockIdFilterThunk(BlockIdFilterThunk&& other) noexcept = default;
  BlockIdFilterThunk(const BlockIdFilterThunk&) = default;

  bool operator()(const HBasicBlock* b) const {
    return inner_.IsBitSet(b->GetBlockId());
  }

 private:
  const BitVector& inner_;
};

// A representation of a particular section of the graph. The graph is split
// into an excluded and an included area and is used to track escapes.
//
// This object is a view of the graph and is not updated as the graph is
// changed.
//
// This is implemented by removing various escape points from the subgraph using
// the 'RemoveBlock' function. Once all required blocks are removed one will
// 'Finalize' the subgraph. This will extend the removed area to include:
// (1) Any block which inevitably leads to (post-dominates) a removed block
// (2) Any block which is between 2 removed blocks
//
// This allows us to create a set of 'ExcludedCohorts' which are the
// well-connected subsets of the graph made up of removed blocks. These cohorts
// have a set of entry and exit blocks which act as the boundary of the cohort.
// Since we removed blocks between 2 excluded blocks it is impossible for any
// cohort-exit block to reach any cohort-entry block. This means we can use the
// boundary between the cohort and the rest of the graph to insert
// materialization blocks for partial LSE.
//
// TODO We really should expand this to take into account where the object
// allocation takes place directly. Currently we always act as though it were
// allocated in the entry block. This is a massively simplifying assumption but
// means we can't partially remove objects that are repeatedly allocated in a
// loop.
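//
// Illustrative usage (a sketch only, not part of the API: 'escape_block' and
// 'allocator' stand in for values the analysis pass would already have; the
// calls themselves are the ones declared below):
//
//   ExecutionSubgraph esg(graph, allocator);
//   esg.RemoveBlock(escape_block);  // Exclude each block where the object escapes.
//   esg.Finalize();                 // Extend the removed area (Prune + RemoveConcavity).
//   if (esg.IsValid()) {
//     for (const ExecutionSubgraph::ExcludedCohort& cohort : esg.GetExcludedCohorts()) {
//       // cohort.EntryBlocks() and cohort.ExitBlocks() form the boundary where
//       // partial-LSE materialization code can be inserted.
//     }
//   }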
class ExecutionSubgraph : public DeletableArenaObject<kArenaAllocLSA> {
 public:
  using BitVecBlockRange =
      IterationRange<TransformIterator<BitVector::IndexIterator, BlockIdToBlockTransformer>>;
  using FilteredBitVecBlockRange = IterationRange<
      FilterIterator<ArenaVector<HBasicBlock*>::const_iterator, BlockIdFilterThunk>>;

  // A set of connected blocks which have been removed from the
  // ExecutionSubgraph. See the comment above for an explanation.
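  //
  // As an illustration (a hypothetical CFG, not drawn from any real method):
  // if blocks B2 and B3 have been removed and form a single cohort in
  //
  //        B1
  //       /  \
  //      B2   B4
  //      |    |
  //      B3   B5
  //       \  /
  //        B6
  //
  // then B2 is the cohort's entry block and B3 its exit block, so
  // SucceedsBlock(B1) is true while PrecedesBlock(B1) is false, and
  // PrecedesBlock(B6) is true while SucceedsBlock(B6) is false. B4 and B5 are
  // neither preceded nor succeeded by the cohort.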
  class ExcludedCohort : public ArenaObject<kArenaAllocLSA> {
   public:
    ExcludedCohort(ExcludedCohort&&) = default;
    ExcludedCohort(const ExcludedCohort&) = delete;
    explicit ExcludedCohort(ScopedArenaAllocator* allocator, HGraph* graph)
        : graph_(graph),
          entry_blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA),
          exit_blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA),
          blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA) {}

    ~ExcludedCohort() = default;

    // All blocks in the cohort.
    BitVecBlockRange Blocks() const {
      return BlockIterRange(blocks_);
    }

    // Blocks that have predecessors outside of the cohort. These blocks will
    // need to have PHIs/control-flow added to create the escaping value.
    BitVecBlockRange EntryBlocks() const {
      return BlockIterRange(entry_blocks_);
    }

    FilteredBitVecBlockRange EntryBlocksReversePostOrder() const {
      return Filter(MakeIterationRange(graph_->GetReversePostOrder()),
                    BlockIdFilterThunk(entry_blocks_));
    }

    bool IsEntryBlock(const HBasicBlock* blk) const {
      return entry_blocks_.IsBitSet(blk->GetBlockId());
    }

    // Blocks that have successors outside of the cohort. The successors of
    // these blocks will need to have PHIs to restore state.
    BitVecBlockRange ExitBlocks() const {
      return BlockIterRange(exit_blocks_);
    }

    bool operator==(const ExcludedCohort& other) const {
      return blocks_.Equal(&other.blocks_);
    }

    bool ContainsBlock(const HBasicBlock* blk) const {
      return blocks_.IsBitSet(blk->GetBlockId());
    }

    // Returns true if there is a path from 'blk' to any block in this cohort.
    // NB blocks contained within the cohort are not considered to be succeeded
    // by the cohort (i.e. this function will return false for them).
    bool SucceedsBlock(const HBasicBlock* blk) const {
      if (ContainsBlock(blk)) {
        return false;
      }
      auto idxs = entry_blocks_.Indexes();
      return std::any_of(idxs.begin(), idxs.end(), [&](uint32_t entry) -> bool {
        return blk->GetGraph()->PathBetween(blk->GetBlockId(), entry);
      });
    }

    // Returns true if there is a path from any block in this cohort to 'blk'.
    // NB blocks contained within the cohort are not considered to be preceded
    // by the cohort (i.e. this function will return false for them).
    bool PrecedesBlock(const HBasicBlock* blk) const {
      if (ContainsBlock(blk)) {
        return false;
      }
      auto idxs = exit_blocks_.Indexes();
      return std::any_of(idxs.begin(), idxs.end(), [&](uint32_t exit) -> bool {
        return blk->GetGraph()->PathBetween(exit, blk->GetBlockId());
      });
    }

    void Dump(std::ostream& os) const;

   private:
    BitVecBlockRange BlockIterRange(const ArenaBitVector& bv) const {
      auto indexes = bv.Indexes();
      BitVecBlockRange res = MakeTransformRange(indexes, BlockIdToBlockTransformer(graph_));
      return res;
    }

    ExcludedCohort() = delete;

    HGraph* graph_;
    ArenaBitVector entry_blocks_;
    ArenaBitVector exit_blocks_;
    ArenaBitVector blocks_;

    friend class ExecutionSubgraph;
    friend class LoadStoreAnalysisTest;
  };

  // The number of successors we can track on a single block. Graphs which
  // contain a block with a branching factor greater than this will not be
  // analysed. This is used both to limit the memory usage of the analysis to
  // reasonable levels and to ensure that the analysis will complete in a
  // reasonable amount of time. It also simplifies the implementation somewhat
  // to have a constant branching factor.
  static constexpr uint32_t kMaxFilterableSuccessors = 8;
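  //
  // Illustrative example (not a statement about any particular graph): each
  // analysed block is given one bit per outgoing edge, indexed like its
  // successor list, so a block ending in an HIf uses 2 of the 8 trackable
  // successors, while a block with more than kMaxFilterableSuccessors
  // successors makes the whole graph un-analysable (see CanAnalyse() below).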

  // Instantiate a subgraph. The subgraph can be instantiated only if partial-escape
  // analysis is desired (e.g. not when being used for instruction scheduling) and
  // when the branching factor in the graph is not too high. These conditions
  // are determined once and passed down for performance reasons.
  ExecutionSubgraph(HGraph* graph, ScopedArenaAllocator* allocator);

  void Invalidate() {
    valid_ = false;
  }

  // A block is contained by the ExecutionSubgraph if it is reachable. This
  // means it has not been removed explicitly or via pruning/concavity removal.
  // Finalization is needed to call this function.
  // See RemoveConcavity and Prune for more information.
  bool ContainsBlock(const HBasicBlock* blk) const {
    DCHECK(!finalized_ || !needs_prune_) << "finalized: " << finalized_;
    if (!valid_) {
      return false;
    }
    return !unreachable_blocks_.IsBitSet(blk->GetBlockId());
  }

  // Mark the block as removed from the subgraph.
  void RemoveBlock(const HBasicBlock* to_remove);

  // Called when no more updates will be done to the subgraph. Calculates the
  // final subgraph.
  void Finalize() {
    Prune();
    RemoveConcavity();
    finalized_ = true;
  }

  BitVecBlockRange UnreachableBlocks() const {
    auto idxs = unreachable_blocks_.Indexes();
    return MakeTransformRange(idxs, BlockIdToBlockTransformer(graph_));
  }

  // Returns true if all allowed execution paths from start eventually reach the
  // graph's exit block (or diverge).
  bool IsValid() const {
    return valid_;
  }

  ArrayRef<const ExcludedCohort> GetExcludedCohorts() const {
    DCHECK(!valid_ || !needs_prune_);
    if (!valid_ || !unreachable_blocks_.IsAnyBitSet()) {
      return ArrayRef<const ExcludedCohort>();
    } else {
      return ArrayRef<const ExcludedCohort>(*excluded_list_);
    }
  }

  // Helper class to create the reachable blocks iterator.
  class ContainsFunctor {
   public:
    bool operator()(HBasicBlock* blk) const {
      return subgraph_->ContainsBlock(blk);
    }

   private:
    explicit ContainsFunctor(const ExecutionSubgraph* subgraph) : subgraph_(subgraph) {}
    const ExecutionSubgraph* const subgraph_;
    friend class ExecutionSubgraph;
  };

  // Returns an iterator over reachable blocks (filtered as we go). This is primarily for testing.
  IterationRange<FilterIterator<ArenaVector<HBasicBlock*>::const_iterator, ContainsFunctor>>
  ReachableBlocks() const {
    return Filter(MakeIterationRange(graph_->GetBlocks()), ContainsFunctor(this));
  }

  static bool CanAnalyse(HGraph* graph) {
    // If there are any blocks with more than kMaxFilterableSuccessors we can't
    // analyse the graph. We avoid this case to prevent excessive memory and
    // time usage while allowing a simpler algorithm with a fixed-width
    // branching factor.
    return std::all_of(graph->GetBlocks().begin(), graph->GetBlocks().end(), [](HBasicBlock* blk) {
      return blk == nullptr || blk->GetSuccessors().size() <= kMaxFilterableSuccessors;
    });
  }

 private:
  std::bitset<kMaxFilterableSuccessors> GetAllowedSuccessors(const HBasicBlock* blk) const {
    DCHECK(valid_);
    return allowed_successors_[blk->GetBlockId()];
  }

  void LimitBlockSuccessors(const HBasicBlock* block,
                            std::bitset<kMaxFilterableSuccessors> allowed) {
    needs_prune_ = true;
    allowed_successors_[block->GetBlockId()] &= allowed;
  }

  // Remove nodes which both precede and follow any exclusions. This ensures we don't need to deal
  // with only conditionally materializing objects depending on whether we already materialized
  // them. Ensure that for all blocks A, B, C: Unreachable(A) && Unreachable(C) && PathBetween(A, B)
  // && PathBetween(B, C) implies Unreachable(B). This simplifies later transforms since it ensures
  // that no execution can leave and then re-enter any exclusion.
  void RemoveConcavity();

  // Removes sink nodes. Sink nodes are nodes where there is no execution which
  // avoids all removed nodes.
  void Prune();

  void RecalculateExcludedCohort();

  HGraph* graph_;
  ScopedArenaAllocator* allocator_;
  // The map from block_id -> allowed-successors.
  // This is the canonical representation of this subgraph. If a bit in the
  // bitset is not set then the corresponding outgoing edge of that block is not
  // considered traversable.
  ScopedArenaVector<std::bitset<kMaxFilterableSuccessors>> allowed_successors_;
  // Helper that holds which blocks we are able to reach. Only valid if
  // 'needs_prune_ == false'.
  ArenaBitVector unreachable_blocks_;
  // A list of the excluded-cohorts of this subgraph. This is only valid when
  // 'needs_prune_ == false'.
  std::optional<ScopedArenaVector<ExcludedCohort>> excluded_list_;
  // True if there is at least one known path from the start block to the end
  // of this graph. Used to short-circuit computation.
  bool valid_;
  // True if the subgraph has been modified since the last prune and therefore
  // cannot be queried until Prune() restores consistency.
  bool needs_prune_;
  // True if no more modification of the subgraph is permitted.
  bool finalized_;

  friend class ExecutionSubgraphTest;
  friend class LoadStoreAnalysisTest;

  DISALLOW_COPY_AND_ASSIGN(ExecutionSubgraph);
};

std::ostream& operator<<(std::ostream& os, const ExecutionSubgraph::ExcludedCohort& ex);

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_