/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "parallel_move_resolver.h"

#include "base/stl_util.h"
#include "nodes.h"

namespace art {

void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) {
  // Perform a linear sweep of the moves to add them to the initial list of
  // moves to perform, ignoring any move that is redundant (the source is
  // the same as the destination, the destination is ignored and
  // unallocated, or the move was already eliminated).
  for (size_t i = 0; i < parallel_move->NumMoves(); ++i) {
    MoveOperands* move = parallel_move->MoveOperandsAt(i);
    if (!move->IsRedundant()) {
      moves_.push_back(move);
    }
  }
}

void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) {
  DCHECK(moves_.empty());
  // Build up a worklist of moves.
  BuildInitialMoveList(parallel_move);

  // Move stack/stack slots first to take advantage of a free register on
  // constrained machines.
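  // For instance, with the moves (stack_a -> stack_b) (constant -> reg),
  // performing the stack-to-stack move first means the backend can typically
  // borrow `reg` as a scratch register for the memory-to-memory copy (it is a
  // destination, but not a source, of a remaining move) instead of spilling one.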
  for (size_t i = 0; i < moves_.size(); ++i) {
    const MoveOperands& move = *moves_[i];
    // Ignore constants and moves already eliminated.
    if (move.IsEliminated() || move.GetSource().IsConstant()) {
      continue;
    }

    if ((move.GetSource().IsStackSlot() || move.GetSource().IsDoubleStackSlot()) &&
        (move.GetDestination().IsStackSlot() || move.GetDestination().IsDoubleStackSlot())) {
      PerformMove(i);
    }
  }

  for (size_t i = 0; i < moves_.size(); ++i) {
    const MoveOperands& move = *moves_[i];
    // Skip constants to perform them last. They don't block other moves
    // and skipping such moves with register destinations keeps those
    // registers free for the whole algorithm.
    if (!move.IsEliminated() && !move.GetSource().IsConstant()) {
      PerformMove(i);
    }
  }

  // Perform the moves with constant sources.
  for (size_t i = 0; i < moves_.size(); ++i) {
    MoveOperands* move = moves_[i];
    if (!move->IsEliminated()) {
      DCHECK(move->GetSource().IsConstant());
      EmitMove(i);
      // Eliminate the move, in case following moves need a scratch register.
      move->Eliminate();
    }
  }

  moves_.clear();
}

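// LowOf() and HighOf() return the 32-bit halves of a 64-bit location
// (register pair, FPU register pair or double stack slot), and
// Location::NoLocation() otherwise.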
Location LowOf(Location location) {
  if (location.IsRegisterPair()) {
    return Location::RegisterLocation(location.low());
  } else if (location.IsFpuRegisterPair()) {
    return Location::FpuRegisterLocation(location.low());
  } else if (location.IsDoubleStackSlot()) {
    return Location::StackSlot(location.GetStackIndex());
  } else {
    return Location::NoLocation();
  }
}

Location HighOf(Location location) {
  if (location.IsRegisterPair()) {
    return Location::RegisterLocation(location.high());
  } else if (location.IsFpuRegisterPair()) {
    return Location::FpuRegisterLocation(location.high());
  } else if (location.IsDoubleStackSlot()) {
    return Location::StackSlot(location.GetHighStackIndex(4));
  } else {
    return Location::NoLocation();
  }
}

// Update the source of `move`, knowing that `updated_location` has been swapped
// with `new_source`. Note that `updated_location` can be a pair, therefore if
// `move` is not a pair, we need to extract which half to use.
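// For instance, if the register pair (r0, r1) has been swapped with the pair
// (r2, r3) and `move` reads the single register r1, its source is updated to
// r3, the high half of the new location.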
static void UpdateSourceOf(MoveOperands* move, Location updated_location, Location new_source) {
  Location source = move->GetSource();
  if (LowOf(updated_location).Equals(source)) {
    move->SetSource(LowOf(new_source));
  } else if (HighOf(updated_location).Equals(source)) {
    move->SetSource(HighOf(new_source));
  } else {
    DCHECK(updated_location.Equals(source)) << updated_location << " " << source;
    move->SetSource(new_source);
  }
}

MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) {
  // Each call to this function performs a move and deletes it from the move
  // graph. We first recursively perform any move blocking this one. We
  // mark a move as "pending" on entry to PerformMove in order to detect
  // cycles in the move graph. We use operand swaps to resolve cycles,
  // which means that a call to PerformMove could change any source operand
  // in the move graph.
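  //
  // For instance, the parallel move (r0 -> r1) (r1 -> r0) forms a cycle:
  // resolving the first move recurses into the second, which finds itself
  // blocked by the (pending) first move and emits a single swap of r0 and r1;
  // the first move then becomes redundant and is simply eliminated.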
  MoveOperands* move = moves_[index];
  DCHECK(!move->IsPending());
  if (move->IsRedundant()) {
    // Because we swap register pairs first, subsequent non-pending
    // moves may become redundant.
    move->Eliminate();
    return nullptr;
  }

  // Clear this move's destination to indicate a pending move. The actual
  // destination is saved in a stack-allocated local. Recursion may allow
  // multiple moves to be pending.
  DCHECK(!move->GetSource().IsInvalid());
  Location destination = move->MarkPending();

  // Perform a depth-first traversal of the move graph to resolve
  // dependencies. Any unperformed, unpending move with a source the same
  // as this one's destination blocks this one so recursively perform all
  // such moves.
  MoveOperands* required_swap = nullptr;
  for (size_t i = 0; i < moves_.size(); ++i) {
    const MoveOperands& other_move = *moves_[i];
    if (other_move.Blocks(destination) && !other_move.IsPending()) {
      // Though PerformMove can change any source operand in the move graph,
      // calling `PerformMove` cannot create a blocking move via a swap
      // (this loop does not miss any).
      // For example, assume there is a non-blocking move with source A
      // and this move is blocked on source B and there is a swap of A and
      // B. Then A and B must be involved in the same cycle (or they would
      // not be swapped). Since this move's destination is B and there is
      // only a single incoming edge to an operand, this move must also be
      // involved in the same cycle. In that case, the blocking move will
      // be created but will be "pending" when we return from PerformMove.
      required_swap = PerformMove(i);

      if (required_swap == move) {
        // If this move is required to swap, we do so without looking
        // at the next moves. Swapping is not blocked by anything, it just
        // updates other moves' sources.
        break;
      } else if (required_swap == moves_[i]) {
        // If `other_move` was swapped, we iterate again to find a new
        // potential cycle.
        required_swap = nullptr;
        i = -1;
      } else if (required_swap != nullptr) {
        // A move is required to swap. We walk back the cycle to find the
        // move by just returning from this `PerformMove`.
        moves_[index]->ClearPending(destination);
        return required_swap;
      }
    }
  }

  // We are about to resolve this move and don't need it marked as
  // pending, so restore its destination.
  move->ClearPending(destination);

  // This move's source may have changed due to swaps to resolve cycles and
  // so it may now be the last move in the cycle. If so remove it.
  if (move->GetSource().Equals(destination)) {
    move->Eliminate();
    DCHECK(required_swap == nullptr);
    return nullptr;
  }

  // The move may be blocked on at most one pending move, in which case
  // we have a cycle. Search for such a blocking move and perform a swap to
  // resolve it.
  bool do_swap = false;
  if (required_swap != nullptr) {
    DCHECK_EQ(required_swap, move);
    do_swap = true;
  } else {
    for (MoveOperands* other_move : moves_) {
      if (other_move->Blocks(destination)) {
        DCHECK(other_move->IsPending()) << "move=" << *move << " other_move=" << *other_move;
        if (!move->Is64BitMove() && other_move->Is64BitMove()) {
          // We swap 64-bit moves before swapping 32-bit moves. Walk back the
          // cycle by returning the move that must be swapped.
          return other_move;
        }
        do_swap = true;
        break;
      }
    }
  }

  if (do_swap) {
    EmitSwap(index);
    // Any unperformed (including pending) move with a source of either
    // this move's source or destination needs to have its source
    // changed to reflect the state of affairs after the swap.
    Location source = move->GetSource();
    Location swap_destination = move->GetDestination();
    move->Eliminate();
    for (MoveOperands* other_move : moves_) {
      if (other_move->Blocks(source)) {
        UpdateSourceOf(other_move, source, swap_destination);
      } else if (other_move->Blocks(swap_destination)) {
        UpdateSourceOf(other_move, swap_destination, source);
      }
    }
    // If the swap was required because of a 64-bit move in the middle of a cycle,
    // we return the swapped move, so that the caller knows it needs to re-iterate
    // its dependency loop.
    return required_swap;
  } else {
    // This move is not blocked.
    EmitMove(index);
    move->Eliminate();
    DCHECK(required_swap == nullptr);
    return nullptr;
  }
}

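// A location can serve as a scratch location if no remaining move reads from
// it (so clobbering it is safe) and some remaining move writes to it (so its
// current contents are dead anyway).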
bool ParallelMoveResolverWithSwap::IsScratchLocation(Location loc) {
  for (MoveOperands* move : moves_) {
    if (move->Blocks(loc)) {
      return false;
    }
  }

  for (MoveOperands* move : moves_) {
    if (move->GetDestination().Equals(loc)) {
      return true;
    }
  }

  return false;
}

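// Pick a scratch register among the first `register_count` registers, skipping
// `blocked` and preferring a register that is already free for this parallel
// move (see IsScratchLocation()). If none qualifies, fall back to `if_scratch`
// and set `*spilled` so the caller knows the register must be preserved.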
int ParallelMoveResolverWithSwap::AllocateScratchRegister(int blocked,
                                                          int register_count,
                                                          int if_scratch,
                                                          bool* spilled) {
  DCHECK_NE(blocked, if_scratch);
  int scratch = -1;
  for (int reg = 0; reg < register_count; ++reg) {
    if ((blocked != reg) && IsScratchLocation(Location::RegisterLocation(reg))) {
      scratch = reg;
      break;
    }
  }

  if (scratch == -1) {
    *spilled = true;
    scratch = if_scratch;
  } else {
    *spilled = false;
  }

  return scratch;
}

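// RAII helper: acquires a scratch register for the duration of the scope and,
// if no suitable free register was found, spills it on entry and restores it
// in the destructor.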
ParallelMoveResolverWithSwap::ScratchRegisterScope::ScratchRegisterScope(
    ParallelMoveResolverWithSwap* resolver, int blocked, int if_scratch, int number_of_registers)
    : resolver_(resolver),
      reg_(kNoRegister),
      spilled_(false) {
  reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers, if_scratch, &spilled_);

  if (spilled_) {
    resolver->SpillScratch(reg_);
  }
}

ParallelMoveResolverWithSwap::ScratchRegisterScope::~ScratchRegisterScope() {
  if (spilled_) {
    resolver_->RestoreScratch(reg_);
  }
}

void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) {
  DCHECK_EQ(GetNumberOfPendingMoves(), 0u);
  DCHECK(moves_.empty());
  DCHECK(scratches_.empty());

  // Backend dependent initialization.
  PrepareForEmitNativeCode();

  // Build up a worklist of moves.
  BuildInitialMoveList(parallel_move);

  for (size_t i = 0; i < moves_.size(); ++i) {
    const MoveOperands& move = *moves_[i];
    // Skip constants to perform them last. They don't block other moves and
    // skipping such moves with register destinations keeps those registers
    // free for the whole algorithm.
    if (!move.IsEliminated() && !move.GetSource().IsConstant()) {
      PerformMove(i);
    }
  }

  // Perform the moves with constant sources and register destinations with UpdateMoveSource()
  // to reduce the number of literal loads. Stack destinations are skipped since we won't benefit
  // from changing the constant sources to stack locations.
  for (size_t i = 0; i < moves_.size(); ++i) {
    MoveOperands* move = moves_[i];
    Location destination = move->GetDestination();
    if (!move->IsEliminated() && !destination.IsStackSlot() && !destination.IsDoubleStackSlot()) {
      Location source = move->GetSource();
      EmitMove(i);
      move->Eliminate();
      // This may introduce an additional instruction dependency, but it reduces
      // the number of moves and possible literal loads. For example,
      // Original moves:
      //   1234.5678 -> D0
      //   1234.5678 -> D1
      // Updated moves:
      //   1234.5678 -> D0
      //   D0 -> D1
      UpdateMoveSource(source, destination);
    }
  }

  // Perform the rest of the moves.
  for (size_t i = 0; i < moves_.size(); ++i) {
    MoveOperands* move = moves_[i];
    if (!move->IsEliminated()) {
      EmitMove(i);
      move->Eliminate();
    }
  }

  // All pending moves that we have added to resolve cycles should have been performed.
  DCHECK_EQ(GetNumberOfPendingMoves(), 0u);

  // Backend dependent cleanup.
  FinishEmitNativeCode();

  moves_.clear();
  scratches_.clear();
}

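// Returns a location of kind `kind` that can be clobbered: either an allocated
// scratch location or the destination of a remaining move, provided no
// remaining move still reads from it; Location::NoLocation() if there is none.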
Location ParallelMoveResolverNoSwap::GetScratchLocation(Location::Kind kind) {
  for (Location loc : scratches_) {
    if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
      return loc;
    }
  }
  for (MoveOperands* move : moves_) {
    Location loc = move->GetDestination();
    if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) {
      return loc;
    }
  }
  return Location::NoLocation();
}

void ParallelMoveResolverNoSwap::AddScratchLocation(Location loc) {
  if (kIsDebugBuild) {
    for (Location scratch : scratches_) {
      CHECK(!loc.Equals(scratch));
    }
  }
  scratches_.push_back(loc);
}

void ParallelMoveResolverNoSwap::RemoveScratchLocation(Location loc) {
  DCHECK(!IsBlockedByMoves(loc));
  for (auto it = scratches_.begin(), end = scratches_.end(); it != end; ++it) {
    if (loc.Equals(*it)) {
      scratches_.erase(it);
      break;
    }
  }
}

void ParallelMoveResolverNoSwap::PerformMove(size_t index) {
  // Each call to this function performs a move and deletes it from the move
  // graph. We first recursively perform any move blocking this one. We mark
  // a move as "pending" on entry to PerformMove in order to detect cycles
  // in the move graph. We use scratch locations to resolve cycles, which may
  // require adding additional pending moves. After a move has been performed,
  // we update the source operands in the move graph to reduce dependencies in
  // the graph.

  MoveOperands* move = moves_[index];
  DCHECK(!move->IsPending());
  DCHECK(!move->IsEliminated());
  if (move->IsRedundant()) {
    // Previous operations on the list of moves have caused this particular move
    // to become a no-op, so we can safely eliminate it. Consider for example
    // (0 -> 1) (1 -> 0) (1 -> 2). There is a cycle (0 -> 1) (1 -> 0) that we will
    // resolve as (1 -> scratch) (0 -> 1) (scratch -> 0). If, by chance, '2' is
    // used as the scratch location, the move (1 -> 2) will occur while resolving
    // the cycle. When that move is emitted, the code will update moves with a '1'
    // as their source to use '2' instead (see `UpdateMoveSource()`). In our example
    // the initial move (1 -> 2) would then become the no-op (2 -> 2) that can be
    // eliminated here.
    move->Eliminate();
    return;
  }

  // Clear this move's destination to indicate a pending move. The actual
  // destination is saved in a stack-allocated local. Recursion may allow
  // multiple moves to be pending.
  DCHECK(!move->GetSource().IsInvalid());
  Location destination = move->MarkPending();

  // Perform a depth-first traversal of the move graph to resolve
  // dependencies. Any unperformed, unpending move with a source the same
  // as this one's destination blocks this one so recursively perform all
  // such moves.
  for (size_t i = 0; i < moves_.size(); ++i) {
    const MoveOperands& other_move = *moves_[i];
    if (other_move.Blocks(destination) && !other_move.IsPending()) {
      PerformMove(i);
    }
  }

  // We are about to resolve this move and don't need it marked as
  // pending, so restore its destination.
  move->ClearPending(destination);

  // No one else should write to the move destination while it is pending.
  DCHECK(!move->IsRedundant());

  Location source = move->GetSource();
  // The move may be blocked on several pending moves, in which case we have a cycle.
  if (IsBlockedByMoves(destination)) {
    // For a cycle like (A -> B) (B -> C) (C -> A), we change it to the following
    // sequence:
    //   (C -> scratch)      # Emit right now.
    //   (A -> B) (B -> C)   # Unblocked.
    //   (scratch -> A)      # Add to pending_moves_, blocked by (A -> B).
    Location::Kind kind = source.GetKind();
    DCHECK_NE(kind, Location::kConstant);
    Location scratch = AllocateScratchLocationFor(kind);
    // We only care about the move size.
    DataType::Type type = move->Is64BitMove() ? DataType::Type::kInt64 : DataType::Type::kInt32;
    // Perform (C -> scratch).
    move->SetDestination(scratch);
    EmitMove(index);
    move->Eliminate();
    UpdateMoveSource(source, scratch);
    // Add (scratch -> A).
    AddPendingMove(scratch, destination, type);
  } else {
    // This move is not blocked.
    EmitMove(index);
    move->Eliminate();
    UpdateMoveSource(source, destination);
  }

  // Moves in the pending list should not block any other moves. But performing
  // unblocked moves in the pending list can free scratch registers, so we do this
  // as early as possible.
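  // In the (A -> B) (B -> C) (C -> A) example above, once (A -> B) has been
  // performed, the pending move (scratch -> A) becomes unblocked; it is
  // emitted by the loop below and its scratch location is freed.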
  MoveOperands* pending_move;
  while ((pending_move = GetUnblockedPendingMove(source)) != nullptr) {
    Location pending_source = pending_move->GetSource();
    Location pending_destination = pending_move->GetDestination();
    // We do not depend on the pending move index, so just delete the move instead
    // of eliminating it, to keep the pending list clean.
    DeletePendingMove(pending_move);
    move->SetSource(pending_source);
    move->SetDestination(pending_destination);
    EmitMove(index);
    move->Eliminate();
    UpdateMoveSource(pending_source, pending_destination);
    // Free any unblocked locations in the scratch location list.
    // Note: Fetch size() on each iteration because scratches_ can be modified inside the loop.
    // FIXME: If FreeScratchLocation() removes the location from scratches_,
    // we skip the next location. This happens for arm64.
    for (size_t i = 0; i < scratches_.size(); ++i) {
      Location scratch = scratches_[i];
      // Only a scratch location overlapping with the performed move's source can be unblocked.
      if (scratch.OverlapsWith(pending_source) && !IsBlockedByMoves(scratch)) {
        FreeScratchLocation(pending_source);
      }
    }
  }
}

void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) {
  // This function is used to reduce the dependencies in the graph after
  // (from -> to) has been performed. Since we ensure there is no move with the same
  // destination, (to -> X) cannot be blocked while (from -> X) might still be
  // blocked. Consider for example the moves (0 -> 1) (1 -> 2) (1 -> 3). After
  // (1 -> 2) has been performed, the moves left are (0 -> 1) and (1 -> 3). There is
  // a dependency between the two. If we update the source location from 1 to 2, we
  // will get (0 -> 1) and (2 -> 3). There is no dependency between the two.
  //
  // This is not something we must do, but we can use fewer scratch locations with
  // this trick. For example, we can avoid using additional scratch locations for
  // moves (0 -> 1), (1 -> 2), (1 -> 0).
  for (MoveOperands* move : moves_) {
    if (move->GetSource().Equals(from)) {
      move->SetSource(to);
    }
  }
}

void ParallelMoveResolverNoSwap::AddPendingMove(Location source,
                                                Location destination,
                                                DataType::Type type) {
  pending_moves_.push_back(new (allocator_) MoveOperands(source, destination, type, nullptr));
}

void ParallelMoveResolverNoSwap::DeletePendingMove(MoveOperands* move) {
  RemoveElement(pending_moves_, move);
}

MoveOperands* ParallelMoveResolverNoSwap::GetUnblockedPendingMove(Location loc) {
  for (MoveOperands* move : pending_moves_) {
    Location destination = move->GetDestination();
    // Only moves whose destination overlaps with `loc` can be unblocked.
    if (destination.OverlapsWith(loc) && !IsBlockedByMoves(destination)) {
      return move;
    }
  }
  return nullptr;
}

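// Returns true if some unperformed move, pending or not, still reads from
// `loc`, i.e. `loc` cannot be clobbered yet.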
bool ParallelMoveResolverNoSwap::IsBlockedByMoves(Location loc) {
  for (MoveOperands* move : pending_moves_) {
    if (move->Blocks(loc)) {
      return true;
    }
  }
  for (MoveOperands* move : moves_) {
    if (move->Blocks(loc)) {
      return true;
    }
  }
  return false;
}

// So far this is only used for debugging purposes, to make sure all pending
// moves have been performed.
size_t ParallelMoveResolverNoSwap::GetNumberOfPendingMoves() {
  return pending_moves_.size();
}

}  // namespace art