/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "instruction_simplifier_shared.h"
|
|
|
|
#include "mirror/array-inl.h"
|
|
|
|
namespace art {
|
|
|
|
namespace {
|
|
|
|
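// Helper for TryCombineMultiplyAccumulate() below: try to fold a
// multiplication whose other input is an Add/Sub involving a +/-1 constant
// into a single HMultiplyAccumulate, using the identities spelled out in the
// comments inside.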
bool TrySimpleMultiplyAccumulatePatterns(HMul* mul,
                                         HBinaryOperation* input_binop,
                                         HInstruction* input_other) {
  DCHECK(DataType::IsIntOrLongType(mul->GetType()));
  DCHECK(input_binop->IsAdd() || input_binop->IsSub());
  DCHECK_NE(input_binop, input_other);
  if (!input_binop->HasOnlyOneNonEnvironmentUse()) {
    return false;
  }

  // Try to interpret patterns like
  //    a * (b <+/-> 1)
  // as
  //    (a * b) <+/-> a
  HInstruction* input_a = input_other;
  HInstruction* input_b = nullptr;  // Set to a non-null value if we found a pattern to optimize.
  HInstruction::InstructionKind op_kind;

  if (input_binop->IsAdd()) {
    if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) {
      // Interpret
      //    a * (b + 1)
      // as
      //    (a * b) + a
      input_b = input_binop->GetLeastConstantLeft();
      op_kind = HInstruction::kAdd;
    }
  } else {
    DCHECK(input_binop->IsSub());
    if (input_binop->GetRight()->IsConstant() &&
        input_binop->GetRight()->AsConstant()->IsMinusOne()) {
      // Interpret
      //    a * (b - (-1))
      // as
      //    a + (a * b)
      input_b = input_binop->GetLeft();
      op_kind = HInstruction::kAdd;
    } else if (input_binop->GetLeft()->IsConstant() &&
               input_binop->GetLeft()->AsConstant()->IsOne()) {
      // Interpret
      //    a * (1 - b)
      // as
      //    a - (a * b)
      input_b = input_binop->GetRight();
      op_kind = HInstruction::kSub;
    }
  }

  if (input_b == nullptr) {
    // We did not find a pattern we can optimize.
    return false;
  }

  ArenaAllocator* allocator = mul->GetBlock()->GetGraph()->GetAllocator();
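  // `input_a` appears twice below: once as the accumulator and once as a
  // multiply operand, so the new node computes (a * b) <+/-> a as described
  // above (the operand order is (accumulator, mul_left, mul_right)).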
  HMultiplyAccumulate* mulacc = new (allocator) HMultiplyAccumulate(
      mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc());

  mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc);
  input_binop->GetBlock()->RemoveInstruction(input_binop);

  return true;
}

}  // namespace

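// Try to merge an HMul with a dependent addition, subtraction or negation into
// a single HMultiplyAccumulate, so that the backends can emit one
// multiply-accumulate instruction (e.g. MLA/MLS on Arm, MADD/MSUB on Arm64).
// Returns true if the graph was rewritten.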
bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa) {
  DataType::Type type = mul->GetType();
  switch (isa) {
    case InstructionSet::kArm:
    case InstructionSet::kThumb2:
      if (type != DataType::Type::kInt32) {
        return false;
      }
      break;
    case InstructionSet::kArm64:
      if (!DataType::IsIntOrLongType(type)) {
        return false;
      }
      break;
    default:
      return false;
  }

  ArenaAllocator* allocator = mul->GetBlock()->GetGraph()->GetAllocator();

  if (mul->HasOnlyOneNonEnvironmentUse()) {
    HInstruction* use = mul->GetUses().front().GetUser();
    if (use->IsAdd() || use->IsSub()) {
      // Replace code looking like
      //    MUL tmp, x, y
      //    SUB dst, acc, tmp
      // with
      //    MULSUB dst, acc, x, y
      // Note that we do not want to (unconditionally) perform the merge when
      // the multiplication has multiple uses, even if it could be merged into
      // all of them. Multiple uses could happen on the same control-flow path,
      // and we would then increase the amount of work. In the future we could
      // try to evaluate whether all uses are on different control-flow paths
      // (using dominance and reverse-dominance information) and only perform
      // the merge when they are.
      HInstruction* accumulator = nullptr;
      HBinaryOperation* binop = use->AsBinaryOperation();
      HInstruction* binop_left = binop->GetLeft();
      HInstruction* binop_right = binop->GetRight();
      // Be careful after GVN: in general the two inputs could have been
      // squashed into the same instruction, but that cannot happen here since
      // the `HMul` has only one use.
      DCHECK_NE(binop_left, binop_right);
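      // Determine the accumulator. For an Add either operand may act as the
      // accumulator, but for a Sub only `acc - (x * y)` can be merged: there
      // is no multiply-accumulate form computing `(x * y) - acc` on these ISAs.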
      if (binop_right == mul) {
        accumulator = binop_left;
      } else if (use->IsAdd()) {
        DCHECK_EQ(binop_left, mul);
        accumulator = binop_right;
      }

      if (accumulator != nullptr) {
        HMultiplyAccumulate* mulacc =
            new (allocator) HMultiplyAccumulate(type,
                                                binop->GetKind(),
                                                accumulator,
                                                mul->GetLeft(),
                                                mul->GetRight());

        binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
        DCHECK(!mul->HasUses());
        mul->GetBlock()->RemoveInstruction(mul);
        return true;
      }
    } else if (use->IsNeg() && isa != InstructionSet::kArm) {
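      // Replace code looking like
      //    MUL tmp, x, y
      //    NEG dst, tmp
      // with
      //    MULSUB dst, zero, x, y
      // i.e. compute the negation as `0 - (x * y)`. On Arm64 the zero
      // accumulator maps onto the zero register; Arm32 has no zero register,
      // which is presumably why the merge is skipped there (see the `isa`
      // check above).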
      HMultiplyAccumulate* mulacc =
          new (allocator) HMultiplyAccumulate(type,
                                              HInstruction::kSub,
                                              mul->GetBlock()->GetGraph()->GetConstant(type, 0),
                                              mul->GetLeft(),
                                              mul->GetRight());

      use->GetBlock()->ReplaceAndRemoveInstructionWith(use, mulacc);
      DCHECK(!mul->HasUses());
      mul->GetBlock()->RemoveInstruction(mul);
      return true;
    }
  }

  // Use a multiply-accumulate instruction for a few simple patterns.
  // We prefer not to apply the following transformations when the left and
  // right inputs perform the same operation.
  // We rely on GVN having squashed the inputs if appropriate. However the
  // results are still correct even if that did not happen.
  if (mul->GetLeft() == mul->GetRight()) {
    return false;
  }

  HInstruction* left = mul->GetLeft();
  HInstruction* right = mul->GetRight();
  if ((right->IsAdd() || right->IsSub()) &&
      TrySimpleMultiplyAccumulatePatterns(mul, right->AsBinaryOperation(), left)) {
    return true;
  }
  if ((left->IsAdd() || left->IsSub()) &&
      TrySimpleMultiplyAccumulatePatterns(mul, left->AsBinaryOperation(), right)) {
    return true;
  }
  return false;
}

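// Try to merge an And/Or/Xor with a negated (`Not`) input into a single
// HBitwiseNegatedRight, which the Arm backends can lower to a BIC/ORN/EON
// style instruction.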
bool TryMergeNegatedInput(HBinaryOperation* op) {
  DCHECK(op->IsAnd() || op->IsOr() || op->IsXor()) << op->DebugName();
  HInstruction* left = op->GetLeft();
  HInstruction* right = op->GetRight();

  // Only consider the case where there is exactly one Not; with two Nots,
  // De Morgan's laws should be applied instead (e.g. `~a & ~b` is better
  // rewritten as `~(a | b)`).
  if (left->IsNot() ^ right->IsNot()) {
    HInstruction* hnot = (left->IsNot() ? left : right);
    HInstruction* hother = (left->IsNot() ? right : left);

    // Only do the simplification if the Not has only one use and can thus be
    // safely removed. Even though ARM64 negated bitwise operations do not have
    // an immediate variant (only register), we still do the simplification when
    // `hother` is a constant, because it removes an instruction if the constant
    // cannot be encoded as an immediate:
    //   mov r0, #large_constant
    //   neg r2, r1
    //   and r0, r0, r2
    // becomes:
    //   mov r0, #large_constant
    //   bic r0, r0, r1
    if (hnot->HasOnlyOneNonEnvironmentUse()) {
      // Replace code looking like
      //    NOT tmp, mask
      //    AND dst, src, tmp   (respectively ORR, EOR)
      // with
      //    BIC dst, src, mask  (respectively ORN, EON)
      HInstruction* src = hnot->AsNot()->GetInput();

      HBitwiseNegatedRight* neg_op = new (hnot->GetBlock()->GetGraph()->GetAllocator())
          HBitwiseNegatedRight(op->GetType(), op->GetKind(), hother, src, op->GetDexPc());

      op->GetBlock()->ReplaceAndRemoveInstructionWith(op, neg_op);
      hnot->GetBlock()->RemoveInstruction(hnot);
      return true;
    }
  }

  return false;
}

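// Try to split an array access into a separate computation of the data base
// address, i.e. replace code looking like
//    ArrayGet dst, array, index
// with
//    IntermediateAddress base, array, data_offset
//    ArrayGet dst, base, index
// so that the base address can be shared between accesses and folded into the
// addressing mode (a summary of the intent; the actual lowering happens in the
// backend visitors referenced at the end of this function).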
bool TryExtractArrayAccessAddress(HInstruction* access,
                                  HInstruction* array,
                                  HInstruction* index,
                                  size_t data_offset) {
  if (index->IsConstant() ||
      (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) {
    // When the index is a constant, all of the addressing can be fitted into
    // the memory access instruction, so do not split the access.
    return false;
  }
  if (access->IsArraySet() &&
      access->AsArraySet()->GetValue()->GetType() == DataType::Type::kReference) {
    // The access may require a runtime call or the original array pointer.
    return false;
  }
  if (kEmitCompilerReadBarrier &&
      !kUseBakerReadBarrier &&
      access->IsArrayGet() &&
      access->GetType() == DataType::Type::kReference) {
    // For object arrays, the non-Baker read barrier instrumentation requires
    // the original array pointer.
    return false;
  }

  // Proceed to extract the base address computation.
  HGraph* graph = access->GetBlock()->GetGraph();
  ArenaAllocator* allocator = graph->GetAllocator();

  HIntConstant* offset = graph->GetIntConstant(data_offset);
  HIntermediateAddress* address = new (allocator) HIntermediateAddress(array, offset, kNoDexPc);
  // TODO: Is it ok to not have this on the intermediate address?
  // address->SetReferenceTypeInfo(array->GetReferenceTypeInfo());
  access->GetBlock()->InsertInstructionBefore(address, access);
  access->ReplaceInput(address, 0);
  // Both instructions must depend on GC to prevent any instruction that can
  // trigger GC from being inserted between the two.
  access->AddSideEffects(SideEffects::DependsOnGC());
  DCHECK(address->GetSideEffects().Includes(SideEffects::DependsOnGC()));
  DCHECK(access->GetSideEffects().Includes(SideEffects::DependsOnGC()));
  // TODO: Code generation for HArrayGet and HArraySet will check whether the input address
  // is an HIntermediateAddress and generate appropriate code.
  // We would like to replace the `HArrayGet` and `HArraySet` with custom instructions (maybe
  // `HArm64Load` and `HArm64Store`, `HArmLoad` and `HArmStore`). We defer these changes
  // because these new instructions would not bring any advantages yet.
  // Also see the comments in
  // `InstructionCodeGeneratorARMVIXL::VisitArrayGet()`
  // `InstructionCodeGeneratorARMVIXL::VisitArraySet()`
  // `InstructionCodeGeneratorARM64::VisitArrayGet()`
  // `InstructionCodeGeneratorARM64::VisitArraySet()`.
  return true;
}

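// Vector counterpart of the above: try to extract the
// `data_offset + (index << component_shift)` computation into a separate
// HIntermediateAddressIndex node that several SIMD memory operations using the
// same offset and shift can share.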
bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index) {
  if (index->IsConstant()) {
    // If the index is constant, the whole address calculation can often be
    // done by the LDR/STR instructions themselves.
    // TODO: Handle constants that cannot be embedded in the instruction.
    return false;
  }

  HGraph* graph = access->GetBlock()->GetGraph();
  ArenaAllocator* allocator = graph->GetAllocator();
  DataType::Type packed_type = access->GetPackedType();
  uint32_t data_offset = mirror::Array::DataOffset(
      DataType::Size(packed_type)).Uint32Value();
  size_t component_shift = DataType::SizeShift(packed_type);

  bool is_extracting_beneficial = false;
  // Extracting the intermediate address for the index is only beneficial if
  // it has at least two users.
  for (const HUseListNode<HInstruction*>& use : index->GetUses()) {
    HInstruction* user = use.GetUser();
    if (user->IsVecMemoryOperation() && user != access) {
      HVecMemoryOperation* another_access = user->AsVecMemoryOperation();
      DataType::Type another_packed_type = another_access->GetPackedType();
      uint32_t another_data_offset = mirror::Array::DataOffset(
          DataType::Size(another_packed_type)).Uint32Value();
      size_t another_component_shift = DataType::SizeShift(another_packed_type);
      if (another_data_offset == data_offset && another_component_shift == component_shift) {
        is_extracting_beneficial = true;
        break;
      }
    } else if (user->IsIntermediateAddressIndex()) {
      HIntermediateAddressIndex* another_access = user->AsIntermediateAddressIndex();
      uint32_t another_data_offset = another_access->GetOffset()->AsIntConstant()->GetValue();
      size_t another_component_shift = another_access->GetShift()->AsIntConstant()->GetValue();
      if (another_data_offset == data_offset && another_component_shift == component_shift) {
        is_extracting_beneficial = true;
        break;
      }
    }
  }

  if (!is_extracting_beneficial) {
    return false;
  }

  // Proceed to extract the `index + data_offset` address computation.
  HIntConstant* offset = graph->GetIntConstant(data_offset);
  HIntConstant* shift = graph->GetIntConstant(component_shift);
  HIntermediateAddressIndex* address =
      new (allocator) HIntermediateAddressIndex(index, offset, shift, kNoDexPc);

  access->GetBlock()->InsertInstructionBefore(address, access);
  access->ReplaceInput(address, 1);

  return true;
}

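// Try to rewrite Sub(c, Sub(a, b)) as Add(c, Sub(b, a)): the inner Sub's
// operands are swapped in place and the outer Sub becomes an Add, presumably
// so that later passes can take advantage of the Add being commutative.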
bool TryReplaceSubSubWithSubAdd(HSub* last_sub) {
  DCHECK(last_sub->GetRight()->IsSub());
  HBasicBlock* basic_block = last_sub->GetBlock();
  ArenaAllocator* allocator = basic_block->GetGraph()->GetAllocator();
  HInstruction* last_sub_right = last_sub->GetRight();
  HInstruction* last_sub_left = last_sub->GetLeft();
  if (last_sub_right->GetUses().HasExactlyOneElement()) {
    // Reorder the operands of last_sub_right: Sub(a, b) -> Sub(b, a).
    HInstruction* a = last_sub_right->InputAt(0);
    HInstruction* b = last_sub_right->InputAt(1);
    last_sub_right->ReplaceInput(b, 0);
    last_sub_right->ReplaceInput(a, 1);

    // Replace Sub(c, Sub(a, b)) with Add(c, Sub(b, a)).
    HAdd* add = new (allocator) HAdd(last_sub->GetType(), last_sub_left, last_sub_right);
    basic_block->ReplaceAndRemoveInstructionWith(last_sub, add);
    return true;
  } else {
    return false;
  }
}

}  // namespace art