v811_spc009/art/compiler/optimizing/code_generator.h

/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_

#include "arch/instruction_set.h"
#include "arch/instruction_set_features.h"
#include "base/arena_containers.h"
#include "base/arena_object.h"
#include "base/array_ref.h"
#include "base/bit_field.h"
#include "base/bit_utils.h"
#include "base/enums.h"
#include "base/globals.h"
#include "base/memory_region.h"
#include "class_root.h"
#include "dex/string_reference.h"
#include "dex/type_reference.h"
#include "graph_visualizer.h"
#include "locations.h"
#include "nodes.h"
#include "optimizing_compiler_stats.h"
#include "read_barrier_option.h"
#include "stack.h"
#include "utils/label.h"

namespace art {

// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);

// Minimum value for a primitive integer.
static int32_t constexpr kPrimIntMin = 0x80000000;
// Minimum value for a primitive long.
static int64_t constexpr kPrimLongMin = INT64_C(0x8000000000000000);

// Maximum value for a primitive integer.
static int32_t constexpr kPrimIntMax = 0x7fffffff;
// Maximum value for a primitive long.
static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);

static constexpr ReadBarrierOption kCompilerReadBarrierOption =
    kEmitCompilerReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;

class Assembler;
class CodeGenerator;
class CompilerOptions;
class StackMapStream;
class ParallelMoveResolver;

namespace linker {
class LinkerPatch;
}  // namespace linker

class CodeAllocator {
 public:
  CodeAllocator() {}
  virtual ~CodeAllocator() {}

  virtual uint8_t* Allocate(size_t size) = 0;
  virtual ArrayRef<const uint8_t> GetMemory() const = 0;

 private:
  DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
};

class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> {
 public:
  explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) {
    for (size_t i = 0; i < kMaximumNumberOfExpectedRegisters; ++i) {
      saved_core_stack_offsets_[i] = kRegisterNotSaved;
      saved_fpu_stack_offsets_[i] = kRegisterNotSaved;
    }
  }

  virtual ~SlowPathCode() {}

  virtual void EmitNativeCode(CodeGenerator* codegen) = 0;

  // Save live core and floating-point caller-save registers and
  // update the stack mask in `locations` for registers holding object
  // references.
  virtual void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);
  // Restore live core and floating-point caller-save registers.
  virtual void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations);

  bool IsCoreRegisterSaved(int reg) const {
    return saved_core_stack_offsets_[reg] != kRegisterNotSaved;
  }

  bool IsFpuRegisterSaved(int reg) const {
    return saved_fpu_stack_offsets_[reg] != kRegisterNotSaved;
  }

  uint32_t GetStackOffsetOfCoreRegister(int reg) const {
    return saved_core_stack_offsets_[reg];
  }

  uint32_t GetStackOffsetOfFpuRegister(int reg) const {
    return saved_fpu_stack_offsets_[reg];
  }

  virtual bool IsFatal() const { return false; }

  virtual const char* GetDescription() const = 0;

  Label* GetEntryLabel() { return &entry_label_; }
  Label* GetExitLabel() { return &exit_label_; }

  HInstruction* GetInstruction() const {
    return instruction_;
  }

  uint32_t GetDexPc() const {
    return instruction_ != nullptr ? instruction_->GetDexPc() : kNoDexPc;
  }

 protected:
  static constexpr size_t kMaximumNumberOfExpectedRegisters = 32;
  static constexpr uint32_t kRegisterNotSaved = -1;
  // The instruction where this slow path is happening.
  HInstruction* instruction_;
  uint32_t saved_core_stack_offsets_[kMaximumNumberOfExpectedRegisters];
  uint32_t saved_fpu_stack_offsets_[kMaximumNumberOfExpectedRegisters];

 private:
  Label entry_label_;
  Label exit_label_;

  DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};

class InvokeDexCallingConventionVisitor {
 public:
  virtual Location GetNextLocation(DataType::Type type) = 0;
  virtual Location GetReturnLocation(DataType::Type type) const = 0;
  virtual Location GetMethodLocation() const = 0;

 protected:
  InvokeDexCallingConventionVisitor() {}
  virtual ~InvokeDexCallingConventionVisitor() {}

  // The current index for core registers.
  uint32_t gp_index_ = 0u;
  // The current index for floating-point registers.
  uint32_t float_index_ = 0u;
  // The current stack index.
  uint32_t stack_index_ = 0u;

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};

class FieldAccessCallingConvention {
 public:
  virtual Location GetObjectLocation() const = 0;
  virtual Location GetFieldIndexLocation() const = 0;
  virtual Location GetReturnLocation(DataType::Type type) const = 0;
  virtual Location GetSetValueLocation(DataType::Type type, bool is_instance) const = 0;
  virtual Location GetFpuLocation(DataType::Type type) const = 0;
  virtual ~FieldAccessCallingConvention() {}

 protected:
  FieldAccessCallingConvention() {}

 private:
  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConvention);
};

class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
 public:
  // Compiles the graph to executable instructions.
  void Compile(CodeAllocator* allocator);
  static std::unique_ptr<CodeGenerator> Create(HGraph* graph,
                                               const CompilerOptions& compiler_options,
                                               OptimizingCompilerStats* stats = nullptr);
  virtual ~CodeGenerator();

  // Get the graph. This is the outermost graph, never the graph of a method being inlined.
  HGraph* GetGraph() const { return graph_; }

  HBasicBlock* GetNextBlockToEmit() const;
  HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
  bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;

  size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
    // Note that this follows the current calling convention.
    return GetFrameSize()
        + static_cast<size_t>(InstructionSetPointerSize(GetInstructionSet()))  // Art method
        + parameter->GetIndex() * kVRegSize;
  }

  virtual void Initialize() = 0;
  virtual void Finalize(CodeAllocator* allocator);
  virtual void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches);
  virtual bool NeedsThunkCode(const linker::LinkerPatch& patch) const;
  virtual void EmitThunkCode(const linker::LinkerPatch& patch,
                             /*out*/ ArenaVector<uint8_t>* code,
                             /*out*/ std::string* debug_name);
  virtual void GenerateFrameEntry() = 0;
  virtual void GenerateFrameExit() = 0;
  virtual void Bind(HBasicBlock* block) = 0;
  virtual void MoveConstant(Location destination, int32_t value) = 0;
  virtual void MoveLocation(Location dst, Location src, DataType::Type dst_type) = 0;
  virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0;

  virtual Assembler* GetAssembler() = 0;
  virtual const Assembler& GetAssembler() const = 0;
  virtual size_t GetWordSize() const = 0;

  // Returns whether the target supports predicated SIMD instructions.
  virtual bool SupportsPredicatedSIMD() const { return false; }

  // Get FP register width in bytes for spilling/restoring in the slow paths.
  //
  // Note: In SIMD graphs this should return SIMD register width as all FP and SIMD registers
  // alias and live SIMD registers are forced to be spilled in full size in the slow paths.
  virtual size_t GetSlowPathFPWidth() const {
    // Default implementation.
    return GetCalleePreservedFPWidth();
  }

  // Get FP register width required to be preserved by the target ABI.
  virtual size_t GetCalleePreservedFPWidth() const  = 0;

  // Get the size of the target SIMD register in bytes.
  virtual size_t GetSIMDRegisterWidth() const = 0;
  virtual uintptr_t GetAddressOf(HBasicBlock* block) = 0;
  void InitializeCodeGeneration(size_t number_of_spill_slots,
                                size_t maximum_safepoint_spill_size,
                                size_t number_of_out_slots,
                                const ArenaVector<HBasicBlock*>& block_order);
  // Backends can override this as necessary. For most, no special alignment is required.
  virtual uint32_t GetPreferredSlotsAlignment() const { return 1; }

  uint32_t GetFrameSize() const { return frame_size_; }
  void SetFrameSize(uint32_t size) { frame_size_ = size; }
  uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }

  size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
  size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
  virtual void SetupBlockedRegisters() const = 0;

  virtual void ComputeSpillMask() {
    core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
    DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
    fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
  }

  static uint32_t ComputeRegisterMask(const int* registers, size_t length) {
    uint32_t mask = 0;
    for (size_t i = 0, e = length; i < e; ++i) {
      mask |= (1 << registers[i]);
    }
    return mask;
  }

  virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
  virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
  virtual InstructionSet GetInstructionSet() const = 0;

  const CompilerOptions& GetCompilerOptions() const { return compiler_options_; }

  // Saves the register in the stack. Returns the size taken on stack.
  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0;
  // Restores the register from the stack. Returns the size taken on stack.
  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;
  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0;

  virtual bool NeedsTwoRegisters(DataType::Type type) const = 0;
  // Returns whether we should split long moves in parallel moves.
  virtual bool ShouldSplitLongMoves() const { return false; }

  size_t GetNumberOfCoreCalleeSaveRegisters() const {
    return POPCOUNT(core_callee_save_mask_);
  }

  size_t GetNumberOfCoreCallerSaveRegisters() const {
    DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters());
    return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters();
  }

  bool IsCoreCalleeSaveRegister(int reg) const {
    return (core_callee_save_mask_ & (1 << reg)) != 0;
  }

  bool IsFloatingPointCalleeSaveRegister(int reg) const {
    return (fpu_callee_save_mask_ & (1 << reg)) != 0;
  }

  uint32_t GetSlowPathSpills(LocationSummary* locations, bool core_registers) const {
    DCHECK(locations->OnlyCallsOnSlowPath() ||
           (locations->Intrinsified() && locations->CallsOnMainAndSlowPath() &&
               !locations->HasCustomSlowPathCallingConvention()));
    uint32_t live_registers = core_registers
        ? locations->GetLiveRegisters()->GetCoreRegisters()
        : locations->GetLiveRegisters()->GetFloatingPointRegisters();
    if (locations->HasCustomSlowPathCallingConvention()) {
      // Save only the live registers that the custom calling convention wants us to save.
      uint32_t caller_saves = core_registers
          ? locations->GetCustomSlowPathCallerSaves().GetCoreRegisters()
          : locations->GetCustomSlowPathCallerSaves().GetFloatingPointRegisters();
      return live_registers & caller_saves;
    } else {
      // Default ABI, we need to spill non-callee-save live registers.
      uint32_t callee_saves = core_registers ? core_callee_save_mask_ : fpu_callee_save_mask_;
      return live_registers & ~callee_saves;
    }
  }

  size_t GetNumberOfSlowPathSpills(LocationSummary* locations, bool core_registers) const {
    return POPCOUNT(GetSlowPathSpills(locations, core_registers));
  }

  size_t GetStackOffsetOfShouldDeoptimizeFlag() const {
    DCHECK(GetGraph()->HasShouldDeoptimizeFlag());
    DCHECK_GE(GetFrameSize(), FrameEntrySpillSize() + kShouldDeoptimizeFlagSize);
    return GetFrameSize() - FrameEntrySpillSize() - kShouldDeoptimizeFlagSize;
  }

  // Record native to dex mapping for a suspend point. Required by runtime.
  void RecordPcInfo(HInstruction* instruction,
                    uint32_t dex_pc,
                    uint32_t native_pc,
                    SlowPathCode* slow_path = nullptr,
                    bool native_debug_info = false);

  // Record native to dex mapping for a suspend point.
  // The native_pc is used from Assembler::CodePosition.
  //
  // Note: As Assembler::CodePosition is target dependent, it does not guarantee the exact native_pc
  // for the instruction. If the exact native_pc is required it must be provided explicitly.
  void RecordPcInfo(HInstruction* instruction,
                    uint32_t dex_pc,
                    SlowPathCode* slow_path = nullptr,
                    bool native_debug_info = false);

  // Check whether we have already recorded mapping at this PC.
  bool HasStackMapAtCurrentPc();

  // Record extra stack maps if we support native debugging.
  //
  // ARM specific behaviour: The recorded native PC might be a branch over pools to instructions
  // corresponding the dex PC.
  void MaybeRecordNativeDebugInfo(HInstruction* instruction,
                                  uint32_t dex_pc,
                                  SlowPathCode* slow_path = nullptr);

  bool CanMoveNullCheckToUser(HNullCheck* null_check);
  virtual void MaybeRecordImplicitNullCheck(HInstruction* instruction);
  LocationSummary* CreateThrowingSlowPathLocations(
      HInstruction* instruction, RegisterSet caller_saves = RegisterSet::Empty());
  void GenerateNullCheck(HNullCheck* null_check);
  virtual void GenerateImplicitNullCheck(HNullCheck* null_check) = 0;
  virtual void GenerateExplicitNullCheck(HNullCheck* null_check) = 0;

  // Records a stack map which the runtime might use to set catch phi values
  // during exception delivery.
  // TODO: Replace with a catch-entering instruction that records the environment.
  void RecordCatchBlockInfo();

  // Get the ScopedArenaAllocator used for codegen memory allocation.
  ScopedArenaAllocator* GetScopedAllocator();

  void AddSlowPath(SlowPathCode* slow_path);

  ScopedArenaVector<uint8_t> BuildStackMaps(const dex::CodeItem* code_item_for_osr_check);
  size_t GetNumberOfJitRoots() const;

  // Fills the `literals` array with literals collected during code generation.
  // Also emits literal patches.
  void EmitJitRoots(uint8_t* code,
                    const uint8_t* roots_data,
                    /*out*/std::vector<Handle<mirror::Object>>* roots)
      REQUIRES_SHARED(Locks::mutator_lock_);

  bool IsLeafMethod() const {
    return is_leaf_;
  }

  void MarkNotLeaf() {
    is_leaf_ = false;
    requires_current_method_ = true;
  }

  void SetRequiresCurrentMethod() {
    requires_current_method_ = true;
  }

  bool RequiresCurrentMethod() const {
    return requires_current_method_;
  }

  // Clears the spill slots taken by loop phis in the `LocationSummary` of the
  // suspend check. This is called when the code generator generates code
  // for the suspend check at the back edge (instead of where the suspend check
  // is, which is the loop entry). At this point, the spill slots for the phis
  // have not been written to.
  void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check,
                                             HParallelMove* spills) const;

  bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; }
  bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; }

  bool IsBlockedCoreRegister(size_t i) { return blocked_core_registers_[i]; }
  bool IsBlockedFloatingPointRegister(size_t i) { return blocked_fpu_registers_[i]; }

  // Helper that returns the offset of the array's length field.
  // Note: Besides the normal arrays, we also use the HArrayLength for
  // accessing the String's `count` field in String intrinsics.
  static uint32_t GetArrayLengthOffset(HArrayLength* array_length);

  // Helper that returns the offset of the array's data.
  // Note: Besides the normal arrays, we also use the HArrayGet for
  // accessing the String's `value` field in String intrinsics.
  static uint32_t GetArrayDataOffset(HArrayGet* array_get);

  void EmitParallelMoves(Location from1,
                         Location to1,
                         DataType::Type type1,
                         Location from2,
                         Location to2,
                         DataType::Type type2);

  static bool InstanceOfNeedsReadBarrier(HInstanceOf* instance_of) {
    // Used only for kExactCheck, kAbstractClassCheck, kClassHierarchyCheck and kArrayObjectCheck.
    DCHECK(instance_of->GetTypeCheckKind() == TypeCheckKind::kExactCheck ||
           instance_of->GetTypeCheckKind() == TypeCheckKind::kAbstractClassCheck ||
           instance_of->GetTypeCheckKind() == TypeCheckKind::kClassHierarchyCheck ||
           instance_of->GetTypeCheckKind() == TypeCheckKind::kArrayObjectCheck)
        << instance_of->GetTypeCheckKind();
    // If the target class is in the boot image, it's non-moveable and it doesn't matter
    // if we compare it with a from-space or to-space reference, the result is the same.
    // It's OK to traverse a class hierarchy jumping between from-space and to-space.
    return kEmitCompilerReadBarrier && !instance_of->GetTargetClass()->IsInBootImage();
  }

  static ReadBarrierOption ReadBarrierOptionForInstanceOf(HInstanceOf* instance_of) {
    return InstanceOfNeedsReadBarrier(instance_of) ? kWithReadBarrier : kWithoutReadBarrier;
  }

  static bool IsTypeCheckSlowPathFatal(HCheckCast* check_cast) {
    switch (check_cast->GetTypeCheckKind()) {
      case TypeCheckKind::kExactCheck:
      case TypeCheckKind::kAbstractClassCheck:
      case TypeCheckKind::kClassHierarchyCheck:
      case TypeCheckKind::kArrayObjectCheck:
      case TypeCheckKind::kInterfaceCheck: {
        bool needs_read_barrier =
            kEmitCompilerReadBarrier && !check_cast->GetTargetClass()->IsInBootImage();
        // We do not emit read barriers for HCheckCast, so we can get false negatives
        // and the slow path shall re-check and simply return if the cast is actually OK.
        return !needs_read_barrier;
      }
      case TypeCheckKind::kArrayCheck:
      case TypeCheckKind::kUnresolvedCheck:
        return false;
      case TypeCheckKind::kBitstringCheck:
        return true;
    }
    LOG(FATAL) << "Unreachable";
    UNREACHABLE();
  }

  static LocationSummary::CallKind GetCheckCastCallKind(HCheckCast* check_cast) {
    return (IsTypeCheckSlowPathFatal(check_cast) && !check_cast->CanThrowIntoCatchBlock())
        ? LocationSummary::kNoCall  // In fact, call on a fatal (non-returning) slow path.
        : LocationSummary::kCallOnSlowPath;
  }

  static bool StoreNeedsWriteBarrier(DataType::Type type, HInstruction* value) {
    // Check that null value is not represented as an integer constant.
    DCHECK(type != DataType::Type::kReference || !value->IsIntConstant());
    return type == DataType::Type::kReference && !value->IsNullConstant();
  }


  // Performs checks pertaining to an InvokeRuntime call.
  void ValidateInvokeRuntime(QuickEntrypointEnum entrypoint,
                             HInstruction* instruction,
                             SlowPathCode* slow_path);

  // Performs checks pertaining to an InvokeRuntimeWithoutRecordingPcInfo call.
  static void ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* instruction,
                                                          SlowPathCode* slow_path);

  void AddAllocatedRegister(Location location) {
    allocated_registers_.Add(location);
  }

  bool HasAllocatedRegister(bool is_core, int reg) const {
    return is_core
        ? allocated_registers_.ContainsCoreRegister(reg)
        : allocated_registers_.ContainsFloatingPointRegister(reg);
  }

  void AllocateLocations(HInstruction* instruction);

  // Tells whether the stack frame of the compiled method is
  // considered "empty", that is either actually having a size of zero,
  // or just containing the saved return address register.
  bool HasEmptyFrame() const {
    return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
  }

  static int8_t GetInt8ValueOf(HConstant* constant) {
    DCHECK(constant->IsIntConstant());
    return constant->AsIntConstant()->GetValue();
  }

  static int16_t GetInt16ValueOf(HConstant* constant) {
    DCHECK(constant->IsIntConstant());
    return constant->AsIntConstant()->GetValue();
  }

  static int32_t GetInt32ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else {
      DCHECK(constant->IsFloatConstant());
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    }
  }

  static int64_t GetInt64ValueOf(HConstant* constant) {
    if (constant->IsIntConstant()) {
      return constant->AsIntConstant()->GetValue();
    } else if (constant->IsNullConstant()) {
      return 0;
    } else if (constant->IsFloatConstant()) {
      return bit_cast<int32_t, float>(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsLongConstant()) {
      return constant->AsLongConstant()->GetValue();
    } else {
      DCHECK(constant->IsDoubleConstant());
      return bit_cast<int64_t, double>(constant->AsDoubleConstant()->GetValue());
    }
  }

  size_t GetFirstRegisterSlotInSlowPath() const {
    return first_register_slot_in_slow_path_;
  }

  uint32_t FrameEntrySpillSize() const {
    return GetFpuSpillSize() + GetCoreSpillSize();
  }

  virtual ParallelMoveResolver* GetMoveResolver() = 0;

  static void CreateCommonInvokeLocationSummary(
      HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);

  template <typename CriticalNativeCallingConventionVisitor,
            size_t kNativeStackAlignment,
            size_t GetCriticalNativeDirectCallFrameSize(const char* shorty, uint32_t shorty_len)>
  size_t PrepareCriticalNativeCall(HInvokeStaticOrDirect* invoke) {
      DCHECK(!invoke->GetLocations()->Intrinsified());
      CriticalNativeCallingConventionVisitor calling_convention_visitor(
          /*for_register_allocation=*/ false);
      HParallelMove parallel_move(GetGraph()->GetAllocator());
      PrepareCriticalNativeArgumentMoves(invoke, &calling_convention_visitor, &parallel_move);
      size_t out_frame_size =
          RoundUp(calling_convention_visitor.GetStackOffset(), kNativeStackAlignment);
      if (kIsDebugBuild) {
        uint32_t shorty_len;
        const char* shorty = GetCriticalNativeShorty(invoke, &shorty_len);
        DCHECK_EQ(GetCriticalNativeDirectCallFrameSize(shorty, shorty_len), out_frame_size);
      }
      if (out_frame_size != 0u) {
        FinishCriticalNativeFrameSetup(out_frame_size, &parallel_move);
      }
      return out_frame_size;
  }

  void GenerateInvokeStaticOrDirectRuntimeCall(
      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path);

  void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke);

  void GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke, SlowPathCode* slow_path = nullptr);

  void GenerateInvokeCustomCall(HInvokeCustom* invoke);

  void CreateStringBuilderAppendLocations(HStringBuilderAppend* instruction, Location out);

  void CreateUnresolvedFieldLocationSummary(
      HInstruction* field_access,
      DataType::Type field_type,
      const FieldAccessCallingConvention& calling_convention);

  void GenerateUnresolvedFieldAccess(
      HInstruction* field_access,
      DataType::Type field_type,
      uint32_t field_index,
      uint32_t dex_pc,
      const FieldAccessCallingConvention& calling_convention);

  static void CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls,
                                                        Location runtime_type_index_location,
                                                        Location runtime_return_location);
  void GenerateLoadClassRuntimeCall(HLoadClass* cls);

  static void CreateLoadMethodHandleRuntimeCallLocationSummary(HLoadMethodHandle* method_handle,
                                                             Location runtime_handle_index_location,
                                                             Location runtime_return_location);
  void GenerateLoadMethodHandleRuntimeCall(HLoadMethodHandle* method_handle);

  static void CreateLoadMethodTypeRuntimeCallLocationSummary(HLoadMethodType* method_type,
                                                             Location runtime_type_index_location,
                                                             Location runtime_return_location);
  void GenerateLoadMethodTypeRuntimeCall(HLoadMethodType* method_type);

  static uint32_t GetBootImageOffset(ObjPtr<mirror::Object> object)
      REQUIRES_SHARED(Locks::mutator_lock_);
  static uint32_t GetBootImageOffset(HLoadClass* load_class);
  static uint32_t GetBootImageOffset(HLoadString* load_string);
  static uint32_t GetBootImageOffset(HInvoke* invoke);
  static uint32_t GetBootImageOffset(ClassRoot class_root);
  static uint32_t GetBootImageOffsetOfIntrinsicDeclaringClass(HInvoke* invoke);

  static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);

  void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
  DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }

  virtual void InvokeRuntime(QuickEntrypointEnum entrypoint,
                             HInstruction* instruction,
                             uint32_t dex_pc,
                             SlowPathCode* slow_path = nullptr) = 0;

  // Check if the desired_string_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  virtual HLoadString::LoadKind GetSupportedLoadStringKind(
      HLoadString::LoadKind desired_string_load_kind) = 0;

  // Check if the desired_class_load_kind is supported. If it is, return it,
  // otherwise return a fall-back kind that should be used instead.
  virtual HLoadClass::LoadKind GetSupportedLoadClassKind(
      HLoadClass::LoadKind desired_class_load_kind) = 0;

  static LocationSummary::CallKind GetLoadStringCallKind(HLoadString* load) {
    switch (load->GetLoadKind()) {
      case HLoadString::LoadKind::kBssEntry:
        DCHECK(load->NeedsEnvironment());
        return LocationSummary::kCallOnSlowPath;
      case HLoadString::LoadKind::kRuntimeCall:
        DCHECK(load->NeedsEnvironment());
        return LocationSummary::kCallOnMainOnly;
      case HLoadString::LoadKind::kJitTableAddress:
        DCHECK(!load->NeedsEnvironment());
        return kEmitCompilerReadBarrier
            ? LocationSummary::kCallOnSlowPath
            : LocationSummary::kNoCall;
        break;
      default:
        DCHECK(!load->NeedsEnvironment());
        return LocationSummary::kNoCall;
    }
  }

  // Check if the desired_dispatch_info is supported. If it is, return it,
  // otherwise return a fall-back info that should be used instead.
  virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
      ArtMethod* method) = 0;

  // Generate a call to a static or direct method.
  virtual void GenerateStaticOrDirectCall(
      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0;
  // Generate a call to a virtual method.
  virtual void GenerateVirtualCall(
      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0;

  // Copy the result of a call into the given target.
  virtual void MoveFromReturnRegister(Location trg, DataType::Type type) = 0;

  virtual void IncreaseFrame(size_t adjustment) = 0;
  virtual void DecreaseFrame(size_t adjustment) = 0;

  virtual void GenerateNop() = 0;

  static QuickEntrypointEnum GetArrayAllocationEntrypoint(HNewArray* new_array);

 protected:
  // Patch info used for recording locations of required linker patches and their targets,
  // i.e. target method, string, type or code identified by their dex file and index,
  // or .data.bimg.rel.ro entries identified by the boot image offset.
  template <typename LabelType>
  struct PatchInfo {
    PatchInfo(const DexFile* dex_file, uint32_t off_or_idx)
        : target_dex_file(dex_file), offset_or_index(off_or_idx), label() { }

    // Target dex file or null for .data.bmig.rel.ro patches.
    const DexFile* target_dex_file;
    // Either the boot image offset (to write to .data.bmig.rel.ro) or string/type/method index.
    uint32_t offset_or_index;
    // Label for the instruction to patch.
    LabelType label;
  };

  CodeGenerator(HGraph* graph,
                size_t number_of_core_registers,
                size_t number_of_fpu_registers,
                size_t number_of_register_pairs,
                uint32_t core_callee_save_mask,
                uint32_t fpu_callee_save_mask,
                const CompilerOptions& compiler_options,
                OptimizingCompilerStats* stats);

  virtual HGraphVisitor* GetLocationBuilder() = 0;
  virtual HGraphVisitor* GetInstructionVisitor() = 0;

  // Returns the location of the first spilled entry for floating point registers,
  // relative to the stack pointer.
  uint32_t GetFpuSpillStart() const {
    return GetFrameSize() - FrameEntrySpillSize();
  }

  uint32_t GetFpuSpillSize() const {
    return POPCOUNT(fpu_spill_mask_) * GetCalleePreservedFPWidth();
  }

  uint32_t GetCoreSpillSize() const {
    return POPCOUNT(core_spill_mask_) * GetWordSize();
  }

  virtual bool HasAllocatedCalleeSaveRegisters() const {
    // We check the core registers against 1 because it always comprises the return PC.
    return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
      || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
  }

  bool CallPushesPC() const {
    InstructionSet instruction_set = GetInstructionSet();
    return instruction_set == InstructionSet::kX86 || instruction_set == InstructionSet::kX86_64;
  }

  // Arm64 has its own type for a label, so we need to templatize these methods
  // to share the logic.

  template <typename LabelType>
  LabelType* CommonInitializeLabels() {
    // We use raw array allocations instead of ArenaVector<> because Labels are
    // non-constructible and non-movable and as such cannot be held in a vector.
    size_t size = GetGraph()->GetBlocks().size();
    LabelType* labels =
        GetGraph()->GetAllocator()->AllocArray<LabelType>(size, kArenaAllocCodeGenerator);
    for (size_t i = 0; i != size; ++i) {
      new(labels + i) LabelType();
    }
    return labels;
  }

  template <typename LabelType>
  LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const {
    block = FirstNonEmptyBlock(block);
    return raw_pointer_to_labels_array + block->GetBlockId();
  }

  SlowPathCode* GetCurrentSlowPath() {
    return current_slow_path_;
  }

  StackMapStream* GetStackMapStream();

  void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string);
  uint64_t GetJitStringRootIndex(StringReference string_reference);
  void ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass);
  uint64_t GetJitClassRootIndex(TypeReference type_reference);

  // Emit the patches assocatied with JIT roots. Only applies to JIT compiled code.
  virtual void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data);

  // Frame size required for this method.
  uint32_t frame_size_;
  uint32_t core_spill_mask_;
  uint32_t fpu_spill_mask_;
  uint32_t first_register_slot_in_slow_path_;

  // Registers that were allocated during linear scan.
  RegisterSet allocated_registers_;

  // Arrays used when doing register allocation to know which
  // registers we can allocate. `SetupBlockedRegisters` updates the
  // arrays.
  bool* const blocked_core_registers_;
  bool* const blocked_fpu_registers_;
  size_t number_of_core_registers_;
  size_t number_of_fpu_registers_;
  size_t number_of_register_pairs_;
  const uint32_t core_callee_save_mask_;
  const uint32_t fpu_callee_save_mask_;

  // The order to use for code generation.
  const ArenaVector<HBasicBlock*>* block_order_;

  DisassemblyInformation* disasm_info_;

 private:
  class CodeGenerationData;

  void InitializeCodeGenerationData();
  size_t GetStackOffsetOfSavedRegister(size_t index);
  void GenerateSlowPaths();
  void BlockIfInRegister(Location location, bool is_out = false) const;
  void EmitEnvironment(HEnvironment* environment,
                       SlowPathCode* slow_path,
                       bool needs_vreg_info = true);
  void EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path);

  static void PrepareCriticalNativeArgumentMoves(
      HInvokeStaticOrDirect* invoke,
      /*inout*/InvokeDexCallingConventionVisitor* visitor,
      /*out*/HParallelMove* parallel_move);

  void FinishCriticalNativeFrameSetup(size_t out_frame_size, /*inout*/HParallelMove* parallel_move);

  static const char* GetCriticalNativeShorty(HInvokeStaticOrDirect* invoke, uint32_t* shorty_len);

  OptimizingCompilerStats* stats_;

  HGraph* const graph_;
  const CompilerOptions& compiler_options_;

  // The current slow-path that we're generating code for.
  SlowPathCode* current_slow_path_;

  // The current block index in `block_order_` of the block
  // we are generating code for.
  size_t current_block_index_;

  // Whether the method is a leaf method.
  bool is_leaf_;

  // Whether an instruction in the graph accesses the current method.
  // TODO: Rename: this actually indicates that some instruction in the method
  // needs the environment including a valid stack frame.
  bool requires_current_method_;

  // The CodeGenerationData contains a ScopedArenaAllocator intended for reusing the
  // ArenaStack memory allocated in previous passes instead of adding to the memory
  // held by the ArenaAllocator. This ScopedArenaAllocator is created in
  // CodeGenerator::Compile() and remains alive until the CodeGenerator is destroyed.
  std::unique_ptr<CodeGenerationData> code_generation_data_;

  friend class OptimizingCFITest;
  ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeSIMD);
  ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeNoSIMD);

  DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};

template <typename C, typename F>
class CallingConvention {
 public:
  CallingConvention(const C* registers,
                    size_t number_of_registers,
                    const F* fpu_registers,
                    size_t number_of_fpu_registers,
                    PointerSize pointer_size)
      : registers_(registers),
        number_of_registers_(number_of_registers),
        fpu_registers_(fpu_registers),
        number_of_fpu_registers_(number_of_fpu_registers),
        pointer_size_(pointer_size) {}

  size_t GetNumberOfRegisters() const { return number_of_registers_; }
  size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }

  C GetRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_registers_);
    return registers_[index];
  }

  F GetFpuRegisterAt(size_t index) const {
    DCHECK_LT(index, number_of_fpu_registers_);
    return fpu_registers_[index];
  }

  size_t GetStackOffsetOf(size_t index) const {
    // We still reserve the space for parameters passed by registers.
    // Add space for the method pointer.
    return static_cast<size_t>(pointer_size_) + index * kVRegSize;
  }

 private:
  const C* registers_;
  const size_t number_of_registers_;
  const F* fpu_registers_;
  const size_t number_of_fpu_registers_;
  const PointerSize pointer_size_;

  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};

/**
 * A templated class SlowPathGenerator with a templated method NewSlowPath()
 * that can be used by any code generator to share equivalent slow-paths with
 * the objective of reducing generated code size.
 *
 * InstructionType:  instruction that requires SlowPathCodeType
 * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType *)
 */
template <typename InstructionType>
class SlowPathGenerator {
  static_assert(std::is_base_of<HInstruction, InstructionType>::value,
                "InstructionType is not a subclass of art::HInstruction");

 public:
  SlowPathGenerator(HGraph* graph, CodeGenerator* codegen)
      : graph_(graph),
        codegen_(codegen),
        slow_path_map_(std::less<uint32_t>(),
                       graph->GetAllocator()->Adapter(kArenaAllocSlowPaths)) {}

  // Creates and adds a new slow-path, if needed, or returns existing one otherwise.
  // Templating the method (rather than the whole class) on the slow-path type enables
  // keeping this code at a generic, non architecture-specific place.
  //
  // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType.
  //       To relax this requirement, we would need some RTTI on the stored slow-paths,
  //       or template the class as a whole on SlowPathType.
  template <typename SlowPathCodeType>
  SlowPathCodeType* NewSlowPath(InstructionType* instruction) {
    static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value,
                  "SlowPathCodeType is not a subclass of art::SlowPathCode");
    static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value,
                  "SlowPathCodeType is not constructible from InstructionType*");
    // Iterate over potential candidates for sharing. Currently, only same-typed
    // slow-paths with exactly the same dex-pc are viable candidates.
    // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing?
    const uint32_t dex_pc = instruction->GetDexPc();
    auto iter = slow_path_map_.find(dex_pc);
    if (iter != slow_path_map_.end()) {
      const ArenaVector<std::pair<InstructionType*, SlowPathCode*>>& candidates = iter->second;
      for (const auto& it : candidates) {
        InstructionType* other_instruction = it.first;
        SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second);
        // Determine if the instructions allow for slow-path sharing.
        if (HaveSameLiveRegisters(instruction, other_instruction) &&
            HaveSameStackMap(instruction, other_instruction)) {
          // Can share: reuse existing one.
          return other_slow_path;
        }
      }
    } else {
      // First time this dex-pc is seen.
      iter = slow_path_map_.Put(dex_pc,
                                {{}, {graph_->GetAllocator()->Adapter(kArenaAllocSlowPaths)}});
    }
    // Cannot share: create and add new slow-path for this particular dex-pc.
    SlowPathCodeType* slow_path =
        new (codegen_->GetScopedAllocator()) SlowPathCodeType(instruction);
    iter->second.emplace_back(std::make_pair(instruction, slow_path));
    codegen_->AddSlowPath(slow_path);
    return slow_path;
  }

 private:
  // Tests if both instructions have same set of live physical registers. This ensures
  // the slow-path has exactly the same preamble on saving these registers to stack.
  bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const {
    const uint32_t core_spill = ~codegen_->GetCoreSpillMask();
    const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask();
    RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters();
    RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters();
    return (((live1->GetCoreRegisters() & core_spill) ==
             (live2->GetCoreRegisters() & core_spill)) &&
            ((live1->GetFloatingPointRegisters() & fpu_spill) ==
             (live2->GetFloatingPointRegisters() & fpu_spill)));
  }

  // Tests if both instructions have the same stack map. This ensures the interpreter
  // will find exactly the same dex-registers at the same entries.
  bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const {
    DCHECK(i1->HasEnvironment());
    DCHECK(i2->HasEnvironment());
    // We conservatively test if the two instructions find exactly the same instructions
    // and location in each dex-register. This guarantees they will have the same stack map.
    HEnvironment* e1 = i1->GetEnvironment();
    HEnvironment* e2 = i2->GetEnvironment();
    if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) {
      return false;
    }
    for (size_t i = 0, sz = e1->Size(); i < sz; ++i) {
      if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) ||
          !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) {
        return false;
      }
    }
    return true;
  }

  HGraph* const graph_;
  CodeGenerator* const codegen_;

  // Map from dex-pc to vector of already existing instruction/slow-path pairs.
  ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_;

  DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator);
};

class InstructionCodeGenerator : public HGraphVisitor {
 public:
  InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen)
      : HGraphVisitor(graph),
        deopt_slow_paths_(graph, codegen) {}

 protected:
  // Add slow-path generator for each instruction/slow-path combination that desires sharing.
  // TODO: under current regime, only deopt sharing make sense; extend later.
  SlowPathGenerator<HDeoptimize> deopt_slow_paths_;
};

}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_