src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp
author eosterlund
Wed, 09 Oct 2019 12:30:06 +0000
changeset 58516 d376d86b0a01
parent 58273 08a5148e7c4e
child 58679 9c3209ff7550
permissions -rw-r--r--
8230565: ZGC: Redesign C2 load barrier to expand on the MachNode level Reviewed-by: pliden, stefank, neliasso Contributed-by: erik.osterlund@oracle.com, per.liden@oracle.com, stefan.karlsson@oracle.com, nils.eliasson@oracle.com

/*
 * Copyright (c) 2018, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "code/codeBlob.hpp"
#include "code/vmreg.inline.hpp"
#include "gc/z/zBarrier.inline.hpp"
#include "gc/z/zBarrierSet.hpp"
#include "gc/z/zBarrierSetAssembler.hpp"
#include "gc/z/zBarrierSetRuntime.hpp"
#include "memory/resourceArea.hpp"
#include "runtime/sharedRuntime.hpp"
#include "utilities/macros.hpp"
#ifdef COMPILER1
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "gc/z/c1/zBarrierSetC1.hpp"
#endif // COMPILER1
#ifdef COMPILER2
#include "gc/z/c2/zBarrierSetC2.hpp"
#endif // COMPILER2

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif

#undef __
#define __ masm->

static void call_vm(MacroAssembler* masm,
                    address entry_point,
                    Register arg0,
                    Register arg1) {
  // Setup arguments
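  // Both arguments must end up in c_rarg0/c_rarg1. If arg1 already lives in
  // c_rarg0, moving arg0 into c_rarg0 first would clobber it, hence the
  // swap/reordering below.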
  if (arg1 == c_rarg0) {
    if (arg0 == c_rarg1) {
      __ xchgptr(c_rarg1, c_rarg0);
    } else {
      __ movptr(c_rarg1, arg1);
      __ movptr(c_rarg0, arg0);
    }
  } else {
    if (arg0 != c_rarg0) {
      __ movptr(c_rarg0, arg0);
    }
    if (arg1 != c_rarg1) {
      __ movptr(c_rarg1, arg1);
    }
  }

  // Call VM
  __ MacroAssembler::call_VM_leaf_base(entry_point, 2);
}

void ZBarrierSetAssembler::load_at(MacroAssembler* masm,
                                   DecoratorSet decorators,
                                   BasicType type,
                                   Register dst,
                                   Address src,
                                   Register tmp1,
                                   Register tmp_thread) {
  if (!ZBarrierSet::barrier_needed(decorators, type)) {
    // Barrier not needed
    BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
    return;
  }

  BLOCK_COMMENT("ZBarrierSetAssembler::load_at {");

  // Allocate scratch register
  Register scratch = tmp1;
  if (tmp1 == noreg) {
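    // The caller did not provide a temp register; borrow the callee-saved r12
    // and preserve its value across the barrier.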
    scratch = r12;
    __ push(scratch);
  }

  assert_different_registers(dst, scratch);

  Label done;

  //
  // Fast Path
  //

  // Load address
  __ lea(scratch, src);

  // Load oop at address
  __ movptr(dst, Address(scratch, 0));

  // Test address bad mask
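  // If none of the bad mask bits are set, the loaded oop is already good and
  // no barrier work is needed (the common case).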
  __ testptr(dst, address_bad_mask_from_thread(r15_thread));
  __ jcc(Assembler::zero, done);

  //
  // Slow path
  //

  // Save registers
  __ push(rax);
  __ push(rcx);
  __ push(rdx);
  __ push(rdi);
  __ push(rsi);
  __ push(r8);
  __ push(r9);
  __ push(r10);
  __ push(r11);

  // We may end up here from generate_native_wrapper, in which case the method
  // may have floats as arguments that must be spilled before calling the VM
  // runtime leaf. From the interpreter all floats are passed on the stack.
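  // Each spill slot is two words, matching the full 128-bit XMM register
  // width that movdqu saves and restores.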
  assert(Argument::n_float_register_parameters_j == 8, "Assumption");
  const int xmm_size = wordSize * 2;
  const int xmm_spill_size = xmm_size * Argument::n_float_register_parameters_j;
  __ subptr(rsp, xmm_spill_size);
  __ movdqu(Address(rsp, xmm_size * 7), xmm7);
  __ movdqu(Address(rsp, xmm_size * 6), xmm6);
  __ movdqu(Address(rsp, xmm_size * 5), xmm5);
  __ movdqu(Address(rsp, xmm_size * 4), xmm4);
  __ movdqu(Address(rsp, xmm_size * 3), xmm3);
  __ movdqu(Address(rsp, xmm_size * 2), xmm2);
  __ movdqu(Address(rsp, xmm_size * 1), xmm1);
  __ movdqu(Address(rsp, xmm_size * 0), xmm0);

  // Call VM
  call_vm(masm, ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), dst, scratch);

  // Restore registers
  __ movdqu(xmm0, Address(rsp, xmm_size * 0));
  __ movdqu(xmm1, Address(rsp, xmm_size * 1));
  __ movdqu(xmm2, Address(rsp, xmm_size * 2));
  __ movdqu(xmm3, Address(rsp, xmm_size * 3));
  __ movdqu(xmm4, Address(rsp, xmm_size * 4));
  __ movdqu(xmm5, Address(rsp, xmm_size * 5));
  __ movdqu(xmm6, Address(rsp, xmm_size * 6));
  __ movdqu(xmm7, Address(rsp, xmm_size * 7));
  __ addptr(rsp, xmm_spill_size);

  __ pop(r11);
  __ pop(r10);
  __ pop(r9);
  __ pop(r8);
  __ pop(rsi);
  __ pop(rdi);
  __ pop(rdx);
  __ pop(rcx);

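  // The runtime call returns the healed oop in rax. If dst is rax we simply
  // drop the saved copy from the stack, otherwise move the result into dst
  // and restore the original rax.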
  if (dst == rax) {
    __ addptr(rsp, wordSize);
  } else {
    __ movptr(dst, rax);
    __ pop(rax);
  }

  __ bind(done);

  // Restore scratch register
  if (tmp1 == noreg) {
    __ pop(scratch);
  }

  BLOCK_COMMENT("} ZBarrierSetAssembler::load_at");
}

#ifdef ASSERT

void ZBarrierSetAssembler::store_at(MacroAssembler* masm,
                                    DecoratorSet decorators,
                                    BasicType type,
                                    Address dst,
                                    Register src,
                                    Register tmp1,
                                    Register tmp2) {
  BLOCK_COMMENT("ZBarrierSetAssembler::store_at {");

  // Verify oop store
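  // ZGC needs no store barrier for oop stores, but in debug builds we check
  // that the value being stored is a good oop, i.e. has no bad mask bits set.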
  if (is_reference_type(type)) {
    // Note that src could be noreg, which means we
    // are storing null and can skip verification.
    if (src != noreg) {
      Label done;
      __ testptr(src, address_bad_mask_from_thread(r15_thread));
      __ jcc(Assembler::zero, done);
      __ stop("Verify oop store failed");
      __ should_not_reach_here();
      __ bind(done);
    }
  }

  // Store value
  BarrierSetAssembler::store_at(masm, decorators, type, dst, src, tmp1, tmp2);

  BLOCK_COMMENT("} ZBarrierSetAssembler::store_at");
}

#endif // ASSERT

void ZBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm,
                                              DecoratorSet decorators,
                                              BasicType type,
                                              Register src,
                                              Register dst,
                                              Register count) {
  if (!ZBarrierSet::barrier_needed(decorators, type)) {
    // Barrier not needed
    return;
  }

  BLOCK_COMMENT("ZBarrierSetAssembler::arraycopy_prologue {");

  // Save registers
  __ pusha();

  // Call VM
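  // load_barrier_on_oop_array heals every oop in [src, src + count) up front,
  // so the copy itself can run without per-element load barriers.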
  call_vm(masm, ZBarrierSetRuntime::load_barrier_on_oop_array_addr(), src, count);

  // Restore registers
  __ popa();

  BLOCK_COMMENT("} ZBarrierSetAssembler::arraycopy_prologue");
}

void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm,
                                                         Register jni_env,
                                                         Register obj,
                                                         Register tmp,
                                                         Label& slowpath) {
  BLOCK_COMMENT("ZBarrierSetAssembler::try_resolve_jobject_in_native {");

  // Resolve jobject
  BarrierSetAssembler::try_resolve_jobject_in_native(masm, jni_env, obj, tmp, slowpath);

  // Test address bad mask
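  // If any bad mask bits are set, branch to the caller-provided slowpath
  // label instead of calling the runtime here; the caller handles the
  // resolve on its slow path.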
  __ testptr(obj, address_bad_mask_from_jni_env(jni_env));
  __ jcc(Assembler::notZero, slowpath);

  BLOCK_COMMENT("} ZBarrierSetAssembler::try_resolve_jobject_in_native");
}

#ifdef COMPILER1

#undef __
#define __ ce->masm()->

void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce,
                                                         LIR_Opr ref) const {
  __ testptr(ref->as_register(), address_bad_mask_from_thread(r15_thread));
}

void ZBarrierSetAssembler::generate_c1_load_barrier_stub(LIR_Assembler* ce,
                                                         ZLoadBarrierStubC1* stub) const {
  // Stub entry
  __ bind(*stub->entry());

  Register ref = stub->ref()->as_register();
  Register ref_addr = noreg;
  Register tmp = noreg;

  if (stub->tmp()->is_valid()) {
    // Load address into tmp register
    ce->leal(stub->ref_addr(), stub->tmp());
    ref_addr = tmp = stub->tmp()->as_pointer_register();
  } else {
    // Address already in register
    ref_addr = stub->ref_addr()->as_address_ptr()->base()->as_pointer_register();
  }

  assert_different_registers(ref, ref_addr, noreg);

  // Save rax unless it is the result or tmp register
  if (ref != rax && tmp != rax) {
    __ push(rax);
  }

  // Setup arguments and call runtime stub
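  // Reserve stack space for the two stub arguments passed via
  // store_parameter() and read back with load_parameter() in the runtime stub.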
  __ subptr(rsp, 2 * BytesPerWord);
  ce->store_parameter(ref_addr, 1);
  ce->store_parameter(ref, 0);
  __ call(RuntimeAddress(stub->runtime_stub()));
  __ addptr(rsp, 2 * BytesPerWord);

  // Verify result
  __ verify_oop(rax, "Bad oop");

  // Move result into place
  if (ref != rax) {
    __ movptr(ref, rax);
  }

  // Restore rax unless it is the result or tmp register
  if (ref != rax && tmp != rax) {
    __ pop(rax);
  }

  // Stub exit
  __ jmp(*stub->continuation());
}

#undef __
#define __ sasm->

void ZBarrierSetAssembler::generate_c1_load_barrier_runtime_stub(StubAssembler* sasm,
                                                                 DecoratorSet decorators) const {
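  // This is the out-of-line runtime stub that the C1 load barrier stubs above
  // call into. All live registers, including FPU/XMM state, are preserved
  // around the leaf call; rax is deliberately not restored since it carries
  // the result.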
  // Enter and save registers
  __ enter();
  __ save_live_registers_no_oop_map(true /* save_fpu_registers */);

  // Setup arguments
  __ load_parameter(1, c_rarg1);
  __ load_parameter(0, c_rarg0);

  // Call VM
  __ call_VM_leaf(ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), c_rarg0, c_rarg1);

  // Restore registers and return
  __ restore_live_registers_except_rax(true /* restore_fpu_registers */);
  __ leave();
  __ ret(0);
}

#endif // COMPILER1

#ifdef COMPILER2

OptoReg::Name ZBarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) {
  if (!OptoReg::is_reg(opto_reg)) {
    return OptoReg::Bad;
  }

  const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);
  if (vm_reg->is_XMMRegister()) {
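    // The low four bits of an XMM register's base OptoReg name (cleared by
    // the mask below) are reused to encode how much of the register is live:
    // 1 (default, at most 64 bits), 2 = VecX (128 bits), 4 = VecY (256 bits),
    // 8 = VecZ (512 bits). ZSaveLiveRegisters::xmm_slot_size() decodes this
    // back into a byte size.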
    opto_reg &= ~15;
    switch (node->ideal_reg()) {
      case Op_VecX:
        opto_reg |= 2;
        break;
      case Op_VecY:
        opto_reg |= 4;
        break;
      case Op_VecZ:
        opto_reg |= 8;
        break;
      default:
        opto_reg |= 1;
        break;
    }
  }

  return opto_reg;
}

// We use the vec_spill_helper from the x86.ad file to avoid reinventing this wheel
extern int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st);

#undef __
#define __ _masm->

class ZSaveLiveRegisters {
private:
  struct XMMRegisterData {
    XMMRegister _reg;
    int         _size;

    // Used by GrowableArray::find()
    bool operator == (const XMMRegisterData& other) {
      return _reg == other._reg;
    }
  };

  MacroAssembler* const          _masm;
  GrowableArray<Register>        _gp_registers;
  GrowableArray<XMMRegisterData> _xmm_registers;
  int                            _spill_size;
  int                            _spill_offset;

  static int xmm_compare_register_size(XMMRegisterData* left, XMMRegisterData* right) {
    if (left->_size == right->_size) {
      return 0;
    }

    return (left->_size < right->_size) ? -1 : 1;
  }

  static int xmm_slot_size(OptoReg::Name opto_reg) {
    // The low order 4 bits denote how much of the XMM register is live
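    // (e.g. a live 256-bit VecY is encoded as base | 4, which decodes to
    // 4 << 3 = 32 bytes)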
    return (opto_reg & 15) << 3;
  }

  static uint xmm_ideal_reg_for_size(int reg_size) {
    switch (reg_size) {
    case 8:
      return Op_VecD;
    case 16:
      return Op_VecX;
    case 32:
      return Op_VecY;
    case 64:
      return Op_VecZ;
    default:
      fatal("Invalid register size %d", reg_size);
      return 0;
    }
  }

  bool xmm_needs_vzeroupper() const {
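    // vzeroupper is only needed if some register wider than 128 bits is live,
    // to avoid AVX-SSE transition penalties around the call.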
    return _xmm_registers.is_nonempty() && _xmm_registers.at(0)._size > 16;
  }

  void xmm_register_save(const XMMRegisterData& reg_data) {
    const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
    const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
    _spill_offset -= reg_data._size;
    vec_spill_helper(__ code(), false /* do_size */, false /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
  }

  void xmm_register_restore(const XMMRegisterData& reg_data) {
    const OptoReg::Name opto_reg = OptoReg::as_OptoReg(reg_data._reg->as_VMReg());
    const uint ideal_reg = xmm_ideal_reg_for_size(reg_data._size);
    vec_spill_helper(__ code(), false /* do_size */, true /* is_load */, _spill_offset, opto_reg, ideal_reg, tty);
    _spill_offset += reg_data._size;
  }

  void gp_register_save(Register reg) {
    _spill_offset -= 8;
    __ movq(Address(rsp, _spill_offset), reg);
  }

  void gp_register_restore(Register reg) {
    __ movq(reg, Address(rsp, _spill_offset));
    _spill_offset += 8;
  }

  void initialize(ZLoadBarrierStubC2* stub) {
    // Create mask of caller saved registers that need to
    // be saved/restored if live
    RegMask caller_saved;
    caller_saved.Insert(OptoReg::as_OptoReg(rax->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(rcx->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(rdx->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(rsi->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(rdi->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r8->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r9->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r10->as_VMReg()));
    caller_saved.Insert(OptoReg::as_OptoReg(r11->as_VMReg()));
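    // Exclude the ref register: it receives the healed oop as the stub's
    // result, so restoring its old value would clobber the result.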
    caller_saved.Remove(OptoReg::as_OptoReg(stub->ref()->as_VMReg()));

    // Create mask of live registers
    RegMask live = stub->live();
    if (stub->tmp() != noreg) {
      live.Insert(OptoReg::as_OptoReg(stub->tmp()->as_VMReg()));
    }

    int gp_spill_size = 0;
    int xmm_spill_size = 0;

    // Record registers that need to be saved/restored
    while (live.is_NotEmpty()) {
      const OptoReg::Name opto_reg = live.find_first_elem();
      const VMReg vm_reg = OptoReg::as_VMReg(opto_reg);

      live.Remove(opto_reg);

      if (vm_reg->is_Register()) {
        if (caller_saved.Member(opto_reg)) {
          _gp_registers.append(vm_reg->as_Register());
          gp_spill_size += 8;
        }
      } else if (vm_reg->is_XMMRegister()) {
        // The low order 4 bits of the opto_reg encode how large a part of the register is live
        const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~15);
        const int reg_size = xmm_slot_size(opto_reg);
        const XMMRegisterData reg_data = { vm_reg_base->as_XMMRegister(), reg_size };
        const int reg_index = _xmm_registers.find(reg_data);
        if (reg_index == -1) {
          // Not previously appended
          _xmm_registers.append(reg_data);
          xmm_spill_size += reg_size;
        } else {
          // Previously appended, update size
          const int reg_size_prev = _xmm_registers.at(reg_index)._size;
          if (reg_size > reg_size_prev) {
            _xmm_registers.at_put(reg_index, reg_data);
            xmm_spill_size += reg_size - reg_size_prev;
          }
        }
      } else {
        fatal("Unexpected register type");
      }
    }

    // Sort by size, largest first
    _xmm_registers.sort(xmm_compare_register_size);

    // Stack pointer must be 16 bytes aligned for the call
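    // _spill_offset starts at the (aligned) top of the spill area and is
    // decremented as registers are saved below.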
    _spill_offset = _spill_size = align_up(xmm_spill_size + gp_spill_size, 16);
  }

public:
  ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) :
      _masm(masm),
      _gp_registers(),
      _xmm_registers(),
      _spill_size(0),
      _spill_offset(0) {

    //
    // Stack layout after registers have been spilled:
    //
    // | ...            | original rsp, 16 bytes aligned
    // ------------------
    // | zmm0 high      |
    // | ...            |
    // | zmm0 low       | 16 bytes aligned
    // | ...            |
    // | ymm1 high      |
    // | ...            |
    // | ymm1 low       | 16 bytes aligned
    // | ...            |
    // | xmmN high      |
    // | ...            |
    // | xmmN low       | 8 bytes aligned
    // | reg0           | 8 bytes aligned
    // | reg1           |
    // | ...            |
    // | regN           | new rsp, if 16 bytes aligned
    // | <padding>      | else new rsp, 16 bytes aligned
    // ------------------
    //

    // Figure out what registers to save/restore
    initialize(stub);

    // Allocate stack space
    if (_spill_size > 0) {
      __ subptr(rsp, _spill_size);
    }

    // Save XMM/YMM/ZMM registers
    for (int i = 0; i < _xmm_registers.length(); i++) {
      xmm_register_save(_xmm_registers.at(i));
    }

    if (xmm_needs_vzeroupper()) {
      __ vzeroupper();
    }

    // Save general purpose registers
    for (int i = 0; i < _gp_registers.length(); i++) {
      gp_register_save(_gp_registers.at(i));
    }
  }

  ~ZSaveLiveRegisters() {
    // Restore general purpose registers
    for (int i = _gp_registers.length() - 1; i >= 0; i--) {
      gp_register_restore(_gp_registers.at(i));
    }

    __ vzeroupper();

    // Restore XMM/YMM/ZMM registers
    for (int i = _xmm_registers.length() - 1; i >= 0; i--) {
      xmm_register_restore(_xmm_registers.at(i));
    }

    // Free stack space
    if (_spill_size > 0) {
      __ addptr(rsp, _spill_size);
    }
  }
};

class ZSetupArguments {
private:
  MacroAssembler* const _masm;
  const Register        _ref;
  const Address         _ref_addr;

public:
  ZSetupArguments(MacroAssembler* masm, ZLoadBarrierStubC2* stub) :
      _masm(masm),
      _ref(stub->ref()),
      _ref_addr(stub->ref_addr()) {

    // Setup arguments
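    // The slow path expects the reference in c_rarg0 and the field address in
    // c_rarg1 (null when no self healing should be done). The cases below are
    // ordered so that neither source register is clobbered before it has been
    // read.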
    if (_ref_addr.base() == noreg) {
      // No self healing
      if (_ref != c_rarg0) {
        __ movq(c_rarg0, _ref);
      }
      __ xorq(c_rarg1, c_rarg1);
    } else {
      // Self healing
      if (_ref == c_rarg0) {
        __ lea(c_rarg1, _ref_addr);
      } else if (_ref != c_rarg1) {
        __ lea(c_rarg1, _ref_addr);
        __ movq(c_rarg0, _ref);
      } else if (_ref_addr.base() != c_rarg0 && _ref_addr.index() != c_rarg0) {
        __ movq(c_rarg0, _ref);
        __ lea(c_rarg1, _ref_addr);
      } else {
        __ xchgq(c_rarg0, c_rarg1);
        if (_ref_addr.base() == c_rarg0) {
          __ lea(c_rarg1, Address(c_rarg1, _ref_addr.index(), _ref_addr.scale(), _ref_addr.disp()));
        } else if (_ref_addr.index() == c_rarg0) {
          __ lea(c_rarg1, Address(_ref_addr.base(), c_rarg1, _ref_addr.scale(), _ref_addr.disp()));
        } else {
          ShouldNotReachHere();
        }
      }
    }
  }

  ~ZSetupArguments() {
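    // The slow path call returns the healed oop in rax.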
    // Transfer result
    if (_ref != rax) {
      __ movq(_ref, rax);
    }
  }
};

#undef __
#define __ masm->

void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, ZLoadBarrierStubC2* stub) const {
  BLOCK_COMMENT("ZLoadBarrierStubC2");

  // Stub entry
  __ bind(*stub->entry());

  {
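    // ZSaveLiveRegisters spills the live registers and ZSetupArguments moves
    // ref/ref_addr into the argument registers. Their destructors run in
    // reverse order when the scope closes: first the result in rax is moved
    // into ref, then the spilled registers are restored.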
    ZSaveLiveRegisters save_live_registers(masm, stub);
    ZSetupArguments setup_arguments(masm, stub);
    __ call(RuntimeAddress(stub->slow_path()));
  }

  // Stub exit
  __ jmp(*stub->continuation());
}

#undef __

#endif // COMPILER2