src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
author dpochepk
Wed, 12 Dec 2018 15:26:49 +0300
changeset 52977 2e4903f83295
parent 52927 226c451bd954
child 53777 9bfeac2ee88a
permissions -rw-r--r--
8205421: AARCH64: StubCodeMark should be placed after alignment Reviewed-by: aph
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
     1
/*
49164
7e958a8ebcd3 8195142: Refactor out card table from CardTableModRefBS to flatten the BarrierSet hierarchy
eosterlund
parents: 47767
diff changeset
     2
 * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
30225
e9722ea461d4 8077615: AARCH64: Add C2 intrinsic for BigInteger::multiplyToLen() method
aph
parents: 29695
diff changeset
     3
 * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
     4
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
     5
 *
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
     6
 * This code is free software; you can redistribute it and/or modify it
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
     7
 * under the terms of the GNU General Public License version 2 only, as
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
     8
 * published by the Free Software Foundation.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
     9
 *
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    10
 * This code is distributed in the hope that it will be useful, but WITHOUT
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    11
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    12
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    13
 * version 2 for more details (a copy is included in the LICENSE file that
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    14
 * accompanied this code).
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    15
 *
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    16
 * You should have received a copy of the GNU General Public License version
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    17
 * 2 along with this work; if not, write to the Free Software Foundation,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    18
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    19
 *
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    20
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    21
 * or visit www.oracle.com if you need additional information or have any
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    22
 * questions.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    23
 *
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    24
 */
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    25
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    26
#include "precompiled.hpp"
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    27
#include "asm/macroAssembler.hpp"
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    28
#include "asm/macroAssembler.inline.hpp"
49484
ee8fa73b90f9 8198949: Modularize arraycopy stub routine GC barriers
eosterlund
parents: 49455
diff changeset
    29
#include "gc/shared/barrierSet.hpp"
ee8fa73b90f9 8198949: Modularize arraycopy stub routine GC barriers
eosterlund
parents: 49455
diff changeset
    30
#include "gc/shared/barrierSetAssembler.hpp"
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    31
#include "interpreter/interpreter.hpp"
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    32
#include "nativeInst_aarch64.hpp"
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    33
#include "oops/instanceOop.hpp"
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    34
#include "oops/method.hpp"
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    35
#include "oops/objArrayKlass.hpp"
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    36
#include "oops/oop.inline.hpp"
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    37
#include "prims/methodHandles.hpp"
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    38
#include "runtime/frame.inline.hpp"
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    39
#include "runtime/handles.inline.hpp"
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    40
#include "runtime/sharedRuntime.hpp"
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    41
#include "runtime/stubCodeGenerator.hpp"
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    42
#include "runtime/stubRoutines.hpp"
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    43
#include "runtime/thread.inline.hpp"
46625
edefffab74e2 8183552: Move align functions to align.hpp
stefank
parents: 45054
diff changeset
    44
#include "utilities/align.hpp"
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    45
#ifdef COMPILER2
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    46
#include "opto/runtime.hpp"
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    47
#endif
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    48
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    49
#ifdef BUILTIN_SIM
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    50
#include "../../../../../../simulator/simulator.hpp"
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    51
#endif
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    52
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    53
// Declaration and definition of StubGenerator (no .hpp file).
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    54
// For a more detailed description of the stub routine structure
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    55
// see the comment in stubRoutines.hpp
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    56
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    57
// Shorthand used by the code-emitting routines in this file: after this
// definition `__ foo(...)` expands to `_masm->foo(...)`. Any earlier
// definition of `__` is discarded first.
#undef __
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    58
#define __ _masm->
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    59
// Index extension built with Address::sxtw, using a shift of exact_log2(4)
// when UseCompressedOops is set and exact_log2(8) otherwise.
// NOTE(review): presumably 4/8 is the in-heap size of an oop and this is
// meant for indexing oop arrays -- confirm against the users of the macro.
#define TIMES_OOP Address::sxtw(exact_log2(UseCompressedOops ? 4 : 8))
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    60
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    61
// BLOCK_COMMENT(str): in non-PRODUCT builds forwards str to the assembler's
// block_comment() (through the `__` shorthand); in PRODUCT builds it expands
// to nothing, so str is never evaluated there.
#ifdef PRODUCT
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    62
#define BLOCK_COMMENT(str) /* nothing */
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    63
#else
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    64
#define BLOCK_COMMENT(str) __ block_comment(str)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    65
#endif
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    66
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    67
// BIND(label): binds `label`, then passes the label's spelled-out name
// followed by ':' to BLOCK_COMMENT (which is empty in PRODUCT builds).
// The expansion starts with a bare `bind(label)`, so the use site has to
// supply the receiver -- presumably `__ BIND(L);`, no use is visible in this
// part of the file.
// NOTE(review): expands to two statements; not safe as the unbraced body of
// an if/else.
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    68
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    69
// Stub Code definitions
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    70
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    71
class StubGenerator: public StubCodeGenerator {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    72
 private:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    73
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    74
// inc_counter_np(counter): in non-PRODUCT builds emits code that increments
// the int `counter` in memory; in PRODUCT builds it is the no-op expression
// ((void)0) and `counter` is not evaluated.
#ifdef PRODUCT
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    75
#define inc_counter_np(counter) ((void)0)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    76
#else
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    77
  // Emits a load / add 1 / store sequence on the word located at &counter.
  //
  // counter: the int whose address is embedded in the emitted code as an
  //          ExternalAddress.
  //
  // rscratch1 and rscratch2 are overwritten by the emitted code. The update
  // is emitted as separate ldrw and strw instructions (the w-register forms),
  // i.e. a plain read-modify-write.
  void inc_counter_np_(int& counter) {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    78
    __ lea(rscratch2, ExternalAddress((address)&counter));  // rscratch2 = &counter
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    79
    __ ldrw(rscratch1, Address(rscratch2));                 // rscratch1 = counter
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    80
    __ addw(rscratch1, rscratch1, 1);                       // rscratch1 += 1
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    81
    __ strw(rscratch1, Address(rscratch2));                 // counter = rscratch1
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    82
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    83
// Non-PRODUCT form: first a block comment "inc_counter <counter expression>"
// (the argument is stringized), then the increment sequence above.
// NOTE(review): this expands to two statements and already ends in ';', so
// it must be used as a complete statement and never as the unbraced body of
// an if/else -- only the BLOCK_COMMENT would be guarded.
#define inc_counter_np(counter) \
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    84
  BLOCK_COMMENT("inc_counter " #counter); \
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    85
  inc_counter_np_(counter);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    86
#endif
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    87
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    88
  // Call stubs are used to call Java from C
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    89
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    90
  // Arguments:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    91
  //    c_rarg0:   call wrapper address                   address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    92
  //    c_rarg1:   result                                 address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    93
  //    c_rarg2:   result type                            BasicType
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    94
  //    c_rarg3:   method                                 Method*
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    95
  //    c_rarg4:   (interpreter) entry point              address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    96
  //    c_rarg5:   parameters                             intptr_t*
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    97
  //    c_rarg6:   parameter size (in words)              int
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    98
  //    c_rarg7:   thread                                 Thread*
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
    99
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   100
  // There is no return from the stub itself as any Java result
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   101
  // is written to result
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   102
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   103
  // we save r30 (lr) as the return PC at the base of the frame and
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   104
  // link r29 (fp) below it as the frame pointer installing sp (r31)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   105
  // into fp.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   106
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   107
  // we save r0-r7, which accounts for all the c arguments.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   108
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   109
  // TODO: strictly do we need to save them all? they are treated as
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   110
  // volatile by C so could we omit saving the ones we are going to
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   111
  // place in global registers (thread? method?) or those we only use
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   112
  // during setup of the Java call?
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   113
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   114
  // we don't need to save r8 which C uses as an indirect result location
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   115
  // return register.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   116
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   117
  // we don't need to save r9-r15 which both C and Java treat as
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   118
  // volatile
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   119
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   120
  // we don't need to save r16-r18 because Java does not use them
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   121
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   122
  // we save r19-r28 which Java uses as scratch registers and C
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   123
  // expects to be callee-save
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   124
  //
31955
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
   125
  // we save the bottom 64 bits of each value stored in v8-v15; it is
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
   126
  // the responsibility of the caller to preserve larger values.
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   127
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   128
  // so the stub frame looks like this when we enter Java code
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   129
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   130
  //     [ return_from_Java     ] <--- sp
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   131
  //     [ argument word n      ]
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   132
  //      ...
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   133
  // -27 [ argument word 1      ]
31955
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
   134
  // -26 [ saved v15            ] <--- sp_after_call
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
   135
  // -25 [ saved v14            ]
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
   136
  // -24 [ saved v13            ]
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
   137
  // -23 [ saved v12            ]
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
   138
  // -22 [ saved v11            ]
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
   139
  // -21 [ saved v10            ]
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
   140
  // -20 [ saved v9             ]
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
   141
  // -19 [ saved v8             ]
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   142
  // -18 [ saved r28            ]
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   143
  // -17 [ saved r27            ]
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   144
  // -16 [ saved r26            ]
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   145
  // -15 [ saved r25            ]
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   146
  // -14 [ saved r24            ]
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   147
  // -13 [ saved r23            ]
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   148
  // -12 [ saved r22            ]
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   149
  // -11 [ saved r21            ]
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   150
  // -10 [ saved r20            ]
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   151
  //  -9 [ saved r19            ]
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   152
  //  -8 [ call wrapper    (r0) ]
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   153
  //  -7 [ result          (r1) ]
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   154
  //  -6 [ result type     (r2) ]
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   155
  //  -5 [ method          (r3) ]
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   156
  //  -4 [ entry point     (r4) ]
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   157
  //  -3 [ parameters      (r5) ]
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   158
  //  -2 [ parameter size  (r6) ]
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   159
  //  -1 [ thread          (r7) ]
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   160
  //   0 [ saved fp       (r29) ] <--- fp == saved sp (r31)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   161
  //   1 [ saved lr       (r30) ]
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   162
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   163
  // Call stub stack layout: offsets of the frame slots relative to fp (rfp),
  // counted in words -- multiply by wordSize to obtain a byte offset.
  //
  // The slot contents named in the trailing comments are taken from the
  // frame layout diagram in the comment that precedes this enum. For the
  // saved v8-v15 and r19-r28 slots only every second slot has an enumerator
  // (the lower-addressed one of each adjacent pair).
  // NOTE(review): presumably because those registers are stored and reloaded
  // two at a time -- confirm against the save/restore code.
  //
  // NOTE(review): slot -3 (parameters, r5) appears in the diagram but has no
  // enumerator here.
  enum call_stub_layout {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   165
    sp_after_call_off = -26,  // lowest slot of the save area; same value as d15_off
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   166
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   167
    d15_off            = -26,  // saved v15 (saved v14 is at -25)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   168
    d13_off            = -24,  // saved v13 (saved v12 is at -23)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   169
    d11_off            = -22,  // saved v11 (saved v10 is at -21)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   170
    d9_off             = -20,  // saved v9  (saved v8 is at -19)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   171
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   172
    r28_off            = -18,  // saved r28 (saved r27 is at -17)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   173
    r26_off            = -16,  // saved r26 (saved r25 is at -15)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   174
    r24_off            = -14,  // saved r24 (saved r23 is at -13)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   175
    r22_off            = -12,  // saved r22 (saved r21 is at -11)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   176
    r20_off            = -10,  // saved r20 (saved r19 is at -9)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   177
    call_wrapper_off   =  -8,  // call wrapper address (incoming r0)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   178
    result_off         =  -7,  // result address (incoming r1)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   179
    result_type_off    =  -6,  // result type (incoming r2)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   180
    method_off         =  -5,  // method (incoming r3)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   181
    entry_point_off    =  -4,  // entry point (incoming r4)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   182
    parameter_size_off =  -2,  // parameter size (incoming r6)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   183
    thread_off         =  -1,  // thread (incoming r7)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   184
    fp_f               =   0,  // saved fp (r29); fp points at this slot
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   185
    retaddr_off        =   1,  // saved lr (r30), the return address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   186
  };
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   187
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   188
  address generate_call_stub(address& return_address) {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   189
    assert((int)frame::entry_frame_after_call_words == -(int)sp_after_call_off + 1 &&
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   190
           (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   191
           "adjust this code");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   192
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   193
    StubCodeMark mark(this, "StubRoutines", "call_stub");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   194
    address start = __ pc();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   195
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   196
    const Address sp_after_call(rfp, sp_after_call_off * wordSize);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   197
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   198
    const Address call_wrapper  (rfp, call_wrapper_off   * wordSize);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   199
    const Address result        (rfp, result_off         * wordSize);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   200
    const Address result_type   (rfp, result_type_off    * wordSize);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   201
    const Address method        (rfp, method_off         * wordSize);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   202
    const Address entry_point   (rfp, entry_point_off    * wordSize);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   203
    const Address parameter_size(rfp, parameter_size_off * wordSize);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   204
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   205
    const Address thread        (rfp, thread_off         * wordSize);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   206
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   207
    const Address d15_save      (rfp, d15_off * wordSize);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   208
    const Address d13_save      (rfp, d13_off * wordSize);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   209
    const Address d11_save      (rfp, d11_off * wordSize);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   210
    const Address d9_save       (rfp, d9_off * wordSize);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   211
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   212
    const Address r28_save      (rfp, r28_off * wordSize);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   213
    const Address r26_save      (rfp, r26_off * wordSize);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   214
    const Address r24_save      (rfp, r24_off * wordSize);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   215
    const Address r22_save      (rfp, r22_off * wordSize);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   216
    const Address r20_save      (rfp, r20_off * wordSize);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   217
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   218
    // stub code
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   219
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   220
    // we need a C prolog to bootstrap the x86 caller into the sim
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   221
    __ c_stub_prolog(8, 0, MacroAssembler::ret_type_void);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   222
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   223
    address aarch64_entry = __ pc();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   224
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   225
#ifdef BUILTIN_SIM
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   226
    // Save sender's SP for stack traces.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   227
    __ mov(rscratch1, sp);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   228
    __ str(rscratch1, Address(__ pre(sp, -2 * wordSize)));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   229
#endif
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   230
    // set up frame and move sp to end of save area
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   231
    __ enter();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   232
    __ sub(sp, rfp, -sp_after_call_off * wordSize);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   233
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   234
    // save register parameters and Java scratch/global registers
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   235
    // n.b. we save thread even though it gets installed in
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   236
    // rthread because we want to sanity check rthread later
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   237
    __ str(c_rarg7,  thread);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   238
    __ strw(c_rarg6, parameter_size);
36340
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   239
    __ stp(c_rarg4, c_rarg5,  entry_point);
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   240
    __ stp(c_rarg2, c_rarg3,  result_type);
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   241
    __ stp(c_rarg0, c_rarg1,  call_wrapper);
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   242
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   243
    __ stp(r20, r19,   r20_save);
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   244
    __ stp(r22, r21,   r22_save);
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   245
    __ stp(r24, r23,   r24_save);
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   246
    __ stp(r26, r25,   r26_save);
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   247
    __ stp(r28, r27,   r28_save);
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   248
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   249
    __ stpd(v9,  v8,   d9_save);
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   250
    __ stpd(v11, v10,  d11_save);
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   251
    __ stpd(v13, v12,  d13_save);
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   252
    __ stpd(v15, v14,  d15_save);
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   253
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   254
    // install Java thread in global register now we have saved
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   255
    // whatever value it held
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   256
    __ mov(rthread, c_rarg7);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   257
    // And method
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   258
    __ mov(rmethod, c_rarg3);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   259
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   260
    // set up the heapbase register
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   261
    __ reinit_heapbase();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   262
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   263
#ifdef ASSERT
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   264
    // make sure we have no pending exceptions
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   265
    {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   266
      Label L;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   267
      __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
   268
      __ cmp(rscratch1, (u1)NULL_WORD);
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   269
      __ br(Assembler::EQ, L);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   270
      __ stop("StubRoutines::call_stub: entered with pending exception");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   271
      __ BIND(L);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   272
    }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   273
#endif
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   274
    // pass parameters if any
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   275
    __ mov(esp, sp);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   276
    __ sub(rscratch1, sp, c_rarg6, ext::uxtw, LogBytesPerWord); // Move SP out of the way
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   277
    __ andr(sp, rscratch1, -2 * wordSize);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   278
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   279
    BLOCK_COMMENT("pass parameters if any");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   280
    Label parameters_done;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   281
    // parameter count is still in c_rarg6
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   282
    // and parameter pointer identifying param 1 is in c_rarg5
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   283
    __ cbzw(c_rarg6, parameters_done);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   284
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   285
    address loop = __ pc();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   286
    __ ldr(rscratch1, Address(__ post(c_rarg5, wordSize)));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   287
    __ subsw(c_rarg6, c_rarg6, 1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   288
    __ push(rscratch1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   289
    __ br(Assembler::GT, loop);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   290
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   291
    __ BIND(parameters_done);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   292
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   293
    // call Java entry -- passing Method*, and current sp
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   294
    //      rmethod: Method*
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   295
    //      r13: sender sp
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   296
    BLOCK_COMMENT("call Java function");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   297
    __ mov(r13, sp);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   298
    __ blr(c_rarg4);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   299
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   300
    // tell the simulator we have returned to the stub
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   301
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   302
    // we do this here because the notify will already have been done
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   303
    // if we get to the next instruction via an exception
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   304
    //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   305
    // n.b. adding this instruction here affects the calculation of
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   306
    // whether or not a routine returns to the call stub (used when
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   307
    // doing stack walks) since the normal test is to check the return
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   308
    // pc against the address saved below. So we may need to allow for
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   309
    // this extra instruction in the check.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   310
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   311
    if (NotifySimulator) {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   312
      __ notify(Assembler::method_reentry);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   313
    }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   314
    // save current address for use by exception handling code
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   315
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   316
    return_address = __ pc();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   317
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   318
    // store result depending on type (everything that is not
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   319
    // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   320
    // n.b. this assumes Java returns an integral result in r0
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   321
    // and a floating result in j_farg0
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   322
    __ ldr(j_rarg2, result);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   323
    Label is_long, is_float, is_double, exit;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   324
    __ ldr(j_rarg1, result_type);
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
   325
    __ cmp(j_rarg1, (u1)T_OBJECT);
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   326
    __ br(Assembler::EQ, is_long);
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
   327
    __ cmp(j_rarg1, (u1)T_LONG);
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   328
    __ br(Assembler::EQ, is_long);
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
   329
    __ cmp(j_rarg1, (u1)T_FLOAT);
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   330
    __ br(Assembler::EQ, is_float);
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
   331
    __ cmp(j_rarg1, (u1)T_DOUBLE);
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   332
    __ br(Assembler::EQ, is_double);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   333
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   334
    // handle T_INT case
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   335
    __ strw(r0, Address(j_rarg2));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   336
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   337
    __ BIND(exit);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   338
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   339
    // pop parameters
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   340
    __ sub(esp, rfp, -sp_after_call_off * wordSize);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   341
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   342
#ifdef ASSERT
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   343
    // verify that threads correspond
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   344
    {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   345
      Label L, S;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   346
      __ ldr(rscratch1, thread);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   347
      __ cmp(rthread, rscratch1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   348
      __ br(Assembler::NE, S);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   349
      __ get_thread(rscratch1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   350
      __ cmp(rthread, rscratch1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   351
      __ br(Assembler::EQ, L);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   352
      __ BIND(S);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   353
      __ stop("StubRoutines::call_stub: threads must correspond");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   354
      __ BIND(L);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   355
    }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   356
#endif
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   357
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   358
    // restore callee-save registers
36340
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   359
    __ ldpd(v15, v14,  d15_save);
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   360
    __ ldpd(v13, v12,  d13_save);
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   361
    __ ldpd(v11, v10,  d11_save);
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   362
    __ ldpd(v9,  v8,   d9_save);
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   363
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   364
    __ ldp(r28, r27,   r28_save);
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   365
    __ ldp(r26, r25,   r26_save);
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   366
    __ ldp(r24, r23,   r24_save);
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   367
    __ ldp(r22, r21,   r22_save);
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   368
    __ ldp(r20, r19,   r20_save);
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   369
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   370
    __ ldp(c_rarg0, c_rarg1,  call_wrapper);
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   371
    __ ldrw(c_rarg2, result_type);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   372
    __ ldr(c_rarg3,  method);
36340
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   373
    __ ldp(c_rarg4, c_rarg5,  entry_point);
f1401b7f2d58 8149907: aarch64: use load/store pair instructions in call_stub
fyang
parents: 36326
diff changeset
   374
    __ ldp(c_rarg6, c_rarg7,  parameter_size);
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   375
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   376
#ifndef PRODUCT
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   377
    // tell the simulator we are about to end Java execution
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   378
    if (NotifySimulator) {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   379
      __ notify(Assembler::method_exit);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   380
    }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   381
#endif
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   382
    // leave frame and return to caller
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   383
    __ leave();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   384
    __ ret(lr);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   385
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   386
    // handle return types different from T_INT
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   387
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   388
    __ BIND(is_long);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   389
    __ str(r0, Address(j_rarg2, 0));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   390
    __ br(Assembler::AL, exit);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   391
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   392
    __ BIND(is_float);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   393
    __ strs(j_farg0, Address(j_rarg2, 0));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   394
    __ br(Assembler::AL, exit);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   395
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   396
    __ BIND(is_double);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   397
    __ strd(j_farg0, Address(j_rarg2, 0));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   398
    __ br(Assembler::AL, exit);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   399
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   400
    return start;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   401
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   402
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   403
  // Return point for a Java call if there's an exception thrown in
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   404
  // Java code.  The exception is caught and transformed into a
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   405
  // pending exception stored in JavaThread that can be tested from
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   406
  // within the VM.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   407
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   408
  // Note: Usually the parameters are removed by the callee. In case
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   409
  // of an exception crossing an activation frame boundary, that is
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   410
  // not the case if the callee is compiled code => need to set up the
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   411
  // stack pointer.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   412
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   413
  // r0: exception oop
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   414
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   415
  // NOTE: this is used as a target from the signal handler so it
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   416
  // needs an x86 prolog which returns into the current simulator
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   417
  // executing the generated catch_exception code. So the prolog
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   418
  // needs to install rax in a sim register and adjust the sim's
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   419
  // restart pc to enter the generated code at the start position
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   420
  // then return from native to simulated execution.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   421
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   422
  address generate_catch_exception() {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   423
    StubCodeMark mark(this, "StubRoutines", "catch_exception");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   424
    address start = __ pc();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   425
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   426
    // same as in generate_call_stub():
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   427
    const Address sp_after_call(rfp, sp_after_call_off * wordSize);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   428
    const Address thread        (rfp, thread_off         * wordSize);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   429
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   430
#ifdef ASSERT
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   431
    // verify that threads correspond
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   432
    {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   433
      Label L, S;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   434
      __ ldr(rscratch1, thread);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   435
      __ cmp(rthread, rscratch1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   436
      __ br(Assembler::NE, S);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   437
      __ get_thread(rscratch1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   438
      __ cmp(rthread, rscratch1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   439
      __ br(Assembler::EQ, L);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   440
      __ bind(S);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   441
      __ stop("StubRoutines::catch_exception: threads must correspond");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   442
      __ bind(L);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   443
    }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   444
#endif
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   445
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   446
    // set pending exception
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   447
    __ verify_oop(r0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   448
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   449
    __ str(r0, Address(rthread, Thread::pending_exception_offset()));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   450
    __ mov(rscratch1, (address)__FILE__);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   451
    __ str(rscratch1, Address(rthread, Thread::exception_file_offset()));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   452
    __ movw(rscratch1, (int)__LINE__);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   453
    __ strw(rscratch1, Address(rthread, Thread::exception_line_offset()));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   454
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   455
    // complete return to VM
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   456
    assert(StubRoutines::_call_stub_return_address != NULL,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   457
           "_call_stub_return_address must have been generated before");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   458
    __ b(StubRoutines::_call_stub_return_address);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   459
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   460
    return start;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   461
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   462
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   463
  // Continuation point for runtime calls returning with a pending
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   464
  // exception.  The pending exception check happened in the runtime
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   465
  // or native call stub.  The pending exception in Thread is
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   466
  // converted into a Java-level exception.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   467
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   468
  // Contract with Java-level exception handlers:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   469
  // r0: exception
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   470
  // r3: throwing pc
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   471
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   472
  // NOTE: At entry of this stub, exception-pc must be in LR !!
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   473
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   474
  // NOTE: this is always used as a jump target within generated code
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   475
  // so it just needs to be generated code with no x86 prolog
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   476
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   477
  // Generates the "forward exception" stub and returns the address of
  // its first instruction.
  address generate_forward_exception() {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   478
    StubCodeMark mark(this, "StubRoutines", "forward exception");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   479
    // entry address of the stub; this is the value the generator returns
    address start = __ pc();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   480
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   481
    // Upon entry, LR points to the return address returning into
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   482
    // Java (interpreted or compiled) code; i.e., the return address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   483
    // becomes the throwing pc.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   484
    //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   485
    // Arguments pushed before the runtime call are still on the stack
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   486
    // but the exception handler will reset the stack pointer ->
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   487
    // ignore them.  A potential result in registers can be ignored as
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   488
    // well.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   489
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   490
#ifdef ASSERT
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   491
    // make sure this code is only executed if there is a pending exception
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   492
    {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   493
      Label L;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   494
      __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   495
      // a non-zero pending exception skips the stop() below
      __ cbnz(rscratch1, L);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   496
      __ stop("StubRoutines::forward exception: no pending exception (1)");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   497
      __ bind(L);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   498
    }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   499
#endif
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   500
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   501
    // compute exception handler into r19
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   502
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   503
    // call the VM to find the handler address associated with the
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   504
    // caller address. pass thread in r0 and caller pc (ret address)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   505
    // in r1. n.b. the caller pc is in lr, unlike x86 where it is on
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   506
    // the stack.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   507
    __ mov(c_rarg1, lr);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   508
    // lr will be trashed by the VM call so we move it to R19
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   509
    // (callee-saved) because we also need to pass it to the handler
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   510
    // returned by this call.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   511
    __ mov(r19, lr);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   512
    BLOCK_COMMENT("call exception_handler_for_return_address");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   513
    __ call_VM_leaf(CAST_FROM_FN_PTR(address,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   514
                         SharedRuntime::exception_handler_for_return_address),
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   515
                    rthread, c_rarg1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   516
    // we should not really care that lr is no longer the callee
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   517
    // address. we saved the value the handler needs in r19 so we can
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   518
    // just copy it to r3. however, the C2 handler will push its own
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   519
    // frame and then calls into the VM and the VM code asserts that
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   520
    // the PC for the frame above the handler belongs to a compiled
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   521
    // Java method. So, we restore lr here to satisfy that assert.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   522
    __ mov(lr, r19);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   523
    // setup r0 & r3 & clear pending exception
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   524
    __ mov(r3, r19);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   525
    // r19 <- r0; r0 is expected to hold the handler address produced by
    // the call above (r19 is the branch target at the end of the stub)
    __ mov(r19, r0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   526
    // load the pending exception into r0, then store zero over the
    // thread's pending-exception slot
    __ ldr(r0, Address(rthread, Thread::pending_exception_offset()));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   527
    __ str(zr, Address(rthread, Thread::pending_exception_offset()));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   528
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   529
#ifdef ASSERT
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   530
    // make sure exception is set
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   531
    {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   532
      Label L;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   533
      __ cbnz(r0, L);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   534
      __ stop("StubRoutines::forward exception: no pending exception (2)");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   535
      __ bind(L);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   536
    }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   537
#endif
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   538
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   539
    // continue at exception handler
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   540
    // r0: exception
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   541
    // r3: throwing pc
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   542
    // r19: exception handler
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   543
    __ verify_oop(r0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   544
    // branch to the address held in r19
    __ br(r19);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   545
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   546
    return start;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   547
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   548
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   549
  // Non-destructive plausibility checks for oops
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   550
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   551
  // Arguments:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   552
  //    r0: oop to verify
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   553
  //    rscratch1: error message
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   554
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   555
  // Stack after saving c_rarg3:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   556
  //    [tos + 0]: saved c_rarg3
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   557
  //    [tos + 1]: saved c_rarg2
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   558
  //    [tos + 2]: saved lr
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   559
  //    [tos + 3]: saved rscratch2
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   560
  //    [tos + 4]: saved r0
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   561
  //    [tos + 5]: saved rscratch1
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   562
  // Generates the "verify_oop" stub and returns the address of its first
  // instruction.  The stub checks the value in r0 and either returns to
  // lr or calls MacroAssembler::debug64 with the message in rscratch1.
  address generate_verify_oop() {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   563
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   564
    StubCodeMark mark(this, "StubRoutines", "verify_oop");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   565
    address start = __ pc();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   566
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   567
    Label exit, error;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   568
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   569
    // save c_rarg2 and c_rarg3
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   570
    // (both are used as scratch registers below and reloaded on each exit path)
    __ stp(c_rarg3, c_rarg2, Address(__ pre(sp, -16)));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   571
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   572
    // __ incrementl(ExternalAddress((address) StubRoutines::verify_oop_count_addr()));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   573
    // bump the counter at verify_oop_count_addr() with a plain
    // load / add 1 / store sequence
    __ lea(c_rarg2, ExternalAddress((address) StubRoutines::verify_oop_count_addr()));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   574
    __ ldr(c_rarg3, Address(c_rarg2));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   575
    __ add(c_rarg3, c_rarg3, 1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   576
    __ str(c_rarg3, Address(c_rarg2));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   577
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   578
    // object is in r0
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   579
    // make sure object is 'reasonable'
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   580
    __ cbz(r0, exit); // if obj is NULL it is OK
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   581
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   582
    // Check if the oop is in the right area of memory
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   583
    // c_rarg2 = r0 & verify_oop_mask(); c_rarg3 = verify_oop_bits()
    __ mov(c_rarg3, (intptr_t) Universe::verify_oop_mask());
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   584
    __ andr(c_rarg2, r0, c_rarg3);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   585
    __ mov(c_rarg3, (intptr_t) Universe::verify_oop_bits());
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   586
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   587
    // Compare c_rarg2 and c_rarg3.  We don't use a compare
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   588
    // instruction here because the flags register is live.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   589
    // (the exclusive-or is zero exactly when the two values are equal)
    __ eor(c_rarg2, c_rarg2, c_rarg3);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   590
    __ cbnz(c_rarg2, error);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   591
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   592
    // make sure klass is 'reasonable', which is not zero.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   593
    // Note: r0 is overwritten by the klass here and is not reloaded by
    // this stub.  NOTE(review): presumably the caller restores r0 from
    // its own saved copy (see the stack layout above) - confirm.
    __ load_klass(r0, r0);  // get klass
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   594
    __ cbz(r0, error);      // if klass is NULL it is broken
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   595
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   596
    // return if everything seems ok
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   597
    __ bind(exit);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   598
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   599
    // reload c_rarg3/c_rarg2 and pop the 16 bytes pushed on entry
    __ ldp(c_rarg3, c_rarg2, Address(__ post(sp, 16)));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   600
    __ ret(lr);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   601
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   602
    // handle errors
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   603
    __ bind(error);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   604
    __ ldp(c_rarg3, c_rarg2, Address(__ post(sp, 16)));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   605
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   606
    // push r0..r29; sp is then passed below as the regs[] argument
    __ push(RegSet::range(r0, r29), sp);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   607
    // debug64(char* msg, int64_t pc, int64_t regs[])
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   608
    __ mov(c_rarg0, rscratch1);      // pass address of error message
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   609
    __ mov(c_rarg1, lr);             // pass return address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   610
    __ mov(c_rarg2, sp);             // pass address of regs on stack
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   611
#ifndef PRODUCT
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   612
    assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   613
#endif
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   614
    BLOCK_COMMENT("call MacroAssembler::debug");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   615
    __ mov(rscratch1, CAST_FROM_FN_PTR(address, MacroAssembler::debug64));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   616
    // NOTE(review): the trailing arguments (3, 0, 1) presumably describe
    // the callee's argument counts / return kind - confirm against the
    // declaration of blrt.  No instruction is emitted after this call;
    // presumably debug64 is not expected to return here - confirm.
    __ blrt(rscratch1, 3, 0, 1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   617
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   618
    return start;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   619
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   620
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   621
  // Emits an unconditional branch to L_no_overlap.  The sf argument is
  // accepted but not referenced by this implementation.
  void array_overlap_test(Label& L_no_overlap, Address::sxtw sf) {
    __ b(L_no_overlap);
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   622
45054
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   623
  // The inner part of zero_words().  This is the bulk operation,
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   624
  // zeroing words in blocks, possibly using DC ZVA to do it.  The
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   625
  // caller is responsible for zeroing the last few words.
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   626
  //
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   627
  // Inputs:
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   628
  // r10: the HeapWord-aligned base address of an array to zero.
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   629
  // r11: the count in HeapWords, r11 > 0.
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   630
  //
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   631
  // Returns r10 and r11, adjusted for the caller to clear.
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   632
  // r10: the base address of the tail of words left to clear.
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   633
  // r11: the number of words in the tail.
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   634
  //      r11 < MacroAssembler::zero_words_block_size.
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   635
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   636
  // Generates the "zero_blocks" stub and returns the address of its
  // first instruction.  base (r10) and cnt (r11) are updated in place by
  // the emitted code, as described in the comment above.
  address generate_zero_blocks() {
51756
4bd35a5ec694 8210676: Remove some unused Label variables
mikael
parents: 51619
diff changeset
   637
    Label done;
45054
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   638
    Label base_aligned;
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   639
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   640
    Register base = r10, cnt = r11;
38143
3b732f17ea7d 8155617: aarch64: ClearArray does not use DC ZVA
enevill
parents: 38051
diff changeset
   641
3b732f17ea7d 8155617: aarch64: ClearArray does not use DC ZVA
enevill
parents: 38051
diff changeset
   642
    // align first, then open the StubCodeMark and record the entry pc
    __ align(CodeEntryAlignment);
45054
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   643
    StubCodeMark mark(this, "StubRoutines", "zero_blocks");
38143
3b732f17ea7d 8155617: aarch64: ClearArray does not use DC ZVA
enevill
parents: 38051
diff changeset
   644
    address start = __ pc();
3b732f17ea7d 8155617: aarch64: ClearArray does not use DC ZVA
enevill
parents: 38051
diff changeset
   645
45054
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   646
    // This is a C++-level check made while the stub is being generated:
    // the code in this branch is only emitted when UseBlockZeroing is set.
    if (UseBlockZeroing) {
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   647
      int zva_length = VM_Version::zva_length();
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   648
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   649
      // Ensure ZVA length can be divided by 16. This is required by
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   650
      // the subsequent operations.
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   651
      assert (zva_length % 16 == 0, "Unexpected ZVA Length");
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   652
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   653
      // If bit 3 of base is clear, skip ahead.  Otherwise store one zero
      // word, advance base by 8 (which clears bit 3) and drop cnt by one.
      __ tbz(base, 3, base_aligned);
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   654
      __ str(zr, Address(__ post(base, 8)));
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   655
      __ sub(cnt, cnt, 1);
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   656
      __ bind(base_aligned);
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   657
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   658
      // Ensure count >= zva_length * 2 so that it still deserves a zva after
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   659
      // alignment.
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   660
      Label small;
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   661
      // the threshold is the larger of zva_length * 2 and BlockZeroingLowLimit
      int low_limit = MAX2(zva_length * 2, (int)BlockZeroingLowLimit);
46720
5c3f87b90eff 8184900: AArch64: Fix overflow in immediate cmp instruction
yzhang
parents: 46695
diff changeset
   662
      // rscratch1 = cnt - (low_limit >> 3); cnt itself is left unchanged.
      // NOTE(review): the >> 3 presumably converts a byte limit into
      // 8-byte words to match cnt - confirm.
      __ subs(rscratch1, cnt, low_limit >> 3);
45054
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   663
      // below the threshold: skip the zero_dcache_blocks sequence
      __ br(Assembler::LT, small);
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   664
      __ zero_dcache_blocks(base, cnt);
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   665
      __ bind(small);
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   666
    }
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   667
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   668
    {
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   669
      // Number of stp instructions we'll unroll
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   670
      const int unroll =
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   671
        MacroAssembler::zero_words_block_size / 2;
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   672
      // Clear the remaining blocks.
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   673
      Label loop;
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   674
      // pre-subtract one block (unroll * 2 words); if that goes negative
      // there is no full block left, so skip the loop entirely
      __ subs(cnt, cnt, unroll * 2);
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   675
      __ br(Assembler::LT, done);
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   676
      __ bind(loop);
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   677
      // C++ loop at generation time: emits `unroll` stp instructions, each
      // storing two zero words and post-incrementing base by 16
      for (int i = 0; i < unroll; i++)
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   678
        __ stp(zr, zr, __ post(base, 16));
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   679
      // repeat while at least one more full block remains
      __ subs(cnt, cnt, unroll * 2);
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   680
      __ br(Assembler::GE, loop);
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   681
      __ bind(done);
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   682
      // undo the last subtraction so cnt holds the number of words left
      __ add(cnt, cnt, unroll * 2);
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   683
    }
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   684
38143
3b732f17ea7d 8155617: aarch64: ClearArray does not use DC ZVA
enevill
parents: 38051
diff changeset
   685
    __ ret(lr);
3b732f17ea7d 8155617: aarch64: ClearArray does not use DC ZVA
enevill
parents: 38051
diff changeset
   686
3b732f17ea7d 8155617: aarch64: ClearArray does not use DC ZVA
enevill
parents: 38051
diff changeset
   687
    return start;
3b732f17ea7d 8155617: aarch64: ClearArray does not use DC ZVA
enevill
parents: 38051
diff changeset
   688
  }
3b732f17ea7d 8155617: aarch64: ClearArray does not use DC ZVA
enevill
parents: 38051
diff changeset
   689
45054
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
   690
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   691
  // Direction in which a bulk copy walks memory.
  //
  // The enumerator values are used arithmetically, not just as tags:
  // generate_copy_longs computes its per-word address step as
  // wordSize * direction, so the values must remain +1 and -1.
  typedef enum {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   692
    copy_forwards = 1,    // step is +wordSize
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   693
    copy_backwards = -1   // step is -wordSize
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   694
  } copy_direction;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   695
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   696
  // Bulk copy of blocks of 8 words.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   697
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   698
  // count is a count of words.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   699
  //
36563
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
   700
  // Precondition: count >= 8
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   701
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   702
  // Postconditions:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   703
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   704
  // The least significant bit of count contains the remaining count
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   705
  // of words to copy.  The rest of count is trash.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   706
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   707
  // s and d are adjusted to point to the remaining words to copy
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   708
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   709
  void generate_copy_longs(Label &start, Register s, Register d, Register count,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   710
                           copy_direction direction) {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   711
    int unit = wordSize * direction;
36564
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   712
    int bias = (UseSIMDForMemoryOps ? 4:2) * wordSize;
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   713
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   714
    int offset;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   715
    const Register t0 = r3, t1 = r4, t2 = r5, t3 = r6,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   716
      t4 = r7, t5 = r10, t6 = r11, t7 = r12;
35841
39f8dc1df42b 8149365: aarch64: memory copy does not prefetch on backwards copy
enevill
parents: 35839
diff changeset
   717
    const Register stride = r13;
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   718
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   719
    assert_different_registers(rscratch1, t0, t1, t2, t3, t4, t5, t6, t7);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   720
    assert_different_registers(s, d, count, rscratch1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   721
36563
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
   722
    Label again, drain;
35843
67b6050f5ce8 8149080: AArch64: Recognise disjoint array copy in stub code
hshi
parents: 35841
diff changeset
   723
    const char *stub_name;
67b6050f5ce8 8149080: AArch64: Recognise disjoint array copy in stub code
hshi
parents: 35841
diff changeset
   724
    if (direction == copy_forwards)
46695
aaaac1d98bc5 8183533: AArch64: redundent registers saving in arraycopy stubs
njian
parents: 46625
diff changeset
   725
      stub_name = "forward_copy_longs";
35843
67b6050f5ce8 8149080: AArch64: Recognise disjoint array copy in stub code
hshi
parents: 35841
diff changeset
   726
    else
67b6050f5ce8 8149080: AArch64: Recognise disjoint array copy in stub code
hshi
parents: 35841
diff changeset
   727
      stub_name = "backward_copy_longs";
52977
2e4903f83295 8205421: AARCH64: StubCodeMark should be placed after alignment
dpochepk
parents: 52927
diff changeset
   728
2e4903f83295 8205421: AARCH64: StubCodeMark should be placed after alignment
dpochepk
parents: 52927
diff changeset
   729
    __ align(CodeEntryAlignment);
2e4903f83295 8205421: AARCH64: StubCodeMark should be placed after alignment
dpochepk
parents: 52927
diff changeset
   730
35843
67b6050f5ce8 8149080: AArch64: Recognise disjoint array copy in stub code
hshi
parents: 35841
diff changeset
   731
    StubCodeMark mark(this, "StubRoutines", stub_name);
52977
2e4903f83295 8205421: AARCH64: StubCodeMark should be placed after alignment
dpochepk
parents: 52927
diff changeset
   732
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   733
    __ bind(start);
40023
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   734
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   735
    Label unaligned_copy_long;
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   736
    if (AvoidUnalignedAccesses) {
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   737
      __ tbnz(d, 3, unaligned_copy_long);
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   738
    }
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   739
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   740
    if (direction == copy_forwards) {
36564
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   741
      __ sub(s, s, bias);
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   742
      __ sub(d, d, bias);
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   743
    }
36563
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
   744
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
   745
#ifdef ASSERT
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
   746
    // Make sure we are never given < 8 words
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   747
    {
36563
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
   748
      Label L;
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
   749
      __ cmp(count, (u1)8);
36563
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
   750
      __ br(Assembler::GE, L);
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
   751
      __ stop("genrate_copy_longs called with < 8 words");
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
   752
      __ bind(L);
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   753
    }
36563
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
   754
#endif
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   755
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   756
    // Fill 8 registers
36564
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   757
    if (UseSIMDForMemoryOps) {
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   758
      __ ldpq(v0, v1, Address(s, 4 * unit));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   759
      __ ldpq(v2, v3, Address(__ pre(s, 8 * unit)));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   760
    } else {
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   761
      __ ldp(t0, t1, Address(s, 2 * unit));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   762
      __ ldp(t2, t3, Address(s, 4 * unit));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   763
      __ ldp(t4, t5, Address(s, 6 * unit));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   764
      __ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   765
    }
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   766
36563
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
   767
    __ subs(count, count, 16);
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
   768
    __ br(Assembler::LO, drain);
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
   769
35841
39f8dc1df42b 8149365: aarch64: memory copy does not prefetch on backwards copy
enevill
parents: 35839
diff changeset
   770
    int prefetch = PrefetchCopyIntervalInBytes;
39f8dc1df42b 8149365: aarch64: memory copy does not prefetch on backwards copy
enevill
parents: 35839
diff changeset
   771
    bool use_stride = false;
39f8dc1df42b 8149365: aarch64: memory copy does not prefetch on backwards copy
enevill
parents: 35839
diff changeset
   772
    if (direction == copy_backwards) {
39f8dc1df42b 8149365: aarch64: memory copy does not prefetch on backwards copy
enevill
parents: 35839
diff changeset
   773
       use_stride = prefetch > 256;
39f8dc1df42b 8149365: aarch64: memory copy does not prefetch on backwards copy
enevill
parents: 35839
diff changeset
   774
       prefetch = -prefetch;
39f8dc1df42b 8149365: aarch64: memory copy does not prefetch on backwards copy
enevill
parents: 35839
diff changeset
   775
       if (use_stride) __ mov(stride, prefetch);
39f8dc1df42b 8149365: aarch64: memory copy does not prefetch on backwards copy
enevill
parents: 35839
diff changeset
   776
    }
39f8dc1df42b 8149365: aarch64: memory copy does not prefetch on backwards copy
enevill
parents: 35839
diff changeset
   777
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   778
    __ bind(again);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   779
35841
39f8dc1df42b 8149365: aarch64: memory copy does not prefetch on backwards copy
enevill
parents: 35839
diff changeset
   780
    if (PrefetchCopyIntervalInBytes > 0)
39f8dc1df42b 8149365: aarch64: memory copy does not prefetch on backwards copy
enevill
parents: 35839
diff changeset
   781
      __ prfm(use_stride ? Address(s, stride) : Address(s, prefetch), PLDL1KEEP);
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   782
36564
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   783
    if (UseSIMDForMemoryOps) {
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   784
      __ stpq(v0, v1, Address(d, 4 * unit));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   785
      __ ldpq(v0, v1, Address(s, 4 * unit));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   786
      __ stpq(v2, v3, Address(__ pre(d, 8 * unit)));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   787
      __ ldpq(v2, v3, Address(__ pre(s, 8 * unit)));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   788
    } else {
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   789
      __ stp(t0, t1, Address(d, 2 * unit));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   790
      __ ldp(t0, t1, Address(s, 2 * unit));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   791
      __ stp(t2, t3, Address(d, 4 * unit));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   792
      __ ldp(t2, t3, Address(s, 4 * unit));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   793
      __ stp(t4, t5, Address(d, 6 * unit));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   794
      __ ldp(t4, t5, Address(s, 6 * unit));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   795
      __ stp(t6, t7, Address(__ pre(d, 8 * unit)));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   796
      __ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   797
    }
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   798
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   799
    __ subs(count, count, 8);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   800
    __ br(Assembler::HS, again);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   801
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   802
    // Drain
36563
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
   803
    __ bind(drain);
36564
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   804
    if (UseSIMDForMemoryOps) {
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   805
      __ stpq(v0, v1, Address(d, 4 * unit));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   806
      __ stpq(v2, v3, Address(__ pre(d, 8 * unit)));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   807
    } else {
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   808
      __ stp(t0, t1, Address(d, 2 * unit));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   809
      __ stp(t2, t3, Address(d, 4 * unit));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   810
      __ stp(t4, t5, Address(d, 6 * unit));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   811
      __ stp(t6, t7, Address(__ pre(d, 8 * unit)));
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   812
    }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   813
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   814
    {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   815
      Label L1, L2;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   816
      __ tbz(count, exact_log2(4), L1);
36564
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   817
      if (UseSIMDForMemoryOps) {
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   818
        __ ldpq(v0, v1, Address(__ pre(s, 4 * unit)));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   819
        __ stpq(v0, v1, Address(__ pre(d, 4 * unit)));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   820
      } else {
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   821
        __ ldp(t0, t1, Address(s, 2 * unit));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   822
        __ ldp(t2, t3, Address(__ pre(s, 4 * unit)));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   823
        __ stp(t0, t1, Address(d, 2 * unit));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   824
        __ stp(t2, t3, Address(__ pre(d, 4 * unit)));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   825
      }
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   826
      __ bind(L1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   827
36564
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   828
      if (direction == copy_forwards) {
36595
3322a76f3a00 8151502: optimize pd_disjoint_words and pd_conjoint_words
enevill
parents: 36564
diff changeset
   829
        __ add(s, s, bias);
3322a76f3a00 8151502: optimize pd_disjoint_words and pd_conjoint_words
enevill
parents: 36564
diff changeset
   830
        __ add(d, d, bias);
36564
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   831
      }
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
   832
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   833
      __ tbz(count, 1, L2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   834
      __ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   835
      __ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   836
      __ bind(L2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   837
    }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   838
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
   839
    __ ret(lr);
40023
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   840
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   841
    if (AvoidUnalignedAccesses) {
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   842
      Label drain, again;
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   843
      // Register order for storing. Order is different for backward copy.
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   844
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   845
      __ bind(unaligned_copy_long);
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   846
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   847
      // source address is even aligned, target odd aligned
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   848
      //
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   849
      // when forward copying word pairs we read long pairs at offsets
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   850
      // {0, 2, 4, 6} (in long words). when backwards copying we read
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   851
      // long pairs at offsets {-2, -4, -6, -8}. We adjust the source
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   852
      // address by -2 in the forwards case so we can compute the
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   853
      // source offsets for both as {2, 4, 6, 8} * unit where unit = 1
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   854
      // or -1.
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   855
      //
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   856
      // when forward copying we need to store 1 word, 3 pairs and
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   857
      // then 1 word at offsets {0, 1, 3, 5, 7}. Rather than use a
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   858
      // zero offset we adjust the destination by -1, which means we
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   859
      // have to use offsets { 1, 2, 4, 6, 8} * unit for the stores.
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   860
      //
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   861
      // When backwards copying we need to store 1 word, 3 pairs and
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   862
      // then 1 word at offsets {-1, -3, -5, -7, -8} i.e. we use
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   863
      // offsets {1, 3, 5, 7, 8} * unit.
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   864
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   865
      if (direction == copy_forwards) {
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   866
        __ sub(s, s, 16);
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   867
        __ sub(d, d, 8);
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   868
      }
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   869
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   870
      // Fill 8 registers
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   871
      //
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   872
      // for forwards copy s was offset by -16 from the original input
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   873
      // value of s so the register contents are at these offsets
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   874
      // relative to the 64 byte block addressed by that original input
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   875
      // and so on for each successive 64 byte block when s is updated
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   876
      //
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   877
      // t0 at offset 0,  t1 at offset 8
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   878
      // t2 at offset 16, t3 at offset 24
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   879
      // t4 at offset 32, t5 at offset 40
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   880
      // t6 at offset 48, t7 at offset 56
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   881
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   882
      // for backwards copy s was not offset so the register contents
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   883
      // are at these offsets into the preceding 64 byte block
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   884
      // relative to that original input and so on for each successive
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   885
      // preceding 64 byte block when s is updated. this explains the
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   886
      // slightly counter-intuitive looking pattern of register usage
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   887
      // in the stp instructions for backwards copy.
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   888
      //
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   889
      // t0 at offset -16, t1 at offset -8
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   890
      // t2 at offset -32, t3 at offset -24
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   891
      // t4 at offset -48, t5 at offset -40
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   892
      // t6 at offset -64, t7 at offset -56
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   893
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   894
      __ ldp(t0, t1, Address(s, 2 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   895
      __ ldp(t2, t3, Address(s, 4 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   896
      __ ldp(t4, t5, Address(s, 6 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   897
      __ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   898
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   899
      __ subs(count, count, 16);
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   900
      __ br(Assembler::LO, drain);
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   901
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   902
      int prefetch = PrefetchCopyIntervalInBytes;
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   903
      bool use_stride = false;
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   904
      if (direction == copy_backwards) {
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   905
         use_stride = prefetch > 256;
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   906
         prefetch = -prefetch;
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   907
         if (use_stride) __ mov(stride, prefetch);
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   908
      }
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   909
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   910
      __ bind(again);
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   911
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   912
      if (PrefetchCopyIntervalInBytes > 0)
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   913
        __ prfm(use_stride ? Address(s, stride) : Address(s, prefetch), PLDL1KEEP);
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   914
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   915
      if (direction == copy_forwards) {
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   916
       // allowing for the offset of -8 the store instructions place
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   917
       // registers into the target 64 bit block at the following
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   918
       // offsets
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   919
       //
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   920
       // t0 at offset 0
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   921
       // t1 at offset 8,  t2 at offset 16
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   922
       // t3 at offset 24, t4 at offset 32
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   923
       // t5 at offset 40, t6 at offset 48
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   924
       // t7 at offset 56
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   925
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   926
        __ str(t0, Address(d, 1 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   927
        __ stp(t1, t2, Address(d, 2 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   928
        __ ldp(t0, t1, Address(s, 2 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   929
        __ stp(t3, t4, Address(d, 4 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   930
        __ ldp(t2, t3, Address(s, 4 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   931
        __ stp(t5, t6, Address(d, 6 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   932
        __ ldp(t4, t5, Address(s, 6 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   933
        __ str(t7, Address(__ pre(d, 8 * unit)));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   934
        __ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   935
      } else {
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   936
       // d was not offset when we started so the registers are
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   937
       // written into the 64 bit block preceding d with the following
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   938
       // offsets
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   939
       //
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   940
       // t1 at offset -8
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   941
       // t3 at offset -24, t0 at offset -16
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   942
       // t5 at offset -40, t2 at offset -32
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   943
       // t7 at offset -56, t4 at offset -48
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   944
       //                   t6 at offset -64
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   945
       //
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   946
       // note that this matches the offsets previously noted for the
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   947
       // loads
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   948
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   949
        __ str(t1, Address(d, 1 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   950
        __ stp(t3, t0, Address(d, 3 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   951
        __ ldp(t0, t1, Address(s, 2 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   952
        __ stp(t5, t2, Address(d, 5 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   953
        __ ldp(t2, t3, Address(s, 4 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   954
        __ stp(t7, t4, Address(d, 7 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   955
        __ ldp(t4, t5, Address(s, 6 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   956
        __ str(t6, Address(__ pre(d, 8 * unit)));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   957
        __ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   958
      }
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   959
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   960
      __ subs(count, count, 8);
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   961
      __ br(Assembler::HS, again);
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   962
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   963
      // Drain
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   964
      //
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   965
      // this uses the same pattern of offsets and register arguments
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   966
      // as above
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   967
      __ bind(drain);
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   968
      if (direction == copy_forwards) {
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   969
        __ str(t0, Address(d, 1 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   970
        __ stp(t1, t2, Address(d, 2 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   971
        __ stp(t3, t4, Address(d, 4 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   972
        __ stp(t5, t6, Address(d, 6 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   973
        __ str(t7, Address(__ pre(d, 8 * unit)));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   974
      } else {
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   975
        __ str(t1, Address(d, 1 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   976
        __ stp(t3, t0, Address(d, 3 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   977
        __ stp(t5, t2, Address(d, 5 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   978
        __ stp(t7, t4, Address(d, 7 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   979
        __ str(t6, Address(__ pre(d, 8 * unit)));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   980
      }
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   981
      // now we need to copy any remaining part block which may
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   982
      // include a 4 word subblock and/or a 2 word subblock.
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   983
      // bits 2 and 1 in the count are the tell-tale for whether we
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   984
      // have each such subblock
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   985
      {
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   986
        Label L1, L2;
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   987
        __ tbz(count, exact_log2(4), L1);
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   988
       // this is the same as above but copying only 4 longs hence
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   989
       // with only one intervening stp between the str instructions
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   990
       // but note that the offsets and registers still follow the
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   991
       // same pattern
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   992
        __ ldp(t0, t1, Address(s, 2 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   993
        __ ldp(t2, t3, Address(__ pre(s, 4 * unit)));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   994
        if (direction == copy_forwards) {
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   995
          __ str(t0, Address(d, 1 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   996
          __ stp(t1, t2, Address(d, 2 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   997
          __ str(t3, Address(__ pre(d, 4 * unit)));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   998
        } else {
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
   999
          __ str(t1, Address(d, 1 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1000
          __ stp(t3, t0, Address(d, 3 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1001
          __ str(t2, Address(__ pre(d, 4 * unit)));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1002
        }
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1003
        __ bind(L1);
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1004
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1005
        __ tbz(count, 1, L2);
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1006
       // this is the same as above but copying only 2 longs hence
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1007
       // there is no intervening stp between the str instructions
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1008
       // but note that the offset and register patterns are still
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1009
       // the same
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1010
        __ ldp(t0, t1, Address(__ pre(s, 2 * unit)));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1011
        if (direction == copy_forwards) {
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1012
          __ str(t0, Address(d, 1 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1013
          __ str(t1, Address(__ pre(d, 2 * unit)));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1014
        } else {
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1015
          __ str(t1, Address(d, 1 * unit));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1016
          __ str(t0, Address(__ pre(d, 2 * unit)));
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1017
        }
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1018
        __ bind(L2);
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1019
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1020
       // for forwards copy we need to re-adjust the offsets we
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1021
       // applied so that s and d follow the last words written
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1022
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1023
       if (direction == copy_forwards) {
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1024
         __ add(s, s, 16);
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1025
         __ add(d, d, 8);
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1026
       }
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1027
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1028
      }
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1029
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1030
      __ ret(lr);
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1031
      }
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1032
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1033
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1034
  // Small copy: less than 16 bytes.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1035
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1036
  // NB: Ignores all of the bits of count which represent more than 15
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1037
  // bytes, so a caller doesn't have to mask them.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1038
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1039
  // Emits code for the tail of a copy: the remainder of fewer than 16 bytes
  // is moved from s to d as at most one 8-byte, one 4-byte, one 2-byte and
  // one 1-byte transfer, each guarded by a test of the matching bit of count.
  //
  //   s, d   - source and destination address registers; each transfer
  //            addresses them through adjust() with a signed per-piece
  //            offset.  NOTE(review): adjust() presumably selects pre- or
  //            post-indexed writeback from is_backwards -- confirm.
  //   count  - element count in units of |step| bytes; only the low-order
  //            bits that select the 8/4/2/1-byte pieces are examined
  //   tmp    - scratch register, overwritten by every load emitted here
  //   step   - signed element size in bytes; a negative value requests a
  //            backward copy
  void copy_memory_small(Register s, Register d, Register count, Register tmp, int step) {
    const bool is_backwards = step < 0;
    const size_t granularity = uabs(step);

    assert(granularity
           && granularity <= sizeof (jlong), "Impossible granularity in copy_memory_small");

    // log2 of the element size: bit (k - log_granularity) of count is set
    // exactly when a 2^k-byte piece is part of the remainder.
    const int log_granularity = exact_log2(granularity);

    // Signed byte offsets for each piece size.  These are computed in int on
    // purpose: multiplying sizeof(...) by a negative direction would be done
    // in unsigned arithmetic and rely on a narrowing conversion to come out
    // negative again.
    const int direction  = is_backwards ? -1 : 1;
    const int word_step  = direction * wordSize;
    const int int_step   = direction * static_cast<int>(sizeof (jint));
    const int short_step = direction * static_cast<int>(sizeof (jshort));
    const int byte_step  = direction * static_cast<int>(sizeof (jbyte));

    Label Lword, Lint, Lshort, Lbyte;

    // ??? It is unclear whether this bit-test-and-branch sequence is the
    // right approach.  It does a lot of jumping, resulting in several
    // mispredicted branches.  It might make more sense to do this with
    // something like Duff's device with a single computed branch.

    // 8-byte piece: emitted for every granularity.
    __ tbz(count, 3 - log_granularity, Lword);
    __ ldr(tmp, Address(__ adjust(s, word_step, is_backwards)));
    __ str(tmp, Address(__ adjust(d, word_step, is_backwards)));
    __ bind(Lword);

    // 4-byte piece: only possible when elements are no wider than a jint.
    if (granularity <= sizeof (jint)) {
      __ tbz(count, 2 - log_granularity, Lint);
      __ ldrw(tmp, Address(__ adjust(s, int_step, is_backwards)));
      __ strw(tmp, Address(__ adjust(d, int_step, is_backwards)));
      __ bind(Lint);
    }

    // 2-byte piece: only possible when elements are no wider than a jshort.
    if (granularity <= sizeof (jshort)) {
      __ tbz(count, 1 - log_granularity, Lshort);
      __ ldrh(tmp, Address(__ adjust(s, short_step, is_backwards)));
      __ strh(tmp, Address(__ adjust(d, short_step, is_backwards)));
      __ bind(Lshort);
    }

    // 1-byte piece: only possible for byte-sized elements.
    if (granularity <= sizeof (jbyte)) {
      __ tbz(count, 0, Lbyte);
      __ ldrb(tmp, Address(__ adjust(s, byte_step, is_backwards)));
      __ strb(tmp, Address(__ adjust(d, byte_step, is_backwards)));
      __ bind(Lbyte);
    }
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1083
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1084
  // Labels declared outside any single generator method, so more than one
  // method can refer to them.
  // NOTE(review): the names suggest the forward and backward bulk-copy entry
  // points -- confirm against the code that binds and branches to them.
  Label copy_f, copy_b;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1085
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1086
  // All-singing all-dancing memory copy.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1087
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1088
  // Copy count units of memory from s to d.  The size of a unit is
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1089
  // step, which can be positive or negative depending on the direction
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1090
  // of copy.  If is_aligned is false, we align the source address.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1091
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1092
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1093
  void copy_memory(bool is_aligned, Register s, Register d,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1094
                   Register count, Register tmp, int step) {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1095
    copy_direction direction = step < 0 ? copy_backwards : copy_forwards;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1096
    bool is_backwards = step < 0;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1097
    int granularity = uabs(step);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1098
    const Register t0 = r3, t1 = r4;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1099
36563
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1100
    // <= 96 bytes do inline. Direction doesn't matter because we always
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1101
    // load all the data before writing anything
51756
4bd35a5ec694 8210676: Remove some unused Label variables
mikael
parents: 51619
diff changeset
  1102
    Label copy4, copy8, copy16, copy32, copy80, copy_big, finish;
36563
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1103
    const Register t2 = r5, t3 = r6, t4 = r7, t5 = r8;
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1104
    const Register t6 = r9, t7 = r10, t8 = r11, t9 = r12;
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1105
    const Register send = r17, dend = r18;
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1106
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1107
    if (PrefetchCopyIntervalInBytes > 0)
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1108
      __ prfm(Address(s, 0), PLDL1KEEP);
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
  1109
    __ cmp(count, u1((UseSIMDForMemoryOps ? 96:80)/granularity));
36563
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1110
    __ br(Assembler::HI, copy_big);
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1111
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1112
    __ lea(send, Address(s, count, Address::lsl(exact_log2(granularity))));
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1113
    __ lea(dend, Address(d, count, Address::lsl(exact_log2(granularity))));
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1114
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
  1115
    __ cmp(count, u1(16/granularity));
36563
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1116
    __ br(Assembler::LS, copy16);
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1117
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
  1118
    __ cmp(count, u1(64/granularity));
36563
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1119
    __ br(Assembler::HI, copy80);
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1120
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
  1121
    __ cmp(count, u1(32/granularity));
36563
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1122
    __ br(Assembler::LS, copy32);
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1123
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1124
    // 33..64 bytes
36564
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1125
    if (UseSIMDForMemoryOps) {
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1126
      __ ldpq(v0, v1, Address(s, 0));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1127
      __ ldpq(v2, v3, Address(send, -32));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1128
      __ stpq(v0, v1, Address(d, 0));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1129
      __ stpq(v2, v3, Address(dend, -32));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1130
    } else {
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1131
      __ ldp(t0, t1, Address(s, 0));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1132
      __ ldp(t2, t3, Address(s, 16));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1133
      __ ldp(t4, t5, Address(send, -32));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1134
      __ ldp(t6, t7, Address(send, -16));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1135
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1136
      __ stp(t0, t1, Address(d, 0));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1137
      __ stp(t2, t3, Address(d, 16));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1138
      __ stp(t4, t5, Address(dend, -32));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1139
      __ stp(t6, t7, Address(dend, -16));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1140
    }
36563
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1141
    __ b(finish);
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1142
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1143
    // 17..32 bytes
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1144
    __ bind(copy32);
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1145
    __ ldp(t0, t1, Address(s, 0));
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1146
    __ ldp(t2, t3, Address(send, -16));
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1147
    __ stp(t0, t1, Address(d, 0));
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1148
    __ stp(t2, t3, Address(dend, -16));
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1149
    __ b(finish);
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1150
36564
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1151
    // 65..80/96 bytes
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1152
    // (96 bytes if SIMD because we do 32 bytes per instruction)
36563
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1153
    __ bind(copy80);
36564
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1154
    if (UseSIMDForMemoryOps) {
40023
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1155
      __ ld4(v0, v1, v2, v3, __ T16B, Address(s, 0));
36564
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1156
      __ ldpq(v4, v5, Address(send, -32));
40023
49d647eeb7f0 8159063: aarch64: optimise unaligned array copy long
enevill
parents: 39265
diff changeset
  1157
      __ st4(v0, v1, v2, v3, __ T16B, Address(d, 0));
36564
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1158
      __ stpq(v4, v5, Address(dend, -32));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1159
    } else {
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1160
      __ ldp(t0, t1, Address(s, 0));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1161
      __ ldp(t2, t3, Address(s, 16));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1162
      __ ldp(t4, t5, Address(s, 32));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1163
      __ ldp(t6, t7, Address(s, 48));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1164
      __ ldp(t8, t9, Address(send, -16));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1165
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1166
      __ stp(t0, t1, Address(d, 0));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1167
      __ stp(t2, t3, Address(d, 16));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1168
      __ stp(t4, t5, Address(d, 32));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1169
      __ stp(t6, t7, Address(d, 48));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1170
      __ stp(t8, t9, Address(dend, -16));
9442bb67de26 8150313: aarch64: optimise array copy using SIMD instructions
enevill
parents: 36563
diff changeset
  1171
    }
36563
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1172
    __ b(finish);
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1173
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1174
    // 0..16 bytes
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1175
    __ bind(copy16);
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
  1176
    __ cmp(count, u1(8/granularity));
36563
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1177
    __ br(Assembler::LO, copy8);
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1178
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1179
    // 8..16 bytes
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1180
    __ ldr(t0, Address(s, 0));
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1181
    __ ldr(t1, Address(send, -8));
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1182
    __ str(t0, Address(d, 0));
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1183
    __ str(t1, Address(dend, -8));
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1184
    __ b(finish);
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1185
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1186
    if (granularity < 8) {
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1187
      // 4..7 bytes
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1188
      __ bind(copy8);
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1189
      __ tbz(count, 2 - exact_log2(granularity), copy4);
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1190
      __ ldrw(t0, Address(s, 0));
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1191
      __ ldrw(t1, Address(send, -4));
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1192
      __ strw(t0, Address(d, 0));
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1193
      __ strw(t1, Address(dend, -4));
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1194
      __ b(finish);
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1195
      if (granularity < 4) {
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1196
        // 0..3 bytes
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1197
        __ bind(copy4);
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1198
        __ cbz(count, finish); // get rid of 0 case
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1199
        if (granularity == 2) {
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1200
          __ ldrh(t0, Address(s, 0));
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1201
          __ strh(t0, Address(d, 0));
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1202
        } else { // granularity == 1
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1203
          // Now 1..3 bytes. Handle the 1 and 2 byte case by copying
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1204
          // the first and last byte.
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1205
          // Handle the 3 byte case by loading and storing base + count/2
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1206
          // (count == 1 (s+0)->(d+0), count == 2,3 (s+1) -> (d+1))
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1207
          // This does mean that in the 1 byte case we load/store the same
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1208
          // byte 3 times.
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1209
          __ lsr(count, count, 1);
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1210
          __ ldrb(t0, Address(s, 0));
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1211
          __ ldrb(t1, Address(send, -1));
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1212
          __ ldrb(t2, Address(s, count));
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1213
          __ strb(t0, Address(d, 0));
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1214
          __ strb(t1, Address(dend, -1));
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1215
          __ strb(t2, Address(d, count));
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1216
        }
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1217
        __ b(finish);
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1218
      }
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1219
    }
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1220
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1221
    __ bind(copy_big);
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1222
    if (is_backwards) {
35119
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1223
      __ lea(s, Address(s, count, Address::lsl(exact_log2(-step))));
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1224
      __ lea(d, Address(d, count, Address::lsl(exact_log2(-step))));
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1225
    }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1226
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1227
    // Now we've got the small case out of the way we can align the
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1228
    // source address on a 2-word boundary.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1229
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1230
    Label aligned;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1231
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1232
    if (is_aligned) {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1233
      // We may have to adjust by 1 word to get s 2-word-aligned.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1234
      __ tbz(s, exact_log2(wordSize), aligned);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1235
      __ ldr(tmp, Address(__ adjust(s, direction * wordSize, is_backwards)));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1236
      __ str(tmp, Address(__ adjust(d, direction * wordSize, is_backwards)));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1237
      __ sub(count, count, wordSize/granularity);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1238
    } else {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1239
      if (is_backwards) {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1240
        __ andr(rscratch2, s, 2 * wordSize - 1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1241
      } else {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1242
        __ neg(rscratch2, s);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1243
        __ andr(rscratch2, rscratch2, 2 * wordSize - 1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1244
      }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1245
      // rscratch2 is the byte adjustment needed to align s.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1246
      __ cbz(rscratch2, aligned);
35752
16265e7c7a53 8148328: aarch64: redundant lsr instructions in stub code.
fyang
parents: 35579
diff changeset
  1247
      int shift = exact_log2(granularity);
16265e7c7a53 8148328: aarch64: redundant lsr instructions in stub code.
fyang
parents: 35579
diff changeset
  1248
      if (shift)  __ lsr(rscratch2, rscratch2, shift);
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1249
      __ sub(count, count, rscratch2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1250
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1251
#if 0
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1252
      // ?? This code is only correct for a disjoint copy.  It may or
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1253
      // may not make sense to use it in that case.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1254
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1255
      // Copy the first pair; s and d may not be aligned.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1256
      __ ldp(t0, t1, Address(s, is_backwards ? -2 * wordSize : 0));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1257
      __ stp(t0, t1, Address(d, is_backwards ? -2 * wordSize : 0));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1258
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1259
      // Align s and d, adjust count
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1260
      if (is_backwards) {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1261
        __ sub(s, s, rscratch2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1262
        __ sub(d, d, rscratch2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1263
      } else {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1264
        __ add(s, s, rscratch2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1265
        __ add(d, d, rscratch2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1266
      }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1267
#else
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1268
      copy_memory_small(s, d, rscratch2, rscratch1, step);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1269
#endif
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1270
    }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1271
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1272
    __ bind(aligned);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1273
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1274
    // s is now 2-word-aligned.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1275
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1276
    // We have a count of units and some trailing bytes.  Adjust the
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1277
    // count and do a bulk copy of words.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1278
    __ lsr(rscratch2, count, exact_log2(wordSize/granularity));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1279
    if (direction == copy_forwards)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1280
      __ bl(copy_f);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1281
    else
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1282
      __ bl(copy_b);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1283
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1284
    // And the tail.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1285
    copy_memory_small(s, d, count, tmp, step);
36563
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1286
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1287
    if (granularity >= 8) __ bind(copy8);
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1288
    if (granularity >= 4) __ bind(copy4);
0b48c2c8ad13 8150082: aarch64: optimise small array copy
enevill
parents: 36340
diff changeset
  1289
    __ bind(finish);
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1290
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1291
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1292
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1293
  // Debug-only helper: in ASSERT builds, emits code that overwrites
  // registers r3..r18 (except rscratch1) with a recognisable poison
  // pattern.  Emits nothing in other builds.
  void clobber_registers() {
#ifdef ASSERT
    // Form the pattern in rscratch1: 0xdeadbeef in the low half, then
    // OR in a copy of it shifted left by 32 bits.
    __ mov(rscratch1, (uint64_t)0xdeadbeef);
    __ orr(rscratch1, rscratch1, rscratch1, Assembler::LSL, 32);
    for (Register reg = r3; reg <= r18; reg++) {
      if (reg == rscratch1) {
        continue;  // rscratch1 already holds the pattern
      }
      __ mov(reg, rscratch1);
    }
#endif
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1301
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1302
  // Scan over array at a for count oops, verifying each one.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1303
  // Preserves a and count, clobbers rscratch1 and rscratch2.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1304
  void verify_oop_array (size_t size, Register a, Register count, Register temp) {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1305
    Label loop, end;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1306
    __ mov(rscratch1, a);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1307
    __ mov(rscratch2, zr);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1308
    __ bind(loop);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1309
    __ cmp(rscratch2, count);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1310
    __ br(Assembler::HS, end);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1311
    if (size == (size_t)wordSize) {
35119
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1312
      __ ldr(temp, Address(a, rscratch2, Address::lsl(exact_log2(size))));
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1313
      __ verify_oop(temp);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1314
    } else {
35119
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1315
      __ ldrw(r16, Address(a, rscratch2, Address::lsl(exact_log2(size))));
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1316
      __ decode_heap_oop(temp); // calls verify_oop
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1317
    }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1318
    __ add(rscratch2, rscratch2, size);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1319
    __ b(loop);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1320
    __ bind(end);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1321
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1322
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1323
  // Arguments:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1324
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1325
  //             ignored
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1326
  //   is_oop  - true => oop array, so generate store check code
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1327
  //   name    - stub name string
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1328
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1329
  // Inputs:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1330
  //   c_rarg0   - source array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1331
  //   c_rarg1   - destination array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1332
  //   c_rarg2   - element count, treated as ssize_t, can be zero
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1333
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1334
  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1335
  // the hardware handle it.  The two dwords within qwords that span
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1336
  // cache line boundaries will still be loaded and stored atomically.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1337
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1338
  // Side Effects:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1339
  //   disjoint_int_copy_entry is set to the no-overlap entry point
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1340
  //   used by generate_conjoint_int_oop_copy().
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1341
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1342
  address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address *entry,
                                  const char *name, bool dest_uninitialized = false) {
    // Emits the stub for a disjoint (non-overlapping) array copy of
    // 'size'-byte elements and returns the address of its first instruction.
    //
    //   size               - element size in bytes
    //   aligned            - adds ARRAYCOPY_ALIGNED to the barrier decorators
    //                        and is forwarded to copy_memory
    //   is_oop             - true => oop array; dest/count are preserved across
    //                        the copy for the barrier epilogue
    //   entry              - if non-NULL, receives the pc just after the frame
    //                        setup (the no-overlap entry point)
    //   name               - stub name string
    //   dest_uninitialized - adds IS_DEST_UNINITIALIZED to the decorators
    Register src = c_rarg0, dst = c_rarg1, cnt = c_rarg2;
    RegSet saved_regs = RegSet::of(src, dst, cnt);

    // Access decorators describing this copy to the GC barrier code.
    DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
    if (aligned) {
      decorators |= ARRAYCOPY_ALIGNED;
    }
    if (dest_uninitialized) {
      decorators |= IS_DEST_UNINITIALIZED;
    }

    // Align first so that the StubCodeMark starts at the aligned address.
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();
    __ enter();

    if (entry != NULL) {
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      *entry = __ pc();
      BLOCK_COMMENT("Entry:");
    }

    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
    bs->arraycopy_prologue(_masm, decorators, is_oop, dst, cnt, saved_regs);

    if (is_oop) {
      // save regs before copy_memory
      __ push(RegSet::of(dst, cnt), sp);
    }

    copy_memory(aligned, src, dst, cnt, rscratch1, size);

    if (is_oop) {
      // Restore the destination and element count saved above.
      __ pop(RegSet::of(dst, cnt), sp);
      if (VerifyOops) {
        verify_oop_array(size, dst, cnt, r16);
      }
      // Turn the count into an inclusive end pointer:
      // cnt = dst + ((cnt - 1) << log2(size)).
      const int log2_elem_size = exact_log2(size);
      __ sub(cnt, cnt, 1);
      __ lea(cnt, Address(dst, cnt, Address::lsl(log2_elem_size)));
    }

    bs->arraycopy_epilogue(_masm, decorators, is_oop, dst, cnt, rscratch1, RegSet());

    __ leave();
    __ mov(r0, zr); // return 0
    __ ret(lr);
#ifdef BUILTIN_SIM
    {
      // Tell the built-in simulator about the newly generated stub.
      AArch64Simulator *simulator = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
      simulator->notifyCompile(const_cast<char*>(name), start);
    }
#endif
    return start;
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1395
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1396
  // Arguments:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1397
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1398
  //             ignored
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1399
  //   is_oop  - true => oop array, so generate store check code
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1400
  //   name    - stub name string
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1401
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1402
  // Inputs:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1403
  //   c_rarg0   - source array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1404
  //   c_rarg1   - destination array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1405
  //   c_rarg2   - element count, treated as ssize_t, can be zero
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1406
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1407
  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1408
  // the hardware handle it.  The two dwords within qwords that span
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1409
  // cache line boundaries will still be loaded and stored atomically.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1410
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1411
  address generate_conjoint_copy(size_t size, bool aligned, bool is_oop, address nooverlap_target,
                                 address *entry, const char *name,
                                 bool dest_uninitialized = false) {
    // Emits the stub for a conjoint (possibly overlapping) array copy of
    // 'size'-byte elements and returns the address of its first instruction.
    //
    //   size               - element size in bytes
    //   aligned            - adds ARRAYCOPY_ALIGNED to the barrier decorators
    //                        and is forwarded to copy_memory
    //   is_oop             - true => oop array; dest/count are preserved across
    //                        the copy for the barrier epilogue
    //   nooverlap_target   - branch target used when a forward copy is safe
    //   entry              - if non-NULL, receives the pc just after the frame setup
    //   name               - stub name string
    //   dest_uninitialized - adds IS_DEST_UNINITIALIZED to the decorators
    Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
    RegSet saved_regs = RegSet::of(s, d, count);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();
    __ enter();

    if (entry != NULL) {
      *entry = __ pc();
      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
      BLOCK_COMMENT("Entry:");
    }

    // use fwd copy when (d-s) above_equal (count*size)
    // The comparison is unsigned (HS), so a destination below the source
    // also takes the forward-copy branch.
    __ sub(rscratch1, d, s);
    __ cmp(rscratch1, count, Assembler::LSL, exact_log2(size));
    __ br(Assembler::HS, nooverlap_target);

    // Access decorators describing this copy to the GC barrier code.
    DecoratorSet decorators = IN_HEAP | IS_ARRAY;
    if (dest_uninitialized) {
      decorators |= IS_DEST_UNINITIALIZED;
    }
    if (aligned) {
      decorators |= ARRAYCOPY_ALIGNED;
    }

    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
    bs->arraycopy_prologue(_masm, decorators, is_oop, d, count, saved_regs);

    if (is_oop) {
      // save regs before copy_memory
      __ push(RegSet::of(d, count), sp);
    }
    // Pass a negative step. 'size' is unsigned, so convert to a signed type
    // BEFORE negating: '-size' on a size_t wraps to a huge unsigned value and
    // only becomes negative through an implicit narrowing conversion.
    // NOTE(review): presumably a negative step selects a high-to-low copy in
    // copy_memory -- confirm against its definition.
    copy_memory(aligned, s, d, count, rscratch1, -static_cast<int>(size));
    if (is_oop) {
      // Restore the destination and element count saved above.
      __ pop(RegSet::of(d, count), sp);
      if (VerifyOops)
        verify_oop_array(size, d, count, r16);
      __ sub(count, count, 1); // make an inclusive end pointer
      __ lea(count, Address(d, count, Address::lsl(exact_log2(size))));
    }
    bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, rscratch1, RegSet());
    __ leave();
    __ mov(r0, zr); // return 0
    __ ret(lr);
#ifdef BUILTIN_SIM
    {
      // Tell the built-in simulator about the newly generated stub.
      AArch64Simulator *sim = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
      sim->notifyCompile(const_cast<char*>(name), start);
    }
#endif
    return start;
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1466
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1467
  // Arguments:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1468
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1469
  //             ignored
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1470
  //   name    - stub name string
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1471
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1472
  // Inputs:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1473
  //   c_rarg0   - source array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1474
  //   c_rarg1   - destination array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1475
  //   c_rarg2   - element count, treated as ssize_t, can be zero
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1476
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1477
  // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1478
  // we let the hardware handle it.  The one to eight bytes within words,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1479
  // dwords or qwords that span cache line boundaries will still be loaded
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1480
  // and stored atomically.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1481
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1482
  // Side Effects:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1483
  //   disjoint_byte_copy_entry is set to the no-overlap entry point.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1484
  // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1485
  // we let the hardware handle it.  The one to eight bytes within words,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1486
  // dwords or qwords that span cache line boundaries will still be loaded
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1487
  // and stored atomically.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1488
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1489
  // Side Effects:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1490
  //   disjoint_byte_copy_entry is set to the no-overlap entry point
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1491
  //   used by generate_conjoint_byte_copy().
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1492
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1493
  address generate_disjoint_byte_copy(bool aligned, address* entry, const char *name) {
    // jbyte elements are not oops: delegate to the generic disjoint copy
    // generator with is_oop == false and a one-jbyte element size.
    return generate_disjoint_copy(sizeof (jbyte), aligned, /* is_oop */ false, entry, name);
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1497
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1498
  // Arguments:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1499
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1500
  //             ignored
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1501
  //   name    - stub name string
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1502
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1503
  // Inputs:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1504
  //   c_rarg0   - source array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1505
  //   c_rarg1   - destination array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1506
  //   c_rarg2   - element count, treated as ssize_t, can be zero
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1507
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1508
  // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1509
  // we let the hardware handle it.  The one to eight bytes within words,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1510
  // dwords or qwords that span cache line boundaries will still be loaded
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1511
  // and stored atomically.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1512
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1513
  address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
                                      address* entry, const char *name) {
    // jbyte elements are not oops: delegate to the generic conjoint copy
    // generator with is_oop == false and a one-jbyte element size.
    return generate_conjoint_copy(sizeof (jbyte), aligned, /* is_oop */ false,
                                  nooverlap_target, entry, name);
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1518
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1519
  // Arguments:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1520
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1521
  //             ignored
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1522
  //   name    - stub name string
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1523
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1524
  // Inputs:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1525
  //   c_rarg0   - source array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1526
  //   c_rarg1   - destination array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1527
  //   c_rarg2   - element count, treated as ssize_t, can be zero
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1528
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1529
  // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1530
  // let the hardware handle it.  The two or four words within dwords
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1531
  // or qwords that span cache line boundaries will still be loaded
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1532
  // and stored atomically.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1533
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1534
  // Side Effects:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1535
  //   disjoint_short_copy_entry is set to the no-overlap entry point
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1536
  //   used by generate_conjoint_short_copy().
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1537
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1538
  address generate_disjoint_short_copy(bool aligned,
                                       address* entry, const char *name) {
    // jshort elements are not oops: delegate to the generic disjoint copy
    // generator with is_oop == false and a one-jshort element size.
    return generate_disjoint_copy(sizeof (jshort), aligned, /* is_oop */ false, entry, name);
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1543
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1544
  // Arguments:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1545
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1546
  //             ignored
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1547
  //   name    - stub name string
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1548
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1549
  // Inputs:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1550
  //   c_rarg0   - source array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1551
  //   c_rarg1   - destination array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1552
  //   c_rarg2   - element count, treated as ssize_t, can be zero
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1553
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1554
  // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1555
  // let the hardware handle it.  The two or four words within dwords
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1556
  // or qwords that span cache line boundaries will still be loaded
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1557
  // and stored atomically.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1558
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1559
  address generate_conjoint_short_copy(bool aligned, address nooverlap_target,
                                       address *entry, const char *name) {
    // jshort elements are not oops: delegate to the generic conjoint copy
    // generator with is_oop == false and a one-jshort element size.
    return generate_conjoint_copy(sizeof (jshort), aligned, /* is_oop */ false,
                                  nooverlap_target, entry, name);
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1565
  // Arguments:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1566
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1567
  //             ignored
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1568
  //   name    - stub name string
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1569
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1570
  // Inputs:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1571
  //   c_rarg0   - source array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1572
  //   c_rarg1   - destination array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1573
  //   c_rarg2   - element count, treated as ssize_t, can be zero
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1574
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1575
  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1576
  // the hardware handle it.  The two dwords within qwords that span
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1577
  // cache line boundaries will still be loaded and stored atomically.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1578
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1579
  // Side Effects:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1580
  //   disjoint_int_copy_entry is set to the no-overlap entry point
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1581
  //   used by generate_conjoint_int_oop_copy().
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1582
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1583
  address generate_disjoint_int_copy(bool aligned, address *entry,
                                         const char *name, bool dest_uninitialized = false) {
    // jint elements are copied as plain data (is_oop == false).
    // NOTE: dest_uninitialized is not forwarded to generate_disjoint_copy
    // by this wrapper.
    const size_t element_size = sizeof (jint);
    const bool is_oop = false;
    return generate_disjoint_copy(element_size, aligned, is_oop, entry, name);
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1588
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1589
  // Arguments:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1590
  //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1591
  //             ignored
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1592
  //   name    - stub name string
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1593
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1594
  // Inputs:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1595
  //   c_rarg0   - source array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1596
  //   c_rarg1   - destination array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1597
  //   c_rarg2   - element count, treated as ssize_t, can be zero
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1598
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1599
  // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1600
  // the hardware handle it.  The two dwords within qwords that span
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1601
  // cache line boundaries will still be loaded and stored atomically.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1602
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1603
  address generate_conjoint_int_copy(bool aligned, address nooverlap_target,
                                     address *entry, const char *name,
                                     bool dest_uninitialized = false) {
    // jint elements are copied as plain data (is_oop == false).
    // NOTE: dest_uninitialized is not forwarded to generate_conjoint_copy
    // by this wrapper.
    const size_t element_size = sizeof (jint);
    const bool is_oop = false;
    return generate_conjoint_copy(element_size, aligned, is_oop,
                                  nooverlap_target, entry, name);
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1609
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1610
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1611
  // Arguments:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1612
  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1613
  //             ignored
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1614
  //   name    - stub name string
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1615
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1616
  // Inputs:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1617
  //   c_rarg0   - source array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1618
  //   c_rarg1   - destination array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1619
  //   c_rarg2   - element count, treated as size_t, can be zero
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1620
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1621
  // Side Effects:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1622
  //   disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1623
  //   no-overlap entry point used by generate_conjoint_long_oop_copy().
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1624
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1625
  address generate_disjoint_long_copy(bool aligned, address *entry,
                                          const char *name, bool dest_uninitialized = false) {
    // jlong elements are copied as plain data (is_oop == false).
    // NOTE: dest_uninitialized is not forwarded to generate_disjoint_copy
    // by this wrapper.
    const size_t element_size = sizeof (jlong);
    const bool is_oop = false;
    return generate_disjoint_copy(element_size, aligned, is_oop, entry, name);
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1630
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1631
  // Arguments:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1632
  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1633
  //             ignored
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1634
  //   name    - stub name string
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1635
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1636
  // Inputs:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1637
  //   c_rarg0   - source array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1638
  //   c_rarg1   - destination array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1639
  //   c_rarg2   - element count, treated as size_t, can be zero
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1640
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1641
  address generate_conjoint_long_copy(bool aligned,
                                      address nooverlap_target, address *entry,
                                      const char *name, bool dest_uninitialized = false) {
    // jlong elements are copied as plain data (is_oop == false).
    // NOTE: dest_uninitialized is not forwarded to generate_conjoint_copy
    // by this wrapper.
    const size_t element_size = sizeof (jlong);
    const bool is_oop = false;
    return generate_conjoint_copy(element_size, aligned, is_oop,
                                  nooverlap_target, entry, name);
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1647
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1648
  // Arguments:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1649
  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1650
  //             ignored
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1651
  //   name    - stub name string
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1652
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1653
  // Inputs:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1654
  //   c_rarg0   - source array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1655
  //   c_rarg1   - destination array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1656
  //   c_rarg2   - element count, treated as size_t, can be zero
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1657
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1658
  // Side Effects:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1659
  //   disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1660
  //   no-overlap entry point used by generate_conjoint_long_oop_copy().
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1661
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1662
  address generate_disjoint_oop_copy(bool aligned, address *entry,
                                     const char *name, bool dest_uninitialized) {
    // The element size follows UseCompressedOops: sizeof (jint) when it is
    // set, sizeof (jlong) otherwise.
    size_t element_size = sizeof (jlong);
    if (UseCompressedOops) {
      element_size = sizeof (jint);
    }
    // Elements are oops, so is_oop is true and dest_uninitialized is
    // passed through to the shared disjoint copy generator.
    const bool is_oop = true;
    return generate_disjoint_copy(element_size, aligned, is_oop, entry, name,
                                  dest_uninitialized);
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1668
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1669
  // Arguments:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1670
  //   aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1671
  //             ignored
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1672
  //   name    - stub name string
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1673
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1674
  // Inputs:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1675
  //   c_rarg0   - source array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1676
  //   c_rarg1   - destination array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1677
  //   c_rarg2   - element count, treated as size_t, can be zero
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1678
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1679
  address generate_conjoint_oop_copy(bool aligned,
                                     address nooverlap_target, address *entry,
                                     const char *name, bool dest_uninitialized) {
    // The element size follows UseCompressedOops: sizeof (jint) when it is
    // set, sizeof (jlong) otherwise.
    size_t element_size = sizeof (jlong);
    if (UseCompressedOops) {
      element_size = sizeof (jint);
    }
    // Elements are oops, so is_oop is true and dest_uninitialized is
    // passed through to the shared conjoint copy generator.
    const bool is_oop = true;
    return generate_conjoint_copy(element_size, aligned, is_oop,
                                  nooverlap_target, entry, name,
                                  dest_uninitialized);
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1687
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1688
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1689
  // Helper for generating a dynamic type check.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1690
  // Smashes rscratch1.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1691
  void generate_type_check(Register sub_klass,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1692
                           Register super_check_offset,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1693
                           Register super_klass,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1694
                           Label& L_success) {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1695
    assert_different_registers(sub_klass, super_check_offset, super_klass);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1696
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1697
    BLOCK_COMMENT("type_check:");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1698
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1699
    Label L_miss;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1700
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1701
    __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg,        &L_success, &L_miss, NULL,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1702
                                     super_check_offset);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1703
    __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1704
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1705
    // Fall through on failure!
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1706
    __ BIND(L_miss);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1707
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1708
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1709
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1710
  //  Generate checkcasting array copy stub
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1711
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1712
  //  Input:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1713
  //    c_rarg0   - source array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1714
  //    c_rarg1   - destination array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1715
  //    c_rarg2   - element count, treated as ssize_t, can be zero
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1716
  //    c_rarg3   - size_t ckoff (super_check_offset)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1717
  //    c_rarg4   - oop ckval (super_klass)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1718
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1719
  //  Output:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1720
  //    r0 ==  0  -  success
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1721
  //    r0 == -1^K - failure, where K is partial transfer count
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1722
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1723
  address generate_checkcast_copy(const char *name, address *entry,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1724
                                  bool dest_uninitialized = false) {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1725
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1726
    Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1727
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1728
    // Input registers (after setup_arg_regs)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1729
    const Register from        = c_rarg0;   // source array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1730
    const Register to          = c_rarg1;   // destination array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1731
    const Register count       = c_rarg2;   // elementscount
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1732
    const Register ckoff       = c_rarg3;   // super_check_offset
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1733
    const Register ckval       = c_rarg4;   // super_klass
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1734
46695
aaaac1d98bc5 8183533: AArch64: redundent registers saving in arraycopy stubs
njian
parents: 46625
diff changeset
  1735
    RegSet wb_pre_saved_regs = RegSet::range(c_rarg0, c_rarg4);
aaaac1d98bc5 8183533: AArch64: redundent registers saving in arraycopy stubs
njian
parents: 46625
diff changeset
  1736
    RegSet wb_post_saved_regs = RegSet::of(count);
aaaac1d98bc5 8183533: AArch64: redundent registers saving in arraycopy stubs
njian
parents: 46625
diff changeset
  1737
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1738
    // Registers used as temps (r18, r19, r20 are save-on-entry)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1739
    const Register count_save  = r21;       // orig elementscount
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1740
    const Register start_to    = r20;       // destination array start address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1741
    const Register copied_oop  = r18;       // actual oop copied
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1742
    const Register r19_klass   = r19;       // oop._klass
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1743
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1744
    //---------------------------------------------------------------
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1745
    // Assembler stub will be used for this call to arraycopy
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1746
    // if the two arrays are subtypes of Object[] but the
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1747
    // destination array type is not equal to or a supertype
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1748
    // of the source type.  Each element must be separately
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1749
    // checked.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1750
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1751
    assert_different_registers(from, to, count, ckoff, ckval, start_to,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1752
                               copied_oop, r19_klass, count_save);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1753
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1754
    __ align(CodeEntryAlignment);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1755
    StubCodeMark mark(this, "StubRoutines", name);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1756
    address start = __ pc();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1757
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1758
    __ enter(); // required for proper stackwalking of RuntimeStub frame
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1759
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1760
#ifdef ASSERT
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1761
    // caller guarantees that the arrays really are different
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1762
    // otherwise, we would have to make conjoint checks
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1763
    { Label L;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1764
      array_overlap_test(L, TIMES_OOP);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1765
      __ stop("checkcast_copy within a single array");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1766
      __ bind(L);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1767
    }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1768
#endif //ASSERT
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1769
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1770
    // Caller of this entry point must set up the argument registers.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1771
    if (entry != NULL) {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1772
      *entry = __ pc();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1773
      BLOCK_COMMENT("Entry:");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1774
    }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1775
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1776
     // Empty array:  Nothing to do.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1777
    __ cbz(count, L_done);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1778
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1779
    __ push(RegSet::of(r18, r19, r20, r21), sp);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1780
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1781
#ifdef ASSERT
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1782
    BLOCK_COMMENT("assert consistent ckoff/ckval");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1783
    // The ckoff and ckval must be mutually consistent,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1784
    // even though caller generates both.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1785
    { Label L;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1786
      int sco_offset = in_bytes(Klass::super_check_offset_offset());
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1787
      __ ldrw(start_to, Address(ckval, sco_offset));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1788
      __ cmpw(ckoff, start_to);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1789
      __ br(Assembler::EQ, L);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1790
      __ stop("super_check_offset inconsistent");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1791
      __ bind(L);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1792
    }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1793
#endif //ASSERT
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1794
50728
9375184cec98 8205459: Rename Access API flag decorators
kbarrett
parents: 50242
diff changeset
  1795
    DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST;
49484
ee8fa73b90f9 8198949: Modularize arraycopy stub routine GC barriers
eosterlund
parents: 49455
diff changeset
  1796
    bool is_oop = true;
ee8fa73b90f9 8198949: Modularize arraycopy stub routine GC barriers
eosterlund
parents: 49455
diff changeset
  1797
    if (dest_uninitialized) {
50728
9375184cec98 8205459: Rename Access API flag decorators
kbarrett
parents: 50242
diff changeset
  1798
      decorators |= IS_DEST_UNINITIALIZED;
49484
ee8fa73b90f9 8198949: Modularize arraycopy stub routine GC barriers
eosterlund
parents: 49455
diff changeset
  1799
    }
ee8fa73b90f9 8198949: Modularize arraycopy stub routine GC barriers
eosterlund
parents: 49455
diff changeset
  1800
49754
ee93c1087584 8201362: Remove CollectedHeap::barrier_set()
pliden
parents: 49724
diff changeset
  1801
    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
49484
ee8fa73b90f9 8198949: Modularize arraycopy stub routine GC barriers
eosterlund
parents: 49455
diff changeset
  1802
    bs->arraycopy_prologue(_masm, decorators, is_oop, to, count, wb_pre_saved_regs);
36326
d25af58cfc94 8150045: arraycopy causes segfaults in SATB during garbage collection
aph
parents: 35843
diff changeset
  1803
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1804
    // save the original count
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1805
    __ mov(count_save, count);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1806
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1807
    // Copy from low to high addresses
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1808
    __ mov(start_to, to);              // Save destination array start address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1809
    __ b(L_load_element);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1810
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1811
    // ======== begin loop ========
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1812
    // (Loop is rotated; its entry is L_load_element.)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1813
    // Loop control:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1814
    //   for (; count != 0; count--) {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1815
    //     copied_oop = load_heap_oop(from++);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1816
    //     ... generate_type_check ...;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1817
    //     store_heap_oop(to++, copied_oop);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1818
    //   }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1819
    __ align(OptoLoopAlignment);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1820
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1821
    __ BIND(L_store_element);
50110
3d98842c8677 8202714: Create a MacroAssembler::access_load/store_at wrapper for AArch64
rkennke
parents: 49754
diff changeset
  1822
    __ store_heap_oop(__ post(to, UseCompressedOops ? 4 : 8), copied_oop, noreg, noreg, AS_RAW);  // store the oop
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1823
    __ sub(count, count, 1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1824
    __ cbz(count, L_do_card_marks);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1825
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1826
    // ======== loop entry is here ========
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1827
    __ BIND(L_load_element);
50110
3d98842c8677 8202714: Create a MacroAssembler::access_load/store_at wrapper for AArch64
rkennke
parents: 49754
diff changeset
  1828
    __ load_heap_oop(copied_oop, __ post(from, UseCompressedOops ? 4 : 8), noreg, noreg, AS_RAW); // load the oop
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1829
    __ cbz(copied_oop, L_store_element);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1830
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1831
    __ load_klass(r19_klass, copied_oop);// query the object klass
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1832
    generate_type_check(r19_klass, ckoff, ckval, L_store_element);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1833
    // ======== end loop ========
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1834
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1835
    // It was a real error; we must depend on the caller to finish the job.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1836
    // Register count = remaining oops, count_orig = total oops.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1837
    // Emit GC store barriers for the oops we have copied and report
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1838
    // their number to the caller.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1839
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1840
    __ subs(count, count_save, count);     // K = partially copied oop count
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1841
    __ eon(count, count, zr);                   // report (-1^K) to caller
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1842
    __ br(Assembler::EQ, L_done_pop);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1843
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1844
    __ BIND(L_do_card_marks);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1845
    __ add(to, to, -heapOopSize);         // make an inclusive end pointer
49484
ee8fa73b90f9 8198949: Modularize arraycopy stub routine GC barriers
eosterlund
parents: 49455
diff changeset
  1846
    bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, to, rscratch1, wb_post_saved_regs);
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1847
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1848
    __ bind(L_done_pop);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1849
    __ pop(RegSet::of(r18, r19, r20, r21), sp);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1850
    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1851
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1852
    __ bind(L_done);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1853
    __ mov(r0, count);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1854
    __ leave();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1855
    __ ret(lr);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1856
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1857
    return start;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1858
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1859
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1860
  // Perform range checks on the proposed arraycopy: branch to L_failed when
  // src_pos + length exceeds the length of src, or when dst_pos + length
  // exceeds the length of dst. Falls through when both checks pass.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1861
  // Kills temp, and also overwrites rscratch1 and the condition flags.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1862
  // Also, clean the high 32 bits of src_pos and dst_pos (both are rewritten).
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1863
  void arraycopy_range_checks(Register src,     // source array oop (c_rarg0)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1864
                              Register src_pos, // source position (c_rarg1)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1865
                              Register dst,     // destination array oop (c_rarg2)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1866
                              Register dst_pos, // destination position (c_rarg3)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1867
                              Register length,  // element count added to each position
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1868
                              Register temp,    // scratch register; must not be rscratch1
35119
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1869
                              Label& L_failed) { // branch target when either check fails
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1870
    BLOCK_COMMENT("arraycopy_range_checks:");
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1871
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1872
    // temp receives the position+length sum while rscratch1 holds the array
    // length, so the two must be distinct registers.
    assert_different_registers(rscratch1, temp);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1873
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1874
    //  if (src_pos + length > arrayOop(src)->length())  FAIL;
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1875
    // rscratch1 = 32-bit length field of the source array
    __ ldrw(rscratch1, Address(src, arrayOopDesc::length_offset_in_bytes()));
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1876
    // temp = src_pos + length, computed as a 32-bit sum
    __ addw(temp, length, src_pos);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1877
    __ cmpw(temp, rscratch1);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1878
    // HI is the unsigned "higher" condition: the sum is compared as an
    // unsigned 32-bit value.
    // NOTE(review): presumably callers have already rejected negative
    // src_pos/length, so the sum cannot wrap past 32 bits - confirm.
    __ br(Assembler::HI, L_failed);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1879
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1880
    //  if (dst_pos + length > arrayOop(dst)->length())  FAIL;
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1881
    // Same sequence as above, now for the destination array.
    __ ldrw(rscratch1, Address(dst, arrayOopDesc::length_offset_in_bytes()));
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1882
    __ addw(temp, length, dst_pos);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1883
    __ cmpw(temp, rscratch1);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1884
    __ br(Assembler::HI, L_failed);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1885
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1886
    // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'.
    // A 32-bit move of each register onto itself does that.
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1887
    __ movw(src_pos, src_pos);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1888
    __ movw(dst_pos, dst_pos);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1889
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1890
    BLOCK_COMMENT("arraycopy_range_checks done");
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1891
  }
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1892
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1893
  // Placeholder arraycopy routine: it copies nothing and never reads src or
  // dst; its whole body is an assertion that count is zero.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1894
  // Original author's note: these stubs get called from a test routine and
  // will be written properly once they are called from
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1895
  // something that's actually doing something.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1896
  static void fake_arraycopy_stub(address src, address dst, int count) {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1897
    // Any request to copy a non-zero number of elements is treated as a bug.
    assert(count == 0, "huh?");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1898
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1899
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  1900
35119
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1901
  //
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1902
  //  Generate 'unsafe' array copy stub
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1903
  //  Though just as safe as the other stubs, it takes an unscaled
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1904
  //  size_t argument instead of an element count.
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1905
  //
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1906
  //  Input:
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1907
  //    c_rarg0   - source array address
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1908
  //    c_rarg1   - destination array address
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1909
  //    c_rarg2   - byte count, treated as ssize_t, can be zero
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1910
  //
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1911
  // Examines the alignment of the operands and dispatches
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1912
  // to a long, int, short, or byte copy loop.
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1913
  //
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1914
  // Emits the 'unsafe' copy stub and returns the address of its first
  // instruction. The emitted code ORs together the source address, the
  // destination address and the byte count, tests the low bits of the result
  // to find the widest element size (long, int, short or byte) that all three
  // are aligned to, converts the byte count into an element count for that
  // size, and branches to the matching copy entry.
  //
  //   name             - stub name handed to StubCodeMark
  //   byte_copy_entry  - branch target when only byte alignment holds
  //   short_copy_entry - branch target when everything is short-aligned
  //   int_copy_entry   - branch target when everything is int-aligned
  //   long_copy_entry  - branch target when everything is long-aligned
  address generate_unsafe_copy(const char *name,
37271
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1915
                               address byte_copy_entry,
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1916
                               address short_copy_entry,
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1917
                               address int_copy_entry,
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1918
                               address long_copy_entry) {
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1919
    Label L_long_aligned, L_int_aligned, L_short_aligned;
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1920
    // s = source address, d = destination address, count = size in bytes
    Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1921
35119
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1922
    // Align first, then create the StubCodeMark, then record the start pc.
    __ align(CodeEntryAlignment);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1923
    StubCodeMark mark(this, "StubRoutines", name);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1924
    address start = __ pc();
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1925
    // NOTE(review): no matching leave() is emitted in this stub; presumably
    // the copy entries branched to below tear the frame down - confirm.
    __ enter(); // required for proper stackwalking of RuntimeStub frame
37271
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1926
35119
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1927
    // bump this on entry, not on exit:
37271
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1928
    inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr);
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1929
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1930
    // rscratch1 = s | d | count: a low bit is clear in the result only if it
    // is clear in all three values.
    __ orr(rscratch1, s, d);
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1931
    __ orr(rscratch1, rscratch1, count);
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1932
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1933
    // Test from the widest element size down to the narrowest.
    __ andr(rscratch1, rscratch1, BytesPerLong-1);
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1934
    __ cbz(rscratch1, L_long_aligned);
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1935
    __ andr(rscratch1, rscratch1, BytesPerInt-1);
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1936
    __ cbz(rscratch1, L_int_aligned);
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1937
    // Bit 0 clear => everything is at least short-aligned.
    __ tbz(rscratch1, 0, L_short_aligned);
35119
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1938
    // Fallback: byte copy, with count left unchanged as a byte count.
    __ b(RuntimeAddress(byte_copy_entry));
37271
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1939
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1940
    __ BIND(L_short_aligned);
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1941
    __ lsr(count, count, LogBytesPerShort);  // size => short_count
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1942
    __ b(RuntimeAddress(short_copy_entry));
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1943
    __ BIND(L_int_aligned);
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1944
    __ lsr(count, count, LogBytesPerInt);    // size => int_count
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1945
    __ b(RuntimeAddress(int_copy_entry));
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1946
    __ BIND(L_long_aligned);
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1947
    __ lsr(count, count, LogBytesPerLong);   // size => long_count
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1948
    __ b(RuntimeAddress(long_copy_entry));
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  1949
35119
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1950
    // Entry point of the generated stub (the pc recorded after alignment).
    return start;
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1951
  }
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1952
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1953
  //
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1954
  //  Generate generic array copy stubs
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1955
  //
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1956
  //  Input:
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1957
  //    c_rarg0    -  src oop
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1958
  //    c_rarg1    -  src_pos (32-bits)
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1959
  //    c_rarg2    -  dst oop
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1960
  //    c_rarg3    -  dst_pos (32-bits)
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1961
  //    c_rarg4    -  element count (32-bits)
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1962
  //
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1963
  //  Output:
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1964
  //    r0 ==  0  -  success
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1965
  //    r0 == -1^K - failure, where K is partial transfer count
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1966
  //
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1967
  address generate_generic_copy(const char *name,
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1968
                                address byte_copy_entry, address short_copy_entry,
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1969
                                address int_copy_entry, address oop_copy_entry,
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1970
                                address long_copy_entry, address checkcast_copy_entry) {
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1971
51756
4bd35a5ec694 8210676: Remove some unused Label variables
mikael
parents: 51619
diff changeset
  1972
    Label L_failed, L_objArray;
35119
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1973
    Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs;
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1974
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1975
    // Input registers
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1976
    const Register src        = c_rarg0;  // source array oop
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1977
    const Register src_pos    = c_rarg1;  // source position
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1978
    const Register dst        = c_rarg2;  // destination array oop
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1979
    const Register dst_pos    = c_rarg3;  // destination position
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1980
    const Register length     = c_rarg4;
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1981
52977
2e4903f83295 8205421: AARCH64: StubCodeMark should be placed after alignment
dpochepk
parents: 52927
diff changeset
  1982
    __ align(CodeEntryAlignment);
2e4903f83295 8205421: AARCH64: StubCodeMark should be placed after alignment
dpochepk
parents: 52927
diff changeset
  1983
35119
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1984
    StubCodeMark mark(this, "StubRoutines", name);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1985
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1986
    address start = __ pc();
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1987
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1988
    __ enter(); // required for proper stackwalking of RuntimeStub frame
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1989
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1990
    // bump this on entry, not on exit:
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1991
    inc_counter_np(SharedRuntime::_generic_array_copy_ctr);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1992
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1993
    //-----------------------------------------------------------------------
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1994
    // Assembler stub will be used for this call to arraycopy
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1995
    // if the following conditions are met:
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1996
    //
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1997
    // (1) src and dst must not be null.
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1998
    // (2) src_pos must not be negative.
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  1999
    // (3) dst_pos must not be negative.
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2000
    // (4) length  must not be negative.
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2001
    // (5) src klass and dst klass should be the same and not NULL.
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2002
    // (6) src and dst should be arrays.
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2003
    // (7) src_pos + length must not exceed length of src.
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2004
    // (8) dst_pos + length must not exceed length of dst.
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2005
    //
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2006
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2007
    //  if (src == NULL) return -1;
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2008
    __ cbz(src, L_failed);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2009
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2010
    //  if (src_pos < 0) return -1;
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2011
    __ tbnz(src_pos, 31, L_failed);  // i.e. sign bit set
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2012
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2013
    //  if (dst == NULL) return -1;
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2014
    __ cbz(dst, L_failed);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2015
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2016
    //  if (dst_pos < 0) return -1;
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2017
    __ tbnz(dst_pos, 31, L_failed);  // i.e. sign bit set
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2018
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2019
    // registers used as temp
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2020
    const Register scratch_length    = r16; // elements count to copy
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2021
    const Register scratch_src_klass = r17; // array klass
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2022
    const Register lh                = r18; // layout helper
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2023
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2024
    //  if (length < 0) return -1;
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2025
    __ movw(scratch_length, length);        // length (elements count, 32-bits value)
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2026
    __ tbnz(scratch_length, 31, L_failed);  // i.e. sign bit set
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2027
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2028
    __ load_klass(scratch_src_klass, src);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2029
#ifdef ASSERT
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2030
    //  assert(src->klass() != NULL);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2031
    {
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2032
      BLOCK_COMMENT("assert klasses not null {");
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2033
      Label L1, L2;
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2034
      __ cbnz(scratch_src_klass, L2);   // it is broken if klass is NULL
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2035
      __ bind(L1);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2036
      __ stop("broken null klass");
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2037
      __ bind(L2);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2038
      __ load_klass(rscratch1, dst);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2039
      __ cbz(rscratch1, L1);     // this would be broken also
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2040
      BLOCK_COMMENT("} assert klasses not null done");
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2041
    }
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2042
#endif
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2043
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2044
    // Load layout helper (32-bits)
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2045
    //
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2046
    //  |array_tag|     | header_size | element_type |     |log2_element_size|
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2047
    // 32        30    24            16              8     2                 0
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2048
    //
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2049
    //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2050
    //
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2051
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2052
    const int lh_offset = in_bytes(Klass::layout_helper_offset());
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2053
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2054
    // Handle objArrays completely differently...
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2055
    const jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2056
    __ ldrw(lh, Address(scratch_src_klass, lh_offset));
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2057
    __ movw(rscratch1, objArray_lh);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2058
    __ eorw(rscratch2, lh, rscratch1);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2059
    __ cbzw(rscratch2, L_objArray);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2060
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2061
    //  if (src->klass() != dst->klass()) return -1;
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2062
    __ load_klass(rscratch2, dst);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2063
    __ eor(rscratch2, rscratch2, scratch_src_klass);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2064
    __ cbnz(rscratch2, L_failed);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2065
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2066
    //  if (!src->is_Array()) return -1;
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2067
    __ tbz(lh, 31, L_failed);  // i.e. (lh >= 0)
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2068
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2069
    // At this point, it is known to be a typeArray (array_tag 0x3).
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2070
#ifdef ASSERT
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2071
    {
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2072
      BLOCK_COMMENT("assert primitive array {");
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2073
      Label L;
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2074
      __ movw(rscratch2, Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2075
      __ cmpw(lh, rscratch2);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2076
      __ br(Assembler::GE, L);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2077
      __ stop("must be a primitive array");
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2078
      __ bind(L);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2079
      BLOCK_COMMENT("} assert primitive array done");
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2080
    }
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2081
#endif
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2082
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2083
    arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2084
                           rscratch2, L_failed);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2085
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2086
    // TypeArrayKlass
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2087
    //
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2088
    // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2089
    // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2090
    //
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2091
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2092
    const Register rscratch1_offset = rscratch1;    // array offset
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2093
    const Register r18_elsize = lh; // element size
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2094
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2095
    __ ubfx(rscratch1_offset, lh, Klass::_lh_header_size_shift,
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2096
           exact_log2(Klass::_lh_header_size_mask+1));   // array_offset
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2097
    __ add(src, src, rscratch1_offset);           // src array offset
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2098
    __ add(dst, dst, rscratch1_offset);           // dst array offset
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2099
    BLOCK_COMMENT("choose copy loop based on element size");
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2100
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2101
    // next registers should be set before the jump to corresponding stub
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2102
    const Register from     = c_rarg0;  // source array address
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2103
    const Register to       = c_rarg1;  // destination array address
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2104
    const Register count    = c_rarg2;  // elements count
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2105
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2106
    // 'from', 'to', 'count' registers should be set in such order
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2107
    // since they are the same as 'src', 'src_pos', 'dst'.
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2108
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2109
    assert(Klass::_lh_log2_element_size_shift == 0, "fix this code");
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2110
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2111
    // The possible values of elsize are 0-3, i.e. exact_log2(element
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2112
    // size in bytes).  We do a simple bitwise binary search.
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2113
  __ BIND(L_copy_bytes);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2114
    __ tbnz(r18_elsize, 1, L_copy_ints);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2115
    __ tbnz(r18_elsize, 0, L_copy_shorts);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2116
    __ lea(from, Address(src, src_pos));// src_addr
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2117
    __ lea(to,   Address(dst, dst_pos));// dst_addr
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2118
    __ movw(count, scratch_length); // length
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2119
    __ b(RuntimeAddress(byte_copy_entry));
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2120
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2121
  __ BIND(L_copy_shorts);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2122
    __ lea(from, Address(src, src_pos, Address::lsl(1)));// src_addr
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2123
    __ lea(to,   Address(dst, dst_pos, Address::lsl(1)));// dst_addr
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2124
    __ movw(count, scratch_length); // length
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2125
    __ b(RuntimeAddress(short_copy_entry));
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2126
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2127
  __ BIND(L_copy_ints);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2128
    __ tbnz(r18_elsize, 0, L_copy_longs);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2129
    __ lea(from, Address(src, src_pos, Address::lsl(2)));// src_addr
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2130
    __ lea(to,   Address(dst, dst_pos, Address::lsl(2)));// dst_addr
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2131
    __ movw(count, scratch_length); // length
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2132
    __ b(RuntimeAddress(int_copy_entry));
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2133
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2134
  __ BIND(L_copy_longs);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2135
#ifdef ASSERT
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2136
    {
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2137
      BLOCK_COMMENT("assert long copy {");
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2138
      Label L;
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2139
      __ andw(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> r18_elsize
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2140
      __ cmpw(r18_elsize, LogBytesPerLong);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2141
      __ br(Assembler::EQ, L);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2142
      __ stop("must be long copy, but elsize is wrong");
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2143
      __ bind(L);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2144
      BLOCK_COMMENT("} assert long copy done");
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2145
    }
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2146
#endif
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2147
    __ lea(from, Address(src, src_pos, Address::lsl(3)));// src_addr
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2148
    __ lea(to,   Address(dst, dst_pos, Address::lsl(3)));// dst_addr
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2149
    __ movw(count, scratch_length); // length
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2150
    __ b(RuntimeAddress(long_copy_entry));
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2151
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2152
    // ObjArrayKlass
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2153
  __ BIND(L_objArray);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2154
    // live at this point:  scratch_src_klass, scratch_length, src[_pos], dst[_pos]
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2155
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2156
    Label L_plain_copy, L_checkcast_copy;
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2157
    //  test array classes for subtyping
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2158
    __ load_klass(r18, dst);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2159
    __ cmp(scratch_src_klass, r18); // usual case is exact equality
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2160
    __ br(Assembler::NE, L_checkcast_copy);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2161
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2162
    // Identically typed arrays can be copied without element-wise checks.
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2163
    arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2164
                           rscratch2, L_failed);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2165
39232
118d17fef4f9 8156731: aarch64: java/util/Arrays/Correct.java fails due to _generic_arraycopy stub routine
fyang
parents: 38233
diff changeset
  2166
    __ lea(from, Address(src, src_pos, Address::lsl(LogBytesPerHeapOop)));
35119
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2167
    __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
39232
118d17fef4f9 8156731: aarch64: java/util/Arrays/Correct.java fails due to _generic_arraycopy stub routine
fyang
parents: 38233
diff changeset
  2168
    __ lea(to, Address(dst, dst_pos, Address::lsl(LogBytesPerHeapOop)));
35119
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2169
    __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2170
    __ movw(count, scratch_length); // length
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2171
  __ BIND(L_plain_copy);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2172
    __ b(RuntimeAddress(oop_copy_entry));
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2173
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2174
  __ BIND(L_checkcast_copy);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2175
    // live at this point:  scratch_src_klass, scratch_length, r18 (dst_klass)
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2176
    {
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2177
      // Before looking at dst.length, make sure dst is also an objArray.
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2178
      __ ldrw(rscratch1, Address(r18, lh_offset));
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2179
      __ movw(rscratch2, objArray_lh);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2180
      __ eorw(rscratch1, rscratch1, rscratch2);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2181
      __ cbnzw(rscratch1, L_failed);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2182
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2183
      // It is safe to examine both src.length and dst.length.
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2184
      arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2185
                             r18, L_failed);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2186
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2187
      const Register rscratch2_dst_klass = rscratch2;
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2188
      __ load_klass(rscratch2_dst_klass, dst); // reload
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2189
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2190
      // Marshal the base address arguments now, freeing registers.
39232
118d17fef4f9 8156731: aarch64: java/util/Arrays/Correct.java fails due to _generic_arraycopy stub routine
fyang
parents: 38233
diff changeset
  2191
      __ lea(from, Address(src, src_pos, Address::lsl(LogBytesPerHeapOop)));
35119
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2192
      __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
39232
118d17fef4f9 8156731: aarch64: java/util/Arrays/Correct.java fails due to _generic_arraycopy stub routine
fyang
parents: 38233
diff changeset
  2193
      __ lea(to, Address(dst, dst_pos, Address::lsl(LogBytesPerHeapOop)));
35119
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2194
      __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2195
      __ movw(count, length);           // length (reloaded)
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2196
      Register sco_temp = c_rarg3;      // this register is free now
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2197
      assert_different_registers(from, to, count, sco_temp,
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2198
                                 rscratch2_dst_klass, scratch_src_klass);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2199
      // assert_clean_int(count, sco_temp);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2200
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2201
      // Generate the type check.
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2202
      const int sco_offset = in_bytes(Klass::super_check_offset_offset());
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2203
      __ ldrw(sco_temp, Address(rscratch2_dst_klass, sco_offset));
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2204
      // assert_clean_int(sco_temp, r18);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2205
      generate_type_check(scratch_src_klass, sco_temp, rscratch2_dst_klass, L_plain_copy);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2206
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2207
      // Fetch destination element klass from the ObjArrayKlass header.
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2208
      int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2209
      __ ldr(rscratch2_dst_klass, Address(rscratch2_dst_klass, ek_offset));
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2210
      __ ldrw(sco_temp, Address(rscratch2_dst_klass, sco_offset));
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2211
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2212
      // the checkcast_copy loop needs two extra arguments:
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2213
      assert(c_rarg3 == sco_temp, "#3 already in place");
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2214
      // Set up arguments for checkcast_copy_entry.
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2215
      __ mov(c_rarg4, rscratch2_dst_klass);  // dst.klass.element_klass
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2216
      __ b(RuntimeAddress(checkcast_copy_entry));
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2217
    }
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2218
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2219
  __ BIND(L_failed);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2220
    __ mov(r0, -1);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2221
    __ leave();   // required for proper stackwalking of RuntimeStub frame
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2222
    __ ret(lr);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2223
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2224
    return start;
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2225
  }
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2226
38028
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2227
  //
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2228
  // Generate stub for array fill. If "aligned" is true, the
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2229
  // "to" address is assumed to be heapword aligned.
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2230
  //
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2231
  // Arguments for generated stub:
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2232
  //   to:    c_rarg0
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2233
  //   value: c_rarg1
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2234
  //   count: c_rarg2 treated as signed
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2235
  //
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2236
  address generate_fill(BasicType t, bool aligned, const char *name) {
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2237
    __ align(CodeEntryAlignment);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2238
    StubCodeMark mark(this, "StubRoutines", name);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2239
    address start = __ pc();
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2240
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2241
    BLOCK_COMMENT("Entry:");
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2242
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2243
    const Register to        = c_rarg0;  // source array address
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2244
    const Register value     = c_rarg1;  // value
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2245
    const Register count     = c_rarg2;  // elements count
38233
9f784c50b967 8155967: aarch64: fix register usage in block zeroing
enevill
parents: 38225
diff changeset
  2246
9f784c50b967 8155967: aarch64: fix register usage in block zeroing
enevill
parents: 38225
diff changeset
  2247
    const Register bz_base = r10;        // base for block_zero routine
9f784c50b967 8155967: aarch64: fix register usage in block zeroing
enevill
parents: 38225
diff changeset
  2248
    const Register cnt_words = r11;      // temp register
38028
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2249
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2250
    __ enter();
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2251
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2252
    Label L_fill_elements, L_exit1;
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2253
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2254
    int shift = -1;
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2255
    switch (t) {
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2256
      case T_BYTE:
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2257
        shift = 0;
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2258
        __ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2259
        __ bfi(value, value, 8, 8);   // 8 bit -> 16 bit
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2260
        __ bfi(value, value, 16, 16); // 16 bit -> 32 bit
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2261
        __ br(Assembler::LO, L_fill_elements);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2262
        break;
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2263
      case T_SHORT:
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2264
        shift = 1;
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2265
        __ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2266
        __ bfi(value, value, 16, 16); // 16 bit -> 32 bit
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2267
        __ br(Assembler::LO, L_fill_elements);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2268
        break;
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2269
      case T_INT:
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2270
        shift = 2;
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2271
        __ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2272
        __ br(Assembler::LO, L_fill_elements);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2273
        break;
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2274
      default: ShouldNotReachHere();
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2275
    }
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2276
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2277
    // Align source address at 8 bytes address boundary.
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2278
    Label L_skip_align1, L_skip_align2, L_skip_align4;
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2279
    if (!aligned) {
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2280
      switch (t) {
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2281
        case T_BYTE:
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2282
          // One byte misalignment happens only for byte arrays.
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2283
          __ tbz(to, 0, L_skip_align1);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2284
          __ strb(value, Address(__ post(to, 1)));
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2285
          __ subw(count, count, 1);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2286
          __ bind(L_skip_align1);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2287
          // Fallthrough
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2288
        case T_SHORT:
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2289
          // Two bytes misalignment happens only for byte and short (char) arrays.
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2290
          __ tbz(to, 1, L_skip_align2);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2291
          __ strh(value, Address(__ post(to, 2)));
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2292
          __ subw(count, count, 2 >> shift);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2293
          __ bind(L_skip_align2);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2294
          // Fallthrough
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2295
        case T_INT:
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2296
          // Align to 8 bytes, we know we are 4 byte aligned to start.
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2297
          __ tbz(to, 2, L_skip_align4);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2298
          __ strw(value, Address(__ post(to, 4)));
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2299
          __ subw(count, count, 4 >> shift);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2300
          __ bind(L_skip_align4);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2301
          break;
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2302
        default: ShouldNotReachHere();
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2303
      }
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2304
    }
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2305
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2306
    //
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2307
    //  Fill large chunks
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2308
    //
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2309
    __ lsrw(cnt_words, count, 3 - shift); // number of words
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2310
    __ bfi(value, value, 32, 32);         // 32 bit -> 64 bit
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2311
    __ subw(count, count, cnt_words, Assembler::LSL, 3 - shift);
38143
3b732f17ea7d 8155617: aarch64: ClearArray does not use DC ZVA
enevill
parents: 38051
diff changeset
  2312
    if (UseBlockZeroing) {
3b732f17ea7d 8155617: aarch64: ClearArray does not use DC ZVA
enevill
parents: 38051
diff changeset
  2313
      Label non_block_zeroing, rest;
45054
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
  2314
      // If the fill value is zero we can use the fast zero_words().
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
  2315
      __ cbnz(value, non_block_zeroing);
38233
9f784c50b967 8155967: aarch64: fix register usage in block zeroing
enevill
parents: 38225
diff changeset
  2316
      __ mov(bz_base, to);
45054
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
  2317
      __ add(to, to, cnt_words, Assembler::LSL, LogBytesPerWord);
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
  2318
      __ zero_words(bz_base, cnt_words);
38143
3b732f17ea7d 8155617: aarch64: ClearArray does not use DC ZVA
enevill
parents: 38051
diff changeset
  2319
      __ b(rest);
3b732f17ea7d 8155617: aarch64: ClearArray does not use DC ZVA
enevill
parents: 38051
diff changeset
  2320
      __ bind(non_block_zeroing);
3b732f17ea7d 8155617: aarch64: ClearArray does not use DC ZVA
enevill
parents: 38051
diff changeset
  2321
      __ fill_words(to, cnt_words, value);
3b732f17ea7d 8155617: aarch64: ClearArray does not use DC ZVA
enevill
parents: 38051
diff changeset
  2322
      __ bind(rest);
45054
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
  2323
    } else {
38143
3b732f17ea7d 8155617: aarch64: ClearArray does not use DC ZVA
enevill
parents: 38051
diff changeset
  2324
      __ fill_words(to, cnt_words, value);
3b732f17ea7d 8155617: aarch64: ClearArray does not use DC ZVA
enevill
parents: 38051
diff changeset
  2325
    }
38028
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2326
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2327
    // Remaining count is less than 8 bytes. Fill it by a single store.
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2328
    // Note that the total length is no less than 8 bytes.
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2329
    if (t == T_BYTE || t == T_SHORT) {
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2330
      Label L_exit1;
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2331
      __ cbzw(count, L_exit1);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2332
      __ add(to, to, count, Assembler::LSL, shift); // points to the end
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2333
      __ str(value, Address(to, -8));    // overwrite some elements
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2334
      __ bind(L_exit1);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2335
      __ leave();
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2336
      __ ret(lr);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2337
    }
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2338
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2339
    // Handle copies less than 8 bytes.
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2340
    Label L_fill_2, L_fill_4, L_exit2;
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2341
    __ bind(L_fill_elements);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2342
    switch (t) {
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2343
      case T_BYTE:
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2344
        __ tbz(count, 0, L_fill_2);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2345
        __ strb(value, Address(__ post(to, 1)));
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2346
        __ bind(L_fill_2);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2347
        __ tbz(count, 1, L_fill_4);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2348
        __ strh(value, Address(__ post(to, 2)));
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2349
        __ bind(L_fill_4);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2350
        __ tbz(count, 2, L_exit2);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2351
        __ strw(value, Address(to));
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2352
        break;
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2353
      case T_SHORT:
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2354
        __ tbz(count, 0, L_fill_4);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2355
        __ strh(value, Address(__ post(to, 2)));
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2356
        __ bind(L_fill_4);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2357
        __ tbz(count, 1, L_exit2);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2358
        __ strw(value, Address(to));
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2359
        break;
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2360
      case T_INT:
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2361
        __ cbzw(count, L_exit2);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2362
        __ strw(value, Address(to));
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2363
        break;
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2364
      default: ShouldNotReachHere();
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2365
    }
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2366
    __ bind(L_exit2);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2367
    __ leave();
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2368
    __ ret(lr);
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2369
    return start;
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2370
  }
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2371
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2372
  void generate_arraycopy_stubs() {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2373
    address entry;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2374
    address entry_jbyte_arraycopy;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2375
    address entry_jshort_arraycopy;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2376
    address entry_jint_arraycopy;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2377
    address entry_oop_arraycopy;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2378
    address entry_jlong_arraycopy;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2379
    address entry_checkcast_arraycopy;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2380
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2381
    generate_copy_longs(copy_f, r0, r1, rscratch2, copy_forwards);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2382
    generate_copy_longs(copy_b, r0, r1, rscratch2, copy_backwards);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2383
45054
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
  2384
    StubRoutines::aarch64::_zero_blocks = generate_zero_blocks();
38143
3b732f17ea7d 8155617: aarch64: ClearArray does not use DC ZVA
enevill
parents: 38051
diff changeset
  2385
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2386
    //*** jbyte
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2387
    // Always need aligned and unaligned versions
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2388
    StubRoutines::_jbyte_disjoint_arraycopy         = generate_disjoint_byte_copy(false, &entry,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2389
                                                                                  "jbyte_disjoint_arraycopy");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2390
    StubRoutines::_jbyte_arraycopy                  = generate_conjoint_byte_copy(false, entry,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2391
                                                                                  &entry_jbyte_arraycopy,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2392
                                                                                  "jbyte_arraycopy");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2393
    StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2394
                                                                                  "arrayof_jbyte_disjoint_arraycopy");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2395
    StubRoutines::_arrayof_jbyte_arraycopy          = generate_conjoint_byte_copy(true, entry, NULL,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2396
                                                                                  "arrayof_jbyte_arraycopy");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2397
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2398
    //*** jshort
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2399
    // Always need aligned and unaligned versions
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2400
    StubRoutines::_jshort_disjoint_arraycopy         = generate_disjoint_short_copy(false, &entry,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2401
                                                                                    "jshort_disjoint_arraycopy");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2402
    StubRoutines::_jshort_arraycopy                  = generate_conjoint_short_copy(false, entry,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2403
                                                                                    &entry_jshort_arraycopy,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2404
                                                                                    "jshort_arraycopy");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2405
    StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2406
                                                                                    "arrayof_jshort_disjoint_arraycopy");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2407
    StubRoutines::_arrayof_jshort_arraycopy          = generate_conjoint_short_copy(true, entry, NULL,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2408
                                                                                    "arrayof_jshort_arraycopy");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2409
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2410
    //*** jint
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2411
    // Aligned versions
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2412
    StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2413
                                                                                "arrayof_jint_disjoint_arraycopy");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2414
    StubRoutines::_arrayof_jint_arraycopy          = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2415
                                                                                "arrayof_jint_arraycopy");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2416
    // In 64 bit we need both aligned and unaligned versions of jint arraycopy.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2417
    // entry_jint_arraycopy always points to the unaligned version
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2418
    StubRoutines::_jint_disjoint_arraycopy         = generate_disjoint_int_copy(false, &entry,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2419
                                                                                "jint_disjoint_arraycopy");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2420
    StubRoutines::_jint_arraycopy                  = generate_conjoint_int_copy(false, entry,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2421
                                                                                &entry_jint_arraycopy,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2422
                                                                                "jint_arraycopy");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2423
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2424
    //*** jlong
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2425
    // It is always aligned
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2426
    StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2427
                                                                                  "arrayof_jlong_disjoint_arraycopy");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2428
    StubRoutines::_arrayof_jlong_arraycopy          = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2429
                                                                                  "arrayof_jlong_arraycopy");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2430
    StubRoutines::_jlong_disjoint_arraycopy         = StubRoutines::_arrayof_jlong_disjoint_arraycopy;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2431
    StubRoutines::_jlong_arraycopy                  = StubRoutines::_arrayof_jlong_arraycopy;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2432
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2433
    //*** oops
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2434
    {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2435
      // With compressed oops we need unaligned versions; notice that
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2436
      // we overwrite entry_oop_arraycopy.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2437
      bool aligned = !UseCompressedOops;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2438
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2439
      StubRoutines::_arrayof_oop_disjoint_arraycopy
36326
d25af58cfc94 8150045: arraycopy causes segfaults in SATB during garbage collection
aph
parents: 35843
diff changeset
  2440
        = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy",
d25af58cfc94 8150045: arraycopy causes segfaults in SATB during garbage collection
aph
parents: 35843
diff changeset
  2441
                                     /*dest_uninitialized*/false);
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2442
      StubRoutines::_arrayof_oop_arraycopy
36326
d25af58cfc94 8150045: arraycopy causes segfaults in SATB during garbage collection
aph
parents: 35843
diff changeset
  2443
        = generate_conjoint_oop_copy(aligned, entry, &entry_oop_arraycopy, "arrayof_oop_arraycopy",
d25af58cfc94 8150045: arraycopy causes segfaults in SATB during garbage collection
aph
parents: 35843
diff changeset
  2444
                                     /*dest_uninitialized*/false);
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2445
      // Aligned versions without pre-barriers
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2446
      StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2447
        = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy_uninit",
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2448
                                     /*dest_uninitialized*/true);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2449
      StubRoutines::_arrayof_oop_arraycopy_uninit
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2450
        = generate_conjoint_oop_copy(aligned, entry, NULL, "arrayof_oop_arraycopy_uninit",
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2451
                                     /*dest_uninitialized*/true);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2452
    }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2453
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2454
    StubRoutines::_oop_disjoint_arraycopy            = StubRoutines::_arrayof_oop_disjoint_arraycopy;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2455
    StubRoutines::_oop_arraycopy                     = StubRoutines::_arrayof_oop_arraycopy;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2456
    StubRoutines::_oop_disjoint_arraycopy_uninit     = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2457
    StubRoutines::_oop_arraycopy_uninit              = StubRoutines::_arrayof_oop_arraycopy_uninit;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2458
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2459
    StubRoutines::_checkcast_arraycopy        = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2460
    StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2461
                                                                        /*dest_uninitialized*/true);
35119
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2462
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2463
    StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy",
37271
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  2464
                                                              entry_jbyte_arraycopy,
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  2465
                                                              entry_jshort_arraycopy,
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  2466
                                                              entry_jint_arraycopy,
95774d8b3cc2 8152840: aarch64: improve _unsafe_arraycopy stub routine
fyang
parents: 36595
diff changeset
  2467
                                                              entry_jlong_arraycopy);
35119
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2468
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2469
    StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy",
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2470
                                                               entry_jbyte_arraycopy,
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2471
                                                               entry_jshort_arraycopy,
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2472
                                                               entry_jint_arraycopy,
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2473
                                                               entry_oop_arraycopy,
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2474
                                                               entry_jlong_arraycopy,
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2475
                                                               entry_checkcast_arraycopy);
7af8d9f08a25 8145320: Create unsafe_arraycopy and generic_arraycopy for AArch64
aph
parents: 33198
diff changeset
  2476
38028
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2477
    StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2478
    StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2479
    StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2480
    StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2481
    StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
be8cc044b136 8153797: aarch64: Add Arrays.fill stub code
enevill
parents: 37271
diff changeset
  2482
    StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2483
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2484
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2485
  // No math stubs are generated here: the body does nothing except report
  // itself through Unimplemented().
  void generate_math_stubs() {
    Unimplemented();
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2486
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2487
  // Arguments:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2488
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2489
  // Inputs:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2490
  //   c_rarg0   - source byte array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2491
  //   c_rarg1   - destination byte array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2492
  //   c_rarg2   - K (key) in little endian int array
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2493
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2494
  address generate_aescrypt_encryptBlock() {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2495
    __ align(CodeEntryAlignment);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2496
    StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2497
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2498
    Label L_doLast;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2499
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2500
    const Register from        = c_rarg0;  // source array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2501
    const Register to          = c_rarg1;  // destination array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2502
    const Register key         = c_rarg2;  // key array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2503
    const Register keylen      = rscratch1;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2504
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2505
    address start = __ pc();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2506
    __ enter();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2507
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2508
    __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2509
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2510
    __ ld1(v0, __ T16B, from); // get 16 bytes of input
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2511
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2512
    __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2513
    __ rev32(v1, __ T16B, v1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2514
    __ rev32(v2, __ T16B, v2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2515
    __ rev32(v3, __ T16B, v3);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2516
    __ rev32(v4, __ T16B, v4);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2517
    __ aese(v0, v1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2518
    __ aesmc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2519
    __ aese(v0, v2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2520
    __ aesmc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2521
    __ aese(v0, v3);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2522
    __ aesmc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2523
    __ aese(v0, v4);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2524
    __ aesmc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2525
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2526
    __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2527
    __ rev32(v1, __ T16B, v1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2528
    __ rev32(v2, __ T16B, v2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2529
    __ rev32(v3, __ T16B, v3);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2530
    __ rev32(v4, __ T16B, v4);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2531
    __ aese(v0, v1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2532
    __ aesmc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2533
    __ aese(v0, v2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2534
    __ aesmc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2535
    __ aese(v0, v3);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2536
    __ aesmc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2537
    __ aese(v0, v4);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2538
    __ aesmc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2539
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2540
    __ ld1(v1, v2, __ T16B, __ post(key, 32));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2541
    __ rev32(v1, __ T16B, v1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2542
    __ rev32(v2, __ T16B, v2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2543
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2544
    __ cmpw(keylen, 44);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2545
    __ br(Assembler::EQ, L_doLast);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2546
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2547
    __ aese(v0, v1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2548
    __ aesmc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2549
    __ aese(v0, v2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2550
    __ aesmc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2551
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2552
    __ ld1(v1, v2, __ T16B, __ post(key, 32));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2553
    __ rev32(v1, __ T16B, v1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2554
    __ rev32(v2, __ T16B, v2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2555
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2556
    __ cmpw(keylen, 52);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2557
    __ br(Assembler::EQ, L_doLast);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2558
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2559
    __ aese(v0, v1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2560
    __ aesmc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2561
    __ aese(v0, v2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2562
    __ aesmc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2563
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2564
    __ ld1(v1, v2, __ T16B, __ post(key, 32));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2565
    __ rev32(v1, __ T16B, v1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2566
    __ rev32(v2, __ T16B, v2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2567
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2568
    __ BIND(L_doLast);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2569
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2570
    __ aese(v0, v1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2571
    __ aesmc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2572
    __ aese(v0, v2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2573
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2574
    __ ld1(v1, __ T16B, key);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2575
    __ rev32(v1, __ T16B, v1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2576
    __ eor(v0, __ T16B, v0, v1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2577
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2578
    __ st1(v0, __ T16B, to);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2579
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2580
    __ mov(r0, 0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2581
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2582
    __ leave();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2583
    __ ret(lr);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2584
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2585
    return start;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2586
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2587
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2588
  // Arguments:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2589
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2590
  // Inputs:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2591
  //   c_rarg0   - source byte array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2592
  //   c_rarg1   - destination byte array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2593
  //   c_rarg2   - K (key) in little endian int array
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2594
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2595
  address generate_aescrypt_decryptBlock() {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2596
    assert(UseAES, "need AES instructions and misaligned SSE support");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2597
    __ align(CodeEntryAlignment);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2598
    StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2599
    Label L_doLast;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2600
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2601
    const Register from        = c_rarg0;  // source array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2602
    const Register to          = c_rarg1;  // destination array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2603
    const Register key         = c_rarg2;  // key array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2604
    const Register keylen      = rscratch1;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2605
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2606
    address start = __ pc();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2607
    __ enter(); // required for proper stackwalking of RuntimeStub frame
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2608
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2609
    __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2610
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2611
    __ ld1(v0, __ T16B, from); // get 16 bytes of input
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2612
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2613
    __ ld1(v5, __ T16B, __ post(key, 16));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2614
    __ rev32(v5, __ T16B, v5);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2615
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2616
    __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2617
    __ rev32(v1, __ T16B, v1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2618
    __ rev32(v2, __ T16B, v2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2619
    __ rev32(v3, __ T16B, v3);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2620
    __ rev32(v4, __ T16B, v4);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2621
    __ aesd(v0, v1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2622
    __ aesimc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2623
    __ aesd(v0, v2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2624
    __ aesimc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2625
    __ aesd(v0, v3);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2626
    __ aesimc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2627
    __ aesd(v0, v4);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2628
    __ aesimc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2629
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2630
    __ ld1(v1, v2, v3, v4, __ T16B, __ post(key, 64));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2631
    __ rev32(v1, __ T16B, v1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2632
    __ rev32(v2, __ T16B, v2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2633
    __ rev32(v3, __ T16B, v3);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2634
    __ rev32(v4, __ T16B, v4);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2635
    __ aesd(v0, v1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2636
    __ aesimc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2637
    __ aesd(v0, v2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2638
    __ aesimc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2639
    __ aesd(v0, v3);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2640
    __ aesimc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2641
    __ aesd(v0, v4);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2642
    __ aesimc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2643
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2644
    __ ld1(v1, v2, __ T16B, __ post(key, 32));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2645
    __ rev32(v1, __ T16B, v1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2646
    __ rev32(v2, __ T16B, v2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2647
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2648
    __ cmpw(keylen, 44);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2649
    __ br(Assembler::EQ, L_doLast);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2650
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2651
    __ aesd(v0, v1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2652
    __ aesimc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2653
    __ aesd(v0, v2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2654
    __ aesimc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2655
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2656
    __ ld1(v1, v2, __ T16B, __ post(key, 32));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2657
    __ rev32(v1, __ T16B, v1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2658
    __ rev32(v2, __ T16B, v2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2659
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2660
    __ cmpw(keylen, 52);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2661
    __ br(Assembler::EQ, L_doLast);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2662
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2663
    __ aesd(v0, v1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2664
    __ aesimc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2665
    __ aesd(v0, v2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2666
    __ aesimc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2667
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2668
    __ ld1(v1, v2, __ T16B, __ post(key, 32));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2669
    __ rev32(v1, __ T16B, v1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2670
    __ rev32(v2, __ T16B, v2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2671
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2672
    __ BIND(L_doLast);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2673
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2674
    __ aesd(v0, v1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2675
    __ aesimc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2676
    __ aesd(v0, v2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2677
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2678
    __ eor(v0, __ T16B, v0, v5);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2679
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2680
    __ st1(v0, __ T16B, to);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2681
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2682
    __ mov(r0, 0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2683
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2684
    __ leave();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2685
    __ ret(lr);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2686
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2687
    return start;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2688
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2689
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2690
  // Arguments:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2691
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2692
  // Inputs:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2693
  //   c_rarg0   - source byte array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2694
  //   c_rarg1   - destination byte array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2695
  //   c_rarg2   - K (key) in little endian int array
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2696
  //   c_rarg3   - r vector byte array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2697
  //   c_rarg4   - input length
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2698
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2699
  // Output:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2700
  //   x0        - input length
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2701
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2702
  address generate_cipherBlockChaining_encryptAESCrypt() {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2703
    assert(UseAES, "need AES instructions and misaligned SSE support");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2704
    __ align(CodeEntryAlignment);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2705
    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2706
42577
c47121f6307d 8169529: AArch64: Revert old JDK-8167595 changes after JDK-8159035 fix is pushed
rraghavan
parents: 41729
diff changeset
  2707
    Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52;
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2708
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2709
    const Register from        = c_rarg0;  // source array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2710
    const Register to          = c_rarg1;  // destination array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2711
    const Register key         = c_rarg2;  // key array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2712
    const Register rvec        = c_rarg3;  // r byte array initialized from initvector array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2713
                                           // and left with the results of the last encryption block
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2714
    const Register len_reg     = c_rarg4;  // src len (must be multiple of blocksize 16)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2715
    const Register keylen      = rscratch1;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2716
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2717
    address start = __ pc();
41729
d852f04fa9df 8167595: AArch64: SEGV in stub code cipherBlockChaining_decryptAESCrypt
aph
parents: 40643
diff changeset
  2718
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2719
      __ enter();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2720
42577
c47121f6307d 8169529: AArch64: Revert old JDK-8167595 changes after JDK-8159035 fix is pushed
rraghavan
parents: 41729
diff changeset
  2721
      __ movw(rscratch2, len_reg);
41729
d852f04fa9df 8167595: AArch64: SEGV in stub code cipherBlockChaining_decryptAESCrypt
aph
parents: 40643
diff changeset
  2722
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2723
      __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2724
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2725
      __ ld1(v0, __ T16B, rvec);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2726
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2727
      __ cmpw(keylen, 52);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2728
      __ br(Assembler::CC, L_loadkeys_44);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2729
      __ br(Assembler::EQ, L_loadkeys_52);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2730
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2731
      __ ld1(v17, v18, __ T16B, __ post(key, 32));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2732
      __ rev32(v17, __ T16B, v17);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2733
      __ rev32(v18, __ T16B, v18);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2734
    __ BIND(L_loadkeys_52);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2735
      __ ld1(v19, v20, __ T16B, __ post(key, 32));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2736
      __ rev32(v19, __ T16B, v19);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2737
      __ rev32(v20, __ T16B, v20);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2738
    __ BIND(L_loadkeys_44);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2739
      __ ld1(v21, v22, v23, v24, __ T16B, __ post(key, 64));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2740
      __ rev32(v21, __ T16B, v21);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2741
      __ rev32(v22, __ T16B, v22);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2742
      __ rev32(v23, __ T16B, v23);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2743
      __ rev32(v24, __ T16B, v24);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2744
      __ ld1(v25, v26, v27, v28, __ T16B, __ post(key, 64));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2745
      __ rev32(v25, __ T16B, v25);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2746
      __ rev32(v26, __ T16B, v26);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2747
      __ rev32(v27, __ T16B, v27);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2748
      __ rev32(v28, __ T16B, v28);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2749
      __ ld1(v29, v30, v31, __ T16B, key);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2750
      __ rev32(v29, __ T16B, v29);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2751
      __ rev32(v30, __ T16B, v30);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2752
      __ rev32(v31, __ T16B, v31);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2753
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2754
    __ BIND(L_aes_loop);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2755
      __ ld1(v1, __ T16B, __ post(from, 16));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2756
      __ eor(v0, __ T16B, v0, v1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2757
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2758
      __ br(Assembler::CC, L_rounds_44);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2759
      __ br(Assembler::EQ, L_rounds_52);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2760
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2761
      __ aese(v0, v17); __ aesmc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2762
      __ aese(v0, v18); __ aesmc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2763
    __ BIND(L_rounds_52);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2764
      __ aese(v0, v19); __ aesmc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2765
      __ aese(v0, v20); __ aesmc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2766
    __ BIND(L_rounds_44);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2767
      __ aese(v0, v21); __ aesmc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2768
      __ aese(v0, v22); __ aesmc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2769
      __ aese(v0, v23); __ aesmc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2770
      __ aese(v0, v24); __ aesmc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2771
      __ aese(v0, v25); __ aesmc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2772
      __ aese(v0, v26); __ aesmc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2773
      __ aese(v0, v27); __ aesmc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2774
      __ aese(v0, v28); __ aesmc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2775
      __ aese(v0, v29); __ aesmc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2776
      __ aese(v0, v30);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2777
      __ eor(v0, __ T16B, v0, v31);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2778
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2779
      __ st1(v0, __ T16B, __ post(to, 16));
41729
d852f04fa9df 8167595: AArch64: SEGV in stub code cipherBlockChaining_decryptAESCrypt
aph
parents: 40643
diff changeset
  2780
d852f04fa9df 8167595: AArch64: SEGV in stub code cipherBlockChaining_decryptAESCrypt
aph
parents: 40643
diff changeset
  2781
      __ subw(len_reg, len_reg, 16);
d852f04fa9df 8167595: AArch64: SEGV in stub code cipherBlockChaining_decryptAESCrypt
aph
parents: 40643
diff changeset
  2782
      __ cbnzw(len_reg, L_aes_loop);
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2783
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2784
      __ st1(v0, __ T16B, rvec);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2785
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2786
      __ mov(r0, rscratch2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2787
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2788
      __ leave();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2789
      __ ret(lr);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2790
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2791
      return start;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2792
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2793
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2794
  // Arguments:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2795
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2796
  // Inputs:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2797
  //   c_rarg0   - source byte array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2798
  //   c_rarg1   - destination byte array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2799
  //   c_rarg2   - K (key) in little endian int array
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2800
  //   c_rarg3   - r vector byte array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2801
  //   c_rarg4   - input length
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2802
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2803
  // Output:
35135
twisti
parents: 35119 34664
diff changeset
  2804
  //   r0        - input length
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2805
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2806
  address generate_cipherBlockChaining_decryptAESCrypt() {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2807
    assert(UseAES, "need AES instructions and misaligned SSE support");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2808
    __ align(CodeEntryAlignment);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2809
    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2810
42577
c47121f6307d 8169529: AArch64: Revert old JDK-8167595 changes after JDK-8159035 fix is pushed
rraghavan
parents: 41729
diff changeset
  2811
    Label L_loadkeys_44, L_loadkeys_52, L_aes_loop, L_rounds_44, L_rounds_52;
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2812
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2813
    const Register from        = c_rarg0;  // source array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2814
    const Register to          = c_rarg1;  // destination array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2815
    const Register key         = c_rarg2;  // key array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2816
    const Register rvec        = c_rarg3;  // r byte array initialized from initvector array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2817
                                           // and left with the results of the last encryption block
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2818
    const Register len_reg     = c_rarg4;  // src len (must be multiple of blocksize 16)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2819
    const Register keylen      = rscratch1;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2820
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2821
    address start = __ pc();
41729
d852f04fa9df 8167595: AArch64: SEGV in stub code cipherBlockChaining_decryptAESCrypt
aph
parents: 40643
diff changeset
  2822
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2823
      __ enter();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2824
42577
c47121f6307d 8169529: AArch64: Revert old JDK-8167595 changes after JDK-8159035 fix is pushed
rraghavan
parents: 41729
diff changeset
  2825
      __ movw(rscratch2, len_reg);
41729
d852f04fa9df 8167595: AArch64: SEGV in stub code cipherBlockChaining_decryptAESCrypt
aph
parents: 40643
diff changeset
  2826
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2827
      __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2828
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2829
      __ ld1(v2, __ T16B, rvec);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2830
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2831
      __ ld1(v31, __ T16B, __ post(key, 16));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2832
      __ rev32(v31, __ T16B, v31);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2833
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2834
      __ cmpw(keylen, 52);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2835
      __ br(Assembler::CC, L_loadkeys_44);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2836
      __ br(Assembler::EQ, L_loadkeys_52);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2837
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2838
      __ ld1(v17, v18, __ T16B, __ post(key, 32));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2839
      __ rev32(v17, __ T16B, v17);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2840
      __ rev32(v18, __ T16B, v18);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2841
    __ BIND(L_loadkeys_52);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2842
      __ ld1(v19, v20, __ T16B, __ post(key, 32));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2843
      __ rev32(v19, __ T16B, v19);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2844
      __ rev32(v20, __ T16B, v20);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2845
    __ BIND(L_loadkeys_44);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2846
      __ ld1(v21, v22, v23, v24, __ T16B, __ post(key, 64));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2847
      __ rev32(v21, __ T16B, v21);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2848
      __ rev32(v22, __ T16B, v22);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2849
      __ rev32(v23, __ T16B, v23);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2850
      __ rev32(v24, __ T16B, v24);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2851
      __ ld1(v25, v26, v27, v28, __ T16B, __ post(key, 64));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2852
      __ rev32(v25, __ T16B, v25);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2853
      __ rev32(v26, __ T16B, v26);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2854
      __ rev32(v27, __ T16B, v27);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2855
      __ rev32(v28, __ T16B, v28);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2856
      __ ld1(v29, v30, __ T16B, key);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2857
      __ rev32(v29, __ T16B, v29);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2858
      __ rev32(v30, __ T16B, v30);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2859
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2860
    __ BIND(L_aes_loop);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2861
      __ ld1(v0, __ T16B, __ post(from, 16));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2862
      __ orr(v1, __ T16B, v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2863
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2864
      __ br(Assembler::CC, L_rounds_44);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2865
      __ br(Assembler::EQ, L_rounds_52);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2866
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2867
      __ aesd(v0, v17); __ aesimc(v0, v0);
34664
41c821224dd7 8144201: aarch64: jdk/test/com/sun/net/httpserver/Test6a.java fails with --enable-unlimited-crypto
fyang
parents: 33198
diff changeset
  2868
      __ aesd(v0, v18); __ aesimc(v0, v0);
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2869
    __ BIND(L_rounds_52);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2870
      __ aesd(v0, v19); __ aesimc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2871
      __ aesd(v0, v20); __ aesimc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2872
    __ BIND(L_rounds_44);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2873
      __ aesd(v0, v21); __ aesimc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2874
      __ aesd(v0, v22); __ aesimc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2875
      __ aesd(v0, v23); __ aesimc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2876
      __ aesd(v0, v24); __ aesimc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2877
      __ aesd(v0, v25); __ aesimc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2878
      __ aesd(v0, v26); __ aesimc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2879
      __ aesd(v0, v27); __ aesimc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2880
      __ aesd(v0, v28); __ aesimc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2881
      __ aesd(v0, v29); __ aesimc(v0, v0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2882
      __ aesd(v0, v30);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2883
      __ eor(v0, __ T16B, v0, v31);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2884
      __ eor(v0, __ T16B, v0, v2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2885
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2886
      __ st1(v0, __ T16B, __ post(to, 16));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2887
      __ orr(v2, __ T16B, v1, v1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2888
41729
d852f04fa9df 8167595: AArch64: SEGV in stub code cipherBlockChaining_decryptAESCrypt
aph
parents: 40643
diff changeset
  2889
      __ subw(len_reg, len_reg, 16);
d852f04fa9df 8167595: AArch64: SEGV in stub code cipherBlockChaining_decryptAESCrypt
aph
parents: 40643
diff changeset
  2890
      __ cbnzw(len_reg, L_aes_loop);
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2891
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2892
      __ st1(v2, __ T16B, rvec);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2893
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2894
      __ mov(r0, rscratch2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2895
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2896
      __ leave();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2897
      __ ret(lr);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2898
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2899
    return start;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2900
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2901
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2902
  // Arguments:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2903
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2904
  // Inputs:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2905
  //   c_rarg0   - byte[]  source+offset
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2906
  //   c_rarg1   - int[]   SHA.state
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2907
  //   c_rarg2   - int     offset
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2908
  //   c_rarg3   - int     limit
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2909
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2910
  // Emits a stub that runs the SHA-1 compression function over one 64-byte
  // block (multi_block == false) or over consecutive 64-byte blocks
  // (multi_block == true) using the SHA1* SIMD instructions.
  //
  //   multi_block - when true, buf is post-incremented by 64 per block and the
  //                 loop repeats while (ofs += 64) <= limit; the final ofs is
  //                 returned to the caller in c_rarg0.
  //   name        - stub name recorded by the StubCodeMark.
  //
  // Returns the address of the first instruction of the generated stub.
  address generate_sha1_implCompress(bool multi_block, const char *name) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    // Incoming arguments.
    Register buf   = c_rarg0;
    Register state = c_rarg1;
    Register ofs   = c_rarg2;
    Register limit = c_rarg3;

    Label keys;
    Label sha1_loop;

    // The four round keys are emitted as literal data after the final ret
    // (see the bind of 'keys' below); ld4r loads them into v0..v3.
    __ adr(rscratch1, keys);
    __ ld4r(v0, v1, v2, v3, __ T4S, Address(rscratch1));

    // Five words of state: the first four go into v6, the fifth into v7.
    __ ldrq(v6, Address(state, 0));
    __ ldrs(v7, Address(state, 16));

    __ BIND(sha1_loop);

    // Fetch 64 bytes of input into v16..v19 and byte-reverse every 32-bit word.
    __ ld1(v16, v17, v18, v19, __ T4S, multi_block ? __ post(buf, 64) : buf);
    __ rev32(v16, __ T16B, v16);
    __ rev32(v17, __ T16B, v17);
    __ rev32(v18, __ T16B, v18);
    __ rev32(v19, __ T16B, v19);

    // Prime the pipeline: v4 = first schedule vector + first key,
    // v20 = working copy of the state held in v6.
    __ addv(v4, __ T4S, v16, v0);
    __ orr(v20, __ T16B, v6, v6);

    // Rotating window over the four message-schedule registers.
    FloatRegister msg0 = v16;
    FloatRegister msg1 = v17;
    FloatRegister msg2 = v18;
    FloatRegister msg3 = v19;

    for (int round = 0; round < 20; round++) {
      const bool odd = (round & 1) != 0;

      // v4/v5 and v21/v22 ping-pong between rounds: one of each pair is
      // consumed by this round while the other is written for the next one.
      FloatRegister wk_next = odd ? v4 : v5;
      FloatRegister wk_cur  = odd ? v5 : v4;
      FloatRegister e_next  = odd ? v21 : v22;
      // Round 0 takes its operand straight from the loaded state word in v7.
      FloatRegister e_cur   = (round == 0) ? v7 : (odd ? v22 : v21);

      // Key added to msg1 in this round (the sum is consumed by the next round).
      FloatRegister key = v3;
      if (round < 4) {
        key = v0;
      } else if (round < 9) {
        key = v1;
      } else if (round < 14) {
        key = v2;
      }

      if (round < 16) {
        __ sha1su0(msg0, __ T4S, msg1, msg2);
      }
      if (round < 19) {
        __ addv(wk_next, __ T4S, msg1, key);
      }
      __ sha1h(e_next, __ T4S, v20);

      // Rounds 0-4 use sha1c, rounds 10-14 use sha1m, all others use sha1p.
      if (round < 5) {
        __ sha1c(v20, __ T4S, e_cur, wk_cur);
      } else if (round >= 10 && round < 15) {
        __ sha1m(v20, __ T4S, e_cur, wk_cur);
      } else {
        __ sha1p(v20, __ T4S, e_cur, wk_cur);
      }

      if (round < 16) {
        __ sha1su1(msg0, __ T4S, msg3);
      }

      // Rotate the schedule window by one register.
      FloatRegister recycled = msg0;
      msg0 = msg1;
      msg1 = msg2;
      msg2 = msg3;
      msg3 = recycled;
    }

    // Fold the result of this block back into the running state.
    __ addv(v7, __ T2S, v7, v21);
    __ addv(v6, __ T4S, v6, v20);

    if (multi_block) {
      // Advance the offset and keep going while it does not exceed limit.
      __ add(ofs, ofs, 64);
      __ cmp(ofs, limit);
      __ br(Assembler::LE, sha1_loop);
      __ mov(c_rarg0, ofs); // return ofs
    }

    // Write the updated five-word state back.
    __ strq(v6, Address(state, 0));
    __ strs(v7, Address(state, 16));

    __ ret(lr);

    // Round-key literals read by the adr/ld4r pair at the top of the stub.
    __ bind(keys);
    __ emit_int32(0x5a827999);
    __ emit_int32(0x6ed9eba1);
    __ emit_int32(0x8f1bbcdc);
    __ emit_int32(0xca62c1d6);

    return start;
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2992
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2993
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2994
  // Arguments:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2995
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2996
  // Inputs:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2997
  //   c_rarg0   - byte[]  source+offset
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2998
  //   c_rarg1   - int[]   SHA.state
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  2999
  //   c_rarg2   - int     offset
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3000
  //   c_rarg3   - int     limit
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3001
  //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3002
  // Emits a stub that runs the SHA-256 compression function over one 64-byte
  // block (multi_block == false) or over consecutive 64-byte blocks
  // (multi_block == true) using the SHA256* SIMD instructions.
  //
  //   multi_block - when true, buf is post-incremented by 64 per block and the
  //                 loop repeats while (ofs += 64) <= limit; the final ofs is
  //                 returned to the caller in c_rarg0.
  //   name        - stub name recorded by the StubCodeMark.
  //
  // Returns the address of the first instruction of the generated stub.
  address generate_sha256_implCompress(bool multi_block, const char *name) {
    // The 64 SHA-256 round constants, loaded by the stub at run time through
    // an ExternalAddress.
    static const uint32_t round_consts[64] = {
      0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
      0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
      0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
      0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
      0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
      0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
      0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
      0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
      0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
      0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
      0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
      0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
      0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
      0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
      0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
      0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
    };
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", name);
    address start = __ pc();

    // Incoming arguments.
    Register buf   = c_rarg0;
    Register state = c_rarg1;
    Register ofs   = c_rarg2;
    Register limit = c_rarg3;

    Label sha256_loop;

    // v8..v11 hold the message schedule below. Their low 64 bits are
    // callee-saved under the AArch64 procedure call standard, so spill them
    // (as d-register pairs) and reload them before returning.
    __ stpd(v8, v9, __ pre(sp, -32));
    __ stpd(v10, v11, Address(sp, 16));

    // Register roles:
    //   dga == v0
    //   dgb == v1
    //   dg0 == v2
    //   dg1 == v3
    //   dg2 == v4
    //   t0  == v6
    //   t1  == v7

    // load the 64 round constants (16 vectors of 4 words) into v16..v31
    __ lea(rscratch1, ExternalAddress((address)round_consts));
    __ ld1(v16, v17, v18, v19, __ T4S, __ post(rscratch1, 64));
    __ ld1(v20, v21, v22, v23, __ T4S, __ post(rscratch1, 64));
    __ ld1(v24, v25, v26, v27, __ T4S, __ post(rscratch1, 64));
    __ ld1(v28, v29, v30, v31, __ T4S, rscratch1);

    // load 8 words (256 bits) state
    __ ldpq(v0, v1, state);

    __ BIND(sha256_loop);
    // load 64 bytes of data into v8..v11 and byte-reverse every 32-bit word
    __ ld1(v8, v9, v10, v11, __ T4S, multi_block ? __ post(buf, 64) : buf);
    __ rev32(v8, __ T16B, v8);
    __ rev32(v9, __ T16B, v9);
    __ rev32(v10, __ T16B, v10);
    __ rev32(v11, __ T16B, v11);

    // Prime the pipeline: v6 = first schedule vector + first constant vector,
    // v2/v3 = working copies of the state held in v0/v1.
    __ addv(v6, __ T4S, v8, v16);
    __ orr(v2, __ T16B, v0, v0);
    __ orr(v3, __ T16B, v1, v1);

    // Rotating window over the four message-schedule registers.
    FloatRegister d0 = v8;
    FloatRegister d1 = v9;
    FloatRegister d2 = v10;
    FloatRegister d3 = v11;

    for (int round = 0; round < 16; round++) {
      // v6/v7 ping-pong: tmp2 holds the schedule+constant sum consumed by this
      // round, tmp1 receives the sum for the next round.
      FloatRegister tmp1 = (round & 1) ? v6 : v7;
      FloatRegister tmp2 = (round & 1) ? v7 : v6;

      if (round < 12) __ sha256su0(d0, __ T4S, d1);
      // sha256h overwrites v2, so keep a copy in v4 for sha256h2.
      __ orr(v4, __ T16B, v2, v2);
      if (round < 15)
        // Constant vector for the next round: v17 for round 0 .. v31 for round 14.
        __ addv(tmp1, __ T4S, d1, as_FloatRegister(round + 17));
      __ sha256h(v2, __ T4S, v3, tmp2);
      __ sha256h2(v3, __ T4S, v4, tmp2);
      if (round < 12) __ sha256su1(d0, __ T4S, d2, d3);

      // Rotate the schedule window by one register.
      tmp1 = d0; d0 = d1; d1 = d2; d2 = d3; d3 = tmp1;
    }

    // Fold the result of this block back into the running state.
    __ addv(v0, __ T4S, v0, v2);
    __ addv(v1, __ T4S, v1, v3);

    if (multi_block) {
      // Advance the offset and keep going while it does not exceed limit.
      __ add(ofs, ofs, 64);
      __ cmp(ofs, limit);
      __ br(Assembler::LE, sha256_loop);
      __ mov(c_rarg0, ofs); // return ofs
    }

    // Reload the spilled v8..v11 halves and release the 32-byte frame.
    __ ldpd(v10, v11, Address(sp, 16));
    __ ldpd(v8, v9, __ post(sp, 32));

    // Write the updated eight-word state back.
    __ stpq(v0, v1, state);

    __ ret(lr);

    return start;
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3107
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3108
#ifndef BUILTIN_SIM
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3109
  // Safefetch stubs.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3110
  void generate_safefetch(const char* name, int size, address* entry,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3111
                          address* fault_pc, address* continuation_pc) {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3112
    // safefetch signatures:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3113
    //   int      SafeFetch32(int*      adr, int      errValue);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3114
    //   intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3115
    //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3116
    // arguments:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3117
    //   c_rarg0 = adr
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3118
    //   c_rarg1 = errValue
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3119
    //
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3120
    // result:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3121
    //   PPC_RET  = *adr or errValue
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3122
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3123
    StubCodeMark mark(this, "StubRoutines", name);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3124
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3125
    // Entry point, pc or function descriptor.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3126
    *entry = __ pc();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3127
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3128
    // Load *adr into c_rarg1, may fault.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3129
    *fault_pc = __ pc();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3130
    switch (size) {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3131
      case 4:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3132
        // int32_t
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3133
        __ ldrw(c_rarg1, Address(c_rarg0, 0));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3134
        break;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3135
      case 8:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3136
        // int64_t
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3137
        __ ldr(c_rarg1, Address(c_rarg0, 0));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3138
        break;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3139
      default:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3140
        ShouldNotReachHere();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3141
    }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3142
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3143
    // return errValue or *adr
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3144
    *continuation_pc = __ pc();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3145
    __ mov(r0, c_rarg1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3146
    __ ret(lr);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3147
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3148
#endif
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3149
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3150
  /**
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3151
   *  Arguments:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3152
   *
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3153
   * Inputs:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3154
   *   c_rarg0   - int crc
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3155
   *   c_rarg1   - byte* buf
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3156
   *   c_rarg2   - int length
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3157
   *
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3158
   * Output:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3159
   *       r0   - int crc result
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3160
   */
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3161
  // Emits the "updateBytesCRC32" stub into the code buffer and returns the
  // address of its first instruction.
  address generate_updateBytesCRC32() {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3162
    // Generating this stub is only expected when the CRC32 intrinsic is enabled.
    assert(UseCRC32Intrinsics, "what are we doing here?");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3163
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3164
    // Align before creating the StubCodeMark, so the mark is opened after the
    // alignment call.
    __ align(CodeEntryAlignment);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3165
    StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3166
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3167
    // Current code position; this is the value returned to the caller.
    address start = __ pc();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3168
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3169
    const Register crc   = c_rarg0;  // crc
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3170
    const Register buf   = c_rarg1;  // source java byte array address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3171
    const Register len   = c_rarg2;  // length
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3172
    const Register table0 = c_rarg3; // crc_table address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3173
    // The remaining argument registers are handed to kernel_crc32 below; they
    // are not listed as inputs in the stub's header comment.
    // NOTE(review): presumably scratch/table registers for the kernel - confirm
    // against MacroAssembler::kernel_crc32.
    const Register table1 = c_rarg4;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3174
    const Register table2 = c_rarg5;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3175
    const Register table3 = c_rarg6;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3176
    const Register tmp3 = c_rarg7;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3177
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3178
    BLOCK_COMMENT("Entry:");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3179
    __ enter(); // required for proper stackwalking of RuntimeStub frame
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3180
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3181
    // The checksum code itself is emitted by the macro assembler's
    // kernel_crc32, given the registers above plus rscratch1/rscratch2.
    __ kernel_crc32(crc, buf, len,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3182
              table0, table1, table2, table3, rscratch1, rscratch2, tmp3);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3184
    __ leave(); // required for proper stackwalking of RuntimeStub frame
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3185
    // Return through the link register.
    __ ret(lr);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3186
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3187
    return start;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3188
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3189
30225
e9722ea461d4 8077615: AARCH64: Add C2 intrinsic for BigInteger::multiplyToLen() method
aph
parents: 29695
diff changeset
  3190
  /**
e9722ea461d4 8077615: AARCH64: Add C2 intrinsic for BigInteger::multiplyToLen() method
aph
parents: 29695
diff changeset
  3191
   *  Arguments:
e9722ea461d4 8077615: AARCH64: Add C2 intrinsic for BigInteger::multiplyToLen() method
aph
parents: 29695
diff changeset
  3192
   *
31591
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3193
   * Inputs:
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3194
   *   c_rarg0   - int crc
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3195
   *   c_rarg1   - byte* buf
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3196
   *   c_rarg2   - int length
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3197
   *   c_rarg3   - int* table
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3198
   *
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3199
   * Output:
32574
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3200
   *       r0   - int crc result
31591
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3201
   */
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3202
  // Emits the "updateBytesCRC32C" stub into the code buffer and returns the
  // address of its first instruction.
  address generate_updateBytesCRC32C() {
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3203
    // Generating this stub is only expected when the CRC32C intrinsic is enabled.
    assert(UseCRC32CIntrinsics, "what are we doing here?");
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3204
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3205
    // Align before creating the StubCodeMark, so the mark is opened after the
    // alignment call.
    __ align(CodeEntryAlignment);
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3206
    StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C");
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3207
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3208
    // Current code position; this is the value returned to the caller.
    address start = __ pc();
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3209
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3210
    const Register crc   = c_rarg0;  // crc
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3211
    const Register buf   = c_rarg1;  // source java byte array address
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3212
    const Register len   = c_rarg2;  // length
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3213
    const Register table0 = c_rarg3; // crc_table address
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3214
    // The remaining argument registers are handed to kernel_crc32c below.
    // NOTE(review): presumably scratch/table registers for the kernel - confirm
    // against MacroAssembler::kernel_crc32c.
    const Register table1 = c_rarg4;
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3215
    const Register table2 = c_rarg5;
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3216
    const Register table3 = c_rarg6;
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3217
    const Register tmp3 = c_rarg7;
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3218
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3219
    BLOCK_COMMENT("Entry:");
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3220
    __ enter(); // required for proper stackwalking of RuntimeStub frame
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3221
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3222
    // The checksum code itself is emitted by the macro assembler's
    // kernel_crc32c, given the registers above plus rscratch1/rscratch2.
    __ kernel_crc32c(crc, buf, len,
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3223
              table0, table1, table2, table3, rscratch1, rscratch2, tmp3);
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3224
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3225
    __ leave(); // required for proper stackwalking of RuntimeStub frame
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3226
    // Return through the link register.
    __ ret(lr);
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3227
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3228
    return start;
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3229
  }
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3230
33176
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3231
  /***
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3232
   *  Arguments:
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3233
   *
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3234
   *  Inputs:
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3235
   *   c_rarg0   - int   adler
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3236
   *   c_rarg1   - byte* buff
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3237
   *   c_rarg2   - int   len
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3238
   *
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3239
   * Output:
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3240
   *   c_rarg0   - int adler result
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3241
   */
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3242
  address generate_updateBytesAdler32() {
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3243
    __ align(CodeEntryAlignment);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3244
    StubCodeMark mark(this, "StubRoutines", "updateBytesAdler32");
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3245
    address start = __ pc();
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3246
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3247
    Label L_simple_by1_loop, L_nmax, L_nmax_loop, L_by16, L_by16_loop, L_by1_loop, L_do_mod, L_combine, L_by1;
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3248
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3249
    // Aliases
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3250
    Register adler  = c_rarg0;
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3251
    Register s1     = c_rarg0;
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3252
    Register s2     = c_rarg3;
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3253
    Register buff   = c_rarg1;
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3254
    Register len    = c_rarg2;
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3255
    Register nmax  = r4;
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3256
    Register base = r5;
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3257
    Register count = r6;
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3258
    Register temp0 = rscratch1;
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3259
    Register temp1 = rscratch2;
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3260
    Register temp2 = r7;
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3261
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3262
    // Max number of bytes we can process before having to take the mod
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3263
    // 0x15B0 is 5552 in decimal, the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3264
    unsigned long BASE = 0xfff1;
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3265
    unsigned long NMAX = 0x15B0;
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3266
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3267
    __ mov(base, BASE);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3268
    __ mov(nmax, NMAX);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3269
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3270
    // s1 is initialized to the lower 16 bits of adler
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3271
    // s2 is initialized to the upper 16 bits of adler
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3272
    __ ubfx(s2, adler, 16, 16);  // s2 = ((adler >> 16) & 0xffff)
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3273
    __ uxth(s1, adler);          // s1 = (adler & 0xffff)
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3274
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3275
    // The pipelined loop needs at least 16 elements for 1 iteration
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3276
    // It does check this, but it is more effective to skip to the cleanup loop
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
  3277
    __ cmp(len, (u1)16);
33176
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3278
    __ br(Assembler::HS, L_nmax);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3279
    __ cbz(len, L_combine);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3280
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3281
    __ bind(L_simple_by1_loop);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3282
    __ ldrb(temp0, Address(__ post(buff, 1)));
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3283
    __ add(s1, s1, temp0);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3284
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3285
    __ subs(len, len, 1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3286
    __ br(Assembler::HI, L_simple_by1_loop);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3287
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3288
    // s1 = s1 % BASE
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3289
    __ subs(temp0, s1, base);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3290
    __ csel(s1, temp0, s1, Assembler::HS);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3291
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3292
    // s2 = s2 % BASE
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3293
    __ lsr(temp0, s2, 16);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3294
    __ lsl(temp1, temp0, 4);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3295
    __ sub(temp1, temp1, temp0);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3296
    __ add(s2, temp1, s2, ext::uxth);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3297
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3298
    __ subs(temp0, s2, base);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3299
    __ csel(s2, temp0, s2, Assembler::HS);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3300
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3301
    __ b(L_combine);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3302
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3303
    __ bind(L_nmax);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3304
    __ subs(len, len, nmax);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3305
    __ sub(count, nmax, 16);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3306
    __ br(Assembler::LO, L_by16);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3307
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3308
    __ bind(L_nmax_loop);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3309
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3310
    __ ldp(temp0, temp1, Address(__ post(buff, 16)));
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3311
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3312
    __ add(s1, s1, temp0, ext::uxtb);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3313
    __ ubfx(temp2, temp0, 8, 8);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3314
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3315
    __ add(s1, s1, temp2);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3316
    __ ubfx(temp2, temp0, 16, 8);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3317
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3318
    __ add(s1, s1, temp2);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3319
    __ ubfx(temp2, temp0, 24, 8);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3320
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3321
    __ add(s1, s1, temp2);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3322
    __ ubfx(temp2, temp0, 32, 8);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3323
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3324
    __ add(s1, s1, temp2);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3325
    __ ubfx(temp2, temp0, 40, 8);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3326
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3327
    __ add(s1, s1, temp2);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3328
    __ ubfx(temp2, temp0, 48, 8);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3329
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3330
    __ add(s1, s1, temp2);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3331
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3332
    __ add(s1, s1, temp0, Assembler::LSR, 56);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3333
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3334
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3335
    __ add(s1, s1, temp1, ext::uxtb);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3336
    __ ubfx(temp2, temp1, 8, 8);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3337
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3338
    __ add(s1, s1, temp2);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3339
    __ ubfx(temp2, temp1, 16, 8);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3340
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3341
    __ add(s1, s1, temp2);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3342
    __ ubfx(temp2, temp1, 24, 8);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3343
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3344
    __ add(s1, s1, temp2);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3345
    __ ubfx(temp2, temp1, 32, 8);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3346
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3347
    __ add(s1, s1, temp2);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3348
    __ ubfx(temp2, temp1, 40, 8);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3349
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3350
    __ add(s1, s1, temp2);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3351
    __ ubfx(temp2, temp1, 48, 8);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3352
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3353
    __ add(s1, s1, temp2);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3354
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3355
    __ add(s1, s1, temp1, Assembler::LSR, 56);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3356
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3357
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3358
    __ subs(count, count, 16);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3359
    __ br(Assembler::HS, L_nmax_loop);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3360
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3361
    // s1 = s1 % BASE
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3362
    __ lsr(temp0, s1, 16);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3363
    __ lsl(temp1, temp0, 4);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3364
    __ sub(temp1, temp1, temp0);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3365
    __ add(temp1, temp1, s1, ext::uxth);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3366
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3367
    __ lsr(temp0, temp1, 16);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3368
    __ lsl(s1, temp0, 4);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3369
    __ sub(s1, s1, temp0);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3370
    __ add(s1, s1, temp1, ext:: uxth);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3371
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3372
    __ subs(temp0, s1, base);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3373
    __ csel(s1, temp0, s1, Assembler::HS);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3374
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3375
    // s2 = s2 % BASE
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3376
    __ lsr(temp0, s2, 16);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3377
    __ lsl(temp1, temp0, 4);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3378
    __ sub(temp1, temp1, temp0);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3379
    __ add(temp1, temp1, s2, ext::uxth);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3380
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3381
    __ lsr(temp0, temp1, 16);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3382
    __ lsl(s2, temp0, 4);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3383
    __ sub(s2, s2, temp0);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3384
    __ add(s2, s2, temp1, ext:: uxth);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3385
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3386
    __ subs(temp0, s2, base);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3387
    __ csel(s2, temp0, s2, Assembler::HS);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3388
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3389
    __ subs(len, len, nmax);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3390
    __ sub(count, nmax, 16);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3391
    __ br(Assembler::HS, L_nmax_loop);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3392
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3393
    __ bind(L_by16);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3394
    __ adds(len, len, count);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3395
    __ br(Assembler::LO, L_by1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3396
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3397
    __ bind(L_by16_loop);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3398
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3399
    __ ldp(temp0, temp1, Address(__ post(buff, 16)));
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3400
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3401
    __ add(s1, s1, temp0, ext::uxtb);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3402
    __ ubfx(temp2, temp0, 8, 8);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3403
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3404
    __ add(s1, s1, temp2);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3405
    __ ubfx(temp2, temp0, 16, 8);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3406
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3407
    __ add(s1, s1, temp2);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3408
    __ ubfx(temp2, temp0, 24, 8);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3409
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3410
    __ add(s1, s1, temp2);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3411
    __ ubfx(temp2, temp0, 32, 8);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3412
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3413
    __ add(s1, s1, temp2);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3414
    __ ubfx(temp2, temp0, 40, 8);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3415
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3416
    __ add(s1, s1, temp2);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3417
    __ ubfx(temp2, temp0, 48, 8);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3418
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3419
    __ add(s1, s1, temp2);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3420
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3421
    __ add(s1, s1, temp0, Assembler::LSR, 56);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3422
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3423
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3424
    __ add(s1, s1, temp1, ext::uxtb);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3425
    __ ubfx(temp2, temp1, 8, 8);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3426
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3427
    __ add(s1, s1, temp2);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3428
    __ ubfx(temp2, temp1, 16, 8);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3429
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3430
    __ add(s1, s1, temp2);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3431
    __ ubfx(temp2, temp1, 24, 8);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3432
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3433
    __ add(s1, s1, temp2);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3434
    __ ubfx(temp2, temp1, 32, 8);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3435
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3436
    __ add(s1, s1, temp2);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3437
    __ ubfx(temp2, temp1, 40, 8);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3438
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3439
    __ add(s1, s1, temp2);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3440
    __ ubfx(temp2, temp1, 48, 8);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3441
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3442
    __ add(s1, s1, temp2);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3443
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3444
    __ add(s1, s1, temp1, Assembler::LSR, 56);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3445
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3446
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3447
    __ subs(len, len, 16);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3448
    __ br(Assembler::HS, L_by16_loop);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3449
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3450
    __ bind(L_by1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3451
    __ adds(len, len, 15);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3452
    __ br(Assembler::LO, L_do_mod);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3453
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3454
    __ bind(L_by1_loop);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3455
    __ ldrb(temp0, Address(__ post(buff, 1)));
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3456
    __ add(s1, temp0, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3457
    __ add(s2, s2, s1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3458
    __ subs(len, len, 1);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3459
    __ br(Assembler::HS, L_by1_loop);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3460
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3461
    __ bind(L_do_mod);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3462
    // s1 = s1 % BASE
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3463
    __ lsr(temp0, s1, 16);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3464
    __ lsl(temp1, temp0, 4);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3465
    __ sub(temp1, temp1, temp0);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3466
    __ add(temp1, temp1, s1, ext::uxth);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3467
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3468
    __ lsr(temp0, temp1, 16);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3469
    __ lsl(s1, temp0, 4);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3470
    __ sub(s1, s1, temp0);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3471
    __ add(s1, s1, temp1, ext:: uxth);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3472
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3473
    __ subs(temp0, s1, base);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3474
    __ csel(s1, temp0, s1, Assembler::HS);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3475
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3476
    // s2 = s2 % BASE
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3477
    __ lsr(temp0, s2, 16);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3478
    __ lsl(temp1, temp0, 4);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3479
    __ sub(temp1, temp1, temp0);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3480
    __ add(temp1, temp1, s2, ext::uxth);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3481
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3482
    __ lsr(temp0, temp1, 16);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3483
    __ lsl(s2, temp0, 4);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3484
    __ sub(s2, s2, temp0);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3485
    __ add(s2, s2, temp1, ext:: uxth);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3486
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3487
    __ subs(temp0, s2, base);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3488
    __ csel(s2, temp0, s2, Assembler::HS);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3489
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3490
    // Combine lower bits and higher bits
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3491
    __ bind(L_combine);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3492
    __ orr(s1, s1, s2, Assembler::LSL, 16); // adler = s1 | (s2 << 16)
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3493
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3494
    __ ret(lr);
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3495
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3496
    return start;
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3497
  }
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  3498
31591
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3499
  /**
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3500
   *  Arguments:
82134a118aea 8130687: aarch64: add support for hardware crc32c
enevill
parents: 30553
diff changeset
  3501
   *
30225
e9722ea461d4 8077615: AARCH64: Add C2 intrinsic for BigInteger::multiplyToLen() method
aph
parents: 29695
diff changeset
  3502
   *  Input:
e9722ea461d4 8077615: AARCH64: Add C2 intrinsic for BigInteger::multiplyToLen() method
aph
parents: 29695
diff changeset
  3503
   *    c_rarg0   - x address
e9722ea461d4 8077615: AARCH64: Add C2 intrinsic for BigInteger::multiplyToLen() method
aph
parents: 29695
diff changeset
  3504
   *    c_rarg1   - x length
e9722ea461d4 8077615: AARCH64: Add C2 intrinsic for BigInteger::multiplyToLen() method
aph
parents: 29695
diff changeset
  3505
   *    c_rarg2   - y address
e9722ea461d4 8077615: AARCH64: Add C2 intrinsic for BigInteger::multiplyToLen() method
aph
parents: 29695
diff changeset
  3506
   *    c_rarg3   - y length
e9722ea461d4 8077615: AARCH64: Add C2 intrinsic for BigInteger::multiplyToLen() method
aph
parents: 29695
diff changeset
  3507
   *    c_rarg4   - z address
e9722ea461d4 8077615: AARCH64: Add C2 intrinsic for BigInteger::multiplyToLen() method
aph
parents: 29695
diff changeset
  3508
   *    c_rarg5   - z length
e9722ea461d4 8077615: AARCH64: Add C2 intrinsic for BigInteger::multiplyToLen() method
aph
parents: 29695
diff changeset
  3509
   */
e9722ea461d4 8077615: AARCH64: Add C2 intrinsic for BigInteger::multiplyToLen() method
aph
parents: 29695
diff changeset
  3510
  // Emits the "multiplyToLen" stub and returns the address of its first
  // instruction. The stub body is a frame setup, one multiply_to_len macro
  // expansion, a frame teardown and a return through lr.
  address generate_multiplyToLen() {
    // r0..r5 carry the six operands. The header comment names them
    // c_rarg0..c_rarg5 (x address, x length, y address, y length, z address,
    // z length) -- assumed to be the same registers; TODO confirm.
    const Register x_addr = r0;
    const Register x_len  = r1;
    const Register y_addr = r2;
    const Register y_len  = r3;
    const Register z_addr = r4;
    const Register z_len  = r5;

    // Seven further registers handed to multiply_to_len as temporaries.
    const Register scratch1 = r10;
    const Register scratch2 = r11;
    const Register scratch3 = r12;
    const Register scratch4 = r13;
    const Register scratch5 = r14;
    const Register scratch6 = r15;
    const Register scratch7 = r16;

    // Alignment is emitted before the StubCodeMark is created and before the
    // entry pc is sampled; keep these three statements in this order.
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
    address entry = __ pc();

    BLOCK_COMMENT("Entry:");
    __ enter();  // required for proper stackwalking of RuntimeStub frame
    __ multiply_to_len(x_addr, x_len, y_addr, y_len, z_addr, z_len,
                       scratch1, scratch2, scratch3, scratch4,
                       scratch5, scratch6, scratch7);
    __ leave();  // required for proper stackwalking of RuntimeStub frame
    __ ret(lr);

    return entry;
  }
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  3538
47571
c19054f06c14 8186915: AARCH64: Intrinsify squareToLen and mulAdd
dpochepk
parents: 47216
diff changeset
  3539
  // Emits the "squareToLen" stub and returns the address of its first
  // instruction. Squaring is done by calling multiply_to_len with the second
  // operand set to a copy of the first.
  address generate_squareToLen() {
    // Design note carried over from the original author: the dedicated
    // squareToLen algorithm for sizes 1..127 described in the Java code is
    // faster than multiply_to_len on some CPUs and slower on others, while
    // multiply_to_len gives slightly better results overall -- hence the
    // reuse of multiply_to_len here.

    // Operands of the stub itself.
    const Register x_addr = r0;
    const Register x_len  = r1;
    const Register z_addr = r2;
    const Register z_len  = r3;

    // Second-operand slots of multiply_to_len; loaded below from x_addr/x_len.
    const Register y_addr = r4;
    const Register y_len  = r5;

    // Seven further registers handed to multiply_to_len as temporaries.
    const Register scratch1 = r10;
    const Register scratch2 = r11;
    const Register scratch3 = r12;
    const Register scratch4 = r13;
    const Register scratch5 = r14;
    const Register scratch6 = r15;
    const Register scratch7 = r16;

    // The two registers overwritten with copies of x are saved around the call.
    RegSet saved = RegSet::of(y_addr, y_len);

    // Alignment is emitted before the StubCodeMark is created and before the
    // entry pc is sampled; keep these three statements in this order.
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "squareToLen");
    address entry = __ pc();

    BLOCK_COMMENT("Entry:");
    __ enter();
    __ push(saved, sp);
    __ mov(y_addr, x_addr);  // y == x
    __ mov(y_len, x_len);    // ylen == xlen
    __ multiply_to_len(x_addr, x_len, y_addr, y_len, z_addr, z_len,
                       scratch1, scratch2, scratch3, scratch4,
                       scratch5, scratch6, scratch7);
    __ pop(saved, sp);
    __ leave();
    __ ret(lr);
    return entry;
  }
c19054f06c14 8186915: AARCH64: Intrinsify squareToLen and mulAdd
dpochepk
parents: 47216
diff changeset
  3574
c19054f06c14 8186915: AARCH64: Intrinsify squareToLen and mulAdd
dpochepk
parents: 47216
diff changeset
  3575
  // Emits the "mulAdd" stub and returns the address of its first instruction.
  // The stub body is a frame setup, one mul_add macro expansion, a frame
  // teardown and a return through lr.
  address generate_mulAdd() {
    // The five operands of mul_add, taken from r0..r4 in this order.
    const Register out_r    = r0;
    const Register in_r     = r1;
    const Register offset_r = r2;
    const Register len_r    = r3;
    const Register k_r      = r4;

    // Alignment is emitted before the StubCodeMark is created and before the
    // entry pc is sampled; keep these three statements in this order.
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "mulAdd");
    address entry = __ pc();

    BLOCK_COMMENT("Entry:");
    __ enter();
    __ mul_add(out_r, in_r, offset_r, len_r, k_r);
    __ leave();
    __ ret(lr);

    return entry;
  }
c19054f06c14 8186915: AARCH64: Intrinsify squareToLen and mulAdd
dpochepk
parents: 47216
diff changeset
  3595
32574
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3596
  void ghash_multiply(FloatRegister result_lo, FloatRegister result_hi,
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3597
                      FloatRegister a, FloatRegister b, FloatRegister a1_xor_a0,
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3598
                      FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3, FloatRegister tmp4) {
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3599
    // Karatsuba multiplication performs a 128*128 -> 256-bit
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3600
    // multiplication in three 128-bit multiplications and a few
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3601
    // additions.
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3602
    //
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3603
    // (C1:C0) = A1*B1, (D1:D0) = A0*B0, (E1:E0) = (A0+A1)(B0+B1)
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3604
    // (A1:A0)(B1:B0) = C1:(C0+C1+D1+E1):(D1+C0+D0+E0):D0
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3605
    //
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3606
    // Inputs:
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3607
    //
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3608
    // A0 in a.d[0]     (subkey)
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3609
    // A1 in a.d[1]
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3610
    // (A1+A0) in a1_xor_a0.d[0]
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3611
    //
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3612
    // B0 in b.d[0]     (state)
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3613
    // B1 in b.d[1]
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3614
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3615
    __ ext(tmp1, __ T16B, b, b, 0x08);
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3616
    __ pmull2(result_hi, __ T1Q, b, a, __ T2D);  // A1*B1
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3617
    __ eor(tmp1, __ T16B, tmp1, b);            // (B1+B0)
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3618
    __ pmull(result_lo,  __ T1Q, b, a, __ T1D);  // A0*B0
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3619
    __ pmull(tmp2, __ T1Q, tmp1, a1_xor_a0, __ T1D); // (A1+A0)(B1+B0)
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3620
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3621
    __ ext(tmp4, __ T16B, result_lo, result_hi, 0x08);
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3622
    __ eor(tmp3, __ T16B, result_hi, result_lo); // A1*B1+A0*B0
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3623
    __ eor(tmp2, __ T16B, tmp2, tmp4);
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3624
    __ eor(tmp2, __ T16B, tmp2, tmp3);
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3625
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3626
    // Register pair <result_hi:result_lo> holds the result of carry-less multiplication
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3627
    __ ins(result_hi, __ D, tmp2, 0, 1);
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3628
    __ ins(result_lo, __ D, tmp2, 1, 0);
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3629
  }
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3630
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3631
  // Reduce the 256-bit carry-less product <hi:lo> modulo the GCM field
  // polynomial and leave the 128-bit remainder in result.
  //
  //   result  destination; it doubles as the scratch register t0 below and is
  //           overwritten by the first multiplication, while lo and hi are
  //           still being read
  //   lo, hi  low and high 128 bits of the product; both are modified in place
  //   p       multiplicand register holding the polynomial p(z) described below
  //   z       presumably an all-zero register, used with ext to move a 64-bit
  //           lane into the other half -- TODO confirm against the callers
  //   t1      scratch register, overwritten
  void ghash_reduce(FloatRegister result, FloatRegister lo, FloatRegister hi,
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3632
                    FloatRegister p, FloatRegister z, FloatRegister t1) {
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3633
    // t0 is just another name for result: no extra register is used.
    const FloatRegister t0 = result;
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3634
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3635
    // The GCM field polynomial f is z^128 + p(z), where p =
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3636
    // z^7+z^2+z+1.
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3637
    //
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3638
    //    z^128 === -p(z)  (mod (z^128 + p(z)))
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3639
    //
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3640
    // so, given that the product we're reducing is
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3641
    //    a == lo + hi * z^128
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3642
    // substituting,
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3643
    //      === lo - hi * p(z)  (mod (z^128 + p(z)))
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3644
    //
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3645
    // we reduce by multiplying hi by p(z) and subtracting the result
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3646
    // from (i.e. XORing it with) lo.  Because p has no nonzero high
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3647
    // bits we can do this with two 64-bit multiplications, one for
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3648
    // each 64-bit half of hi.
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3649
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3650
    // First multiplication: t0 = hi.d[1] * p.d[1] (the upper 64-bit lanes).
    __ pmull2(t0, __ T1Q, hi, p, __ T2D);
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3651
    // t1 = <z.d[0] : t0.d[1]>, so the eor below XORs the upper half of the
    // product into hi.d[0].
    __ ext(t1, __ T16B, t0, z, 8);
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3652
    __ eor(hi, __ T16B, hi, t1);
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3653
    // t1 = <t0.d[0] : z.d[1]>, so the eor below XORs the lower half of the
    // product into lo.d[1].
    __ ext(t1, __ T16B, z, t0, 8);
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3654
    __ eor(lo, __ T16B, lo, t1);
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3655
    // Second multiplication: t0 = hi.d[0] * p.d[0], using hi.d[0] as updated
    // above.
    __ pmull(t0, __ T1Q, hi, p, __ T1D);
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3656
    // Combine with the adjusted lo to form the final value of result.
    __ eor(result, __ T16B, lo, t0);
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3657
  }
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  3658
46814
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3659
  // Generate the "has_negatives" stub.
  //
  // Register usage, as set up below: ary1 (r1) is the address of the bytes to
  // scan, len (r2) is the number of bytes, and result (r0) is set to 1 if any
  // scanned byte has its top bit set (tested with UPPER_BIT_MASK) and to 0
  // otherwise.  rscratch1 and rscratch2 are used freely; r3-r7 and r10 are
  // saved and restored around the code that uses them.
  //
  // Returns the primary entry point, which dispatches on len.  The reference
  // parameter has_negatives_long is set to a second entry point that skips
  // the dispatch and goes straight to the code used for len > 15.
  address generate_has_negatives(address &has_negatives_long) {
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
  3660
    // Number of bytes examined by one LARGE_LOOP iteration (4 ldp of 16 bytes).
    const u1 large_loop_size = 64;
46814
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3661
    // Selects the top bit of each of the 8 bytes held in a 64-bit register.
    const uint64_t UPPER_BIT_MASK=0x8080808080808080;
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3662
    int dcache_line = VM_Version::dcache_line_size();
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3663
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3664
    Register ary1 = r1, len = r2, result = r0;
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3665
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3666
    __ align(CodeEntryAlignment);
52977
2e4903f83295 8205421: AARCH64: StubCodeMark should be placed after alignment
dpochepk
parents: 52927
diff changeset
  3667
2e4903f83295 8205421: AARCH64: StubCodeMark should be placed after alignment
dpochepk
parents: 52927
diff changeset
  3668
    StubCodeMark mark(this, "StubRoutines", "has_negatives");
2e4903f83295 8205421: AARCH64: StubCodeMark should be placed after alignment
dpochepk
parents: 52927
diff changeset
  3669
46814
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3670
    address entry = __ pc();
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3671
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3672
    __ enter();
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3673
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3674
  Label RET_TRUE, RET_TRUE_NO_POP, RET_FALSE, ALIGNED, LOOP16, CHECK_16, DONE,
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3675
        LARGE_LOOP, POST_LOOP16, LEN_OVER_15, LEN_OVER_8, POST_LOOP16_LOAD_TAIL;
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3676
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
  3677
  // Lengths above 15 are handled by the register-spilling code at LEN_OVER_15.
  __ cmp(len, (u1)15);
46814
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3678
  __ br(Assembler::GT, LEN_OVER_15);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3679
  // The only case when execution falls into this code is when the pointer is
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3680
  // near the end of a memory page and we have to avoid reading the next page.
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3681
  // Point ary1 just past the last byte, so that the loads below end exactly
  // at the end of the array instead of running beyond it.
  __ add(ary1, ary1, len);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3682
  // len becomes (original len - 8); the flags pick the 8- or 16-byte variant.
  __ subs(len, len, 8);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3683
  __ br(Assembler::GT, LEN_OVER_8);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3684
  // Original len <= 8: load the 8 bytes that end at the end of the array.
  __ ldr(rscratch2, Address(ary1, -8));
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3685
  // Shift count is (8 - original len) * 8 bits: the right shift drops the
  // loaded bytes that lie before the start of the array (assuming the usual
  // little-endian load).
  // NOTE(review): for original len == 0 the count is 64, which the shift
  // presumably treats as 0 -- callers are assumed never to pass 0; confirm.
  __ sub(rscratch1, zr, len, __ LSL, 3);  // LSL 3 is to get bits from bytes.
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3686
  __ lsrv(rscratch2, rscratch2, rscratch1);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3687
  __ tst(rscratch2, UPPER_BIT_MASK);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3688
  __ cset(result, Assembler::NE);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3689
  __ leave();
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3690
  __ ret(lr);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3691
  __ bind(LEN_OVER_8);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3692
  // 8 < original len <= 15: load the 16 bytes that end at the end of the
  // array.  rscratch2 receives the last 8 bytes, which all belong to the
  // array; rscratch1 receives the 8 bytes before them.
  __ ldp(rscratch1, rscratch2, Address(ary1, -16));
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3693
  __ sub(len, len, 8); // no data dep., then sub can be executed while loading
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3694
  __ tst(rscratch2, UPPER_BIT_MASK);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3695
  // Nothing has been pushed on this path, hence the exit that skips the pop.
  __ br(Assembler::NE, RET_TRUE_NO_POP);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3696
  // len is now (original len - 16), so the count is (16 - original len) * 8
  // bits: drop the bytes of rscratch1 that precede the start of the array.
  __ sub(rscratch2, zr, len, __ LSL, 3); // LSL 3 is to get bits from bytes
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3697
  __ lsrv(rscratch1, rscratch1, rscratch2);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3698
  __ tst(rscratch1, UPPER_BIT_MASK);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3699
  __ cset(result, Assembler::NE);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3700
  __ leave();
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3701
  __ ret(lr);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3702
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3703
  // Temporaries used only by the len > 15 code below; they are pushed on
  // entry to that code and popped on each of its exits.
  Register tmp1 = r3, tmp2 = r4, tmp3 = r5, tmp4 = r6, tmp5 = r7, tmp6 = r10;
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3704
  const RegSet spilled_regs = RegSet::range(tmp1, tmp5) + tmp6;
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3705
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3706
  has_negatives_long = __ pc(); // 2nd entry point
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3707
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3708
  // Executed only when entering through has_negatives_long; the branch from
  // the primary entry lands on LEN_OVER_15 below, after its own enter().
  __ enter();
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3709
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3710
  __ bind(LEN_OVER_15);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3711
    __ push(spilled_regs, sp);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3712
    __ andr(rscratch2, ary1, 15); // check pointer for 16-byte alignment
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3713
    __ cbz(rscratch2, ALIGNED);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3714
    // Unaligned start: test the first 16 bytes with one unaligned load, then
    // advance ary1 to the next 16-byte boundary.  The bytes between that
    // boundary and ary1 + 16 are examined again by the aligned code.
    __ ldp(tmp6, tmp1, Address(ary1));
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3715
    __ mov(tmp5, 16);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3716
    __ sub(rscratch1, tmp5, rscratch2); // amount of bytes until aligned address
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3717
    __ add(ary1, ary1, rscratch1);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3718
    __ sub(len, len, rscratch1);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3719
    __ orr(tmp6, tmp6, tmp1);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3720
    __ tst(tmp6, UPPER_BIT_MASK);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3721
    __ br(Assembler::NE, RET_TRUE);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3722
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3723
  __ bind(ALIGNED);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3724
    __ cmp(len, large_loop_size);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3725
    __ br(Assembler::LT, CHECK_16);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3726
    // Perform a 16-byte load as an early return in the pre-loop to handle the
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3727
    // situation when an initially aligned large array has negative values in
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3728
    // its first bytes, so LARGE_LOOP would do 4 reads instead of 1 (in the worst
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3729
    // case), which is slower. Cases with negative bytes further ahead won't be
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3730
    // affected that much. In fact, it'll be faster due to early loads, fewer
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3731
    // instructions and fewer branches in LARGE_LOOP.
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3732
    __ ldp(tmp6, tmp1, Address(__ post(ary1, 16)));
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3733
    __ sub(len, len, 16);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3734
    __ orr(tmp6, tmp6, tmp1);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3735
    __ tst(tmp6, UPPER_BIT_MASK);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3736
    __ br(Assembler::NE, RET_TRUE);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3737
    // The pre-check consumed 16 bytes, so len may have dropped below 64.
    __ cmp(len, large_loop_size);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3738
    __ br(Assembler::LT, CHECK_16);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3739
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3740
    if (SoftwarePrefetchHintDistance >= 0
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3741
        && SoftwarePrefetchHintDistance >= dcache_line) {
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3742
      // initial prefetch
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3743
      __ prfm(Address(ary1, SoftwarePrefetchHintDistance - dcache_line));
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3744
    }
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3745
  __ bind(LARGE_LOOP);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3746
    if (SoftwarePrefetchHintDistance >= 0) {
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3747
      __ prfm(Address(ary1, SoftwarePrefetchHintDistance));
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3748
    }
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3749
    // Issue load instructions first, since it can save a few CPU/MEM cycles. Also,
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3750
    // instead of 4 triples of "orr(...); andr(...); cbnz(...);" (one per ldp) it is
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3751
    // better to generate 7 * orr(...) + 1 andr(...) + 1 cbnz(...), which saves 3
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3752
    // instructions per iteration and has fewer branches, but this approach disables
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3753
    // early return; thus, all 64 bytes are loaded and checked every time.
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3754
    __ ldp(tmp2, tmp3, Address(ary1));
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3755
    __ ldp(tmp4, tmp5, Address(ary1, 16));
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3756
    __ ldp(rscratch1, rscratch2, Address(ary1, 32));
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3757
    __ ldp(tmp6, tmp1, Address(ary1, 48));
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3758
    __ add(ary1, ary1, large_loop_size);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3759
    __ sub(len, len, large_loop_size);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3760
    // OR all eight loaded words together so that a single test covers them.
    __ orr(tmp2, tmp2, tmp3);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3761
    __ orr(tmp4, tmp4, tmp5);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3762
    __ orr(rscratch1, rscratch1, rscratch2);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3763
    __ orr(tmp6, tmp6, tmp1);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3764
    __ orr(tmp2, tmp2, tmp4);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3765
    __ orr(rscratch1, rscratch1, tmp6);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3766
    __ orr(tmp2, tmp2, rscratch1);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3767
    __ tst(tmp2, UPPER_BIT_MASK);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3768
    __ br(Assembler::NE, RET_TRUE);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3769
    __ cmp(len, large_loop_size);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3770
    __ br(Assembler::GE, LARGE_LOOP);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3771
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3772
  __ bind(CHECK_16); // small 16-byte load pre-loop
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
  3773
    __ cmp(len, (u1)16);
46814
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3774
    __ br(Assembler::LT, POST_LOOP16);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3775
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3776
  __ bind(LOOP16); // small 16-byte load loop
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3777
    __ ldp(tmp2, tmp3, Address(__ post(ary1, 16)));
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3778
    __ sub(len, len, 16);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3779
    __ orr(tmp2, tmp2, tmp3);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3780
    __ tst(tmp2, UPPER_BIT_MASK);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3781
    __ br(Assembler::NE, RET_TRUE);
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
  3782
    __ cmp(len, (u1)16);
46814
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3783
    __ br(Assembler::GE, LOOP16); // 16-byte load loop end
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3784
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3785
  __ bind(POST_LOOP16); // 16-byte aligned, so we can read unconditionally
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
  3786
    // Fewer than 16 bytes remain.  With more than 8 left, test one full word
    // first; the tail code below then deals with the remaining 1..7 bytes.
    __ cmp(len, (u1)8);
46814
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3787
    __ br(Assembler::LE, POST_LOOP16_LOAD_TAIL);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3788
    __ ldr(tmp3, Address(__ post(ary1, 8)));
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3789
    __ sub(len, len, 8);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3790
    __ tst(tmp3, UPPER_BIT_MASK);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3791
    __ br(Assembler::NE, RET_TRUE);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3792
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3793
  __ bind(POST_LOOP16_LOAD_TAIL);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3794
    __ cbz(len, RET_FALSE); // Can't shift left by 64 when len==0
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3795
    // Load a full word starting at ary1 and shift left by (64 - len * 8) bits,
    // which drops the loaded bytes that lie past the end of the array
    // (assuming the usual little-endian load).
    __ ldr(tmp1, Address(ary1));
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3796
    __ mov(tmp2, 64);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3797
    __ sub(tmp4, tmp2, len, __ LSL, 3);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3798
    __ lslv(tmp1, tmp1, tmp4);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3799
    __ tst(tmp1, UPPER_BIT_MASK);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3800
    __ br(Assembler::NE, RET_TRUE);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3801
    // Fallthrough
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3802
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3803
  __ bind(RET_FALSE);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3804
    __ pop(spilled_regs, sp);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3805
    __ leave();
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3806
    __ mov(result, zr);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3807
    __ ret(lr);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3808
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3809
  // Exit with result = 1.  RET_TRUE first restores the spilled registers;
  // RET_TRUE_NO_POP is the target for the path that never pushed them.
  __ bind(RET_TRUE);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3810
    __ pop(spilled_regs, sp);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3811
  __ bind(RET_TRUE_NO_POP);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3812
    __ leave();
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3813
    __ mov(result, 1);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3814
    __ ret(lr);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3815
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3816
  // NOTE(review): nothing in this function branches to DONE and the code
  // before it ends in ret, so this epilogue looks unreachable -- confirm
  // before removing it.
  __ bind(DONE);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3817
    __ pop(spilled_regs, sp);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3818
    __ leave();
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3819
    __ ret(lr);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3820
    return entry;
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  3821
  }
49724
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3822
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3823
  void generate_large_array_equals_loop_nonsimd(int loopThreshold,
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3824
        bool usePrefetch, Label &NOT_EQUAL) {
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3825
    Register a1 = r1, a2 = r2, result = r0, cnt1 = r10, tmp1 = rscratch1,
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3826
        tmp2 = rscratch2, tmp3 = r3, tmp4 = r4, tmp5 = r5, tmp6 = r11,
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3827
        tmp7 = r12, tmp8 = r13;
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3828
    Label LOOP;
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3829
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3830
    __ ldp(tmp1, tmp3, Address(__ post(a1, 2 * wordSize)));
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3831
    __ ldp(tmp2, tmp4, Address(__ post(a2, 2 * wordSize)));
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3832
    __ bind(LOOP);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3833
    if (usePrefetch) {
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3834
      __ prfm(Address(a1, SoftwarePrefetchHintDistance));
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3835
      __ prfm(Address(a2, SoftwarePrefetchHintDistance));
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3836
    }
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3837
    __ ldp(tmp5, tmp7, Address(__ post(a1, 2 * wordSize)));
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3838
    __ eor(tmp1, tmp1, tmp2);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3839
    __ eor(tmp3, tmp3, tmp4);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3840
    __ ldp(tmp6, tmp8, Address(__ post(a2, 2 * wordSize)));
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3841
    __ orr(tmp1, tmp1, tmp3);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3842
    __ cbnz(tmp1, NOT_EQUAL);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3843
    __ ldp(tmp1, tmp3, Address(__ post(a1, 2 * wordSize)));
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3844
    __ eor(tmp5, tmp5, tmp6);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3845
    __ eor(tmp7, tmp7, tmp8);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3846
    __ ldp(tmp2, tmp4, Address(__ post(a2, 2 * wordSize)));
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3847
    __ orr(tmp5, tmp5, tmp7);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3848
    __ cbnz(tmp5, NOT_EQUAL);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3849
    __ ldp(tmp5, tmp7, Address(__ post(a1, 2 * wordSize)));
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3850
    __ eor(tmp1, tmp1, tmp2);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3851
    __ eor(tmp3, tmp3, tmp4);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3852
    __ ldp(tmp6, tmp8, Address(__ post(a2, 2 * wordSize)));
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3853
    __ orr(tmp1, tmp1, tmp3);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3854
    __ cbnz(tmp1, NOT_EQUAL);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3855
    __ ldp(tmp1, tmp3, Address(__ post(a1, 2 * wordSize)));
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3856
    __ eor(tmp5, tmp5, tmp6);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3857
    __ sub(cnt1, cnt1, 8 * wordSize);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3858
    __ eor(tmp7, tmp7, tmp8);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3859
    __ ldp(tmp2, tmp4, Address(__ post(a2, 2 * wordSize)));
50242
9a87afc49148 8203041: AArch64: fix overflow in immediate cmp/cmn instruction
fyang
parents: 50179
diff changeset
  3860
    // tmp6 is not used. MacroAssembler::subs is used here (rather than
9a87afc49148 8203041: AArch64: fix overflow in immediate cmp/cmn instruction
fyang
parents: 50179
diff changeset
  3861
    // cmp) because subs allows an unlimited range of immediate operand.
9a87afc49148 8203041: AArch64: fix overflow in immediate cmp/cmn instruction
fyang
parents: 50179
diff changeset
  3862
    __ subs(tmp6, cnt1, loopThreshold);
49724
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3863
    __ orr(tmp5, tmp5, tmp7);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3864
    __ cbnz(tmp5, NOT_EQUAL);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3865
    __ br(__ GE, LOOP);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3866
    // post-loop
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3867
    __ eor(tmp1, tmp1, tmp2);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3868
    __ eor(tmp3, tmp3, tmp4);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3869
    __ orr(tmp1, tmp1, tmp3);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3870
    __ sub(cnt1, cnt1, 2 * wordSize);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3871
    __ cbnz(tmp1, NOT_EQUAL);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3872
  }
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3873
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3874
  void generate_large_array_equals_loop_simd(int loopThreshold,
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3875
        bool usePrefetch, Label &NOT_EQUAL) {
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3876
    Register a1 = r1, a2 = r2, result = r0, cnt1 = r10, tmp1 = rscratch1,
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3877
        tmp2 = rscratch2;
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3878
    Label LOOP;
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3879
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3880
    __ bind(LOOP);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3881
    if (usePrefetch) {
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3882
      __ prfm(Address(a1, SoftwarePrefetchHintDistance));
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3883
      __ prfm(Address(a2, SoftwarePrefetchHintDistance));
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3884
    }
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3885
    __ ld1(v0, v1, v2, v3, __ T2D, Address(__ post(a1, 4 * 2 * wordSize)));
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3886
    __ sub(cnt1, cnt1, 8 * wordSize);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3887
    __ ld1(v4, v5, v6, v7, __ T2D, Address(__ post(a2, 4 * 2 * wordSize)));
50242
9a87afc49148 8203041: AArch64: fix overflow in immediate cmp/cmn instruction
fyang
parents: 50179
diff changeset
  3888
    __ subs(tmp1, cnt1, loopThreshold);
49724
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3889
    __ eor(v0, __ T16B, v0, v4);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3890
    __ eor(v1, __ T16B, v1, v5);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3891
    __ eor(v2, __ T16B, v2, v6);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3892
    __ eor(v3, __ T16B, v3, v7);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3893
    __ orr(v0, __ T16B, v0, v1);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3894
    __ orr(v1, __ T16B, v2, v3);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3895
    __ orr(v0, __ T16B, v0, v1);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3896
    __ umov(tmp1, v0, __ D, 0);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3897
    __ umov(tmp2, v0, __ D, 1);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3898
    __ orr(tmp1, tmp1, tmp2);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3899
    __ cbnz(tmp1, NOT_EQUAL);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3900
    __ br(__ GE, LOOP);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3901
  }
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3902
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3903
  // a1 = r1 - array1 address
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3904
  // a2 = r2 - array2 address
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3905
  // result = r0 - return value. Already contains "false"
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3906
  // cnt1 = r10 - amount of elements left to check, reduced by wordSize
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3907
  // r3-r5 are reserved temporary registers
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3908
  address generate_large_array_equals() {
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3909
    Register a1 = r1, a2 = r2, result = r0, cnt1 = r10, tmp1 = rscratch1,
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3910
        tmp2 = rscratch2, tmp3 = r3, tmp4 = r4, tmp5 = r5, tmp6 = r11,
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3911
        tmp7 = r12, tmp8 = r13;
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3912
    Label TAIL, NOT_EQUAL, EQUAL, NOT_EQUAL_NO_POP, NO_PREFETCH_LARGE_LOOP,
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3913
        SMALL_LOOP, POST_LOOP;
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3914
    const int PRE_LOOP_SIZE = UseSIMDForArrayEquals ? 0 : 16;
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3915
    // calculate if at least 32 prefetched bytes are used
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3916
    int prefetchLoopThreshold = SoftwarePrefetchHintDistance + 32;
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3917
    int nonPrefetchLoopThreshold = (64 + PRE_LOOP_SIZE);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3918
    RegSet spilled_regs = RegSet::range(tmp6, tmp8);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3919
    assert_different_registers(a1, a2, result, cnt1, tmp1, tmp2, tmp3, tmp4,
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3920
        tmp5, tmp6, tmp7, tmp8);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3921
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3922
    __ align(CodeEntryAlignment);
52977
2e4903f83295 8205421: AARCH64: StubCodeMark should be placed after alignment
dpochepk
parents: 52927
diff changeset
  3923
2e4903f83295 8205421: AARCH64: StubCodeMark should be placed after alignment
dpochepk
parents: 52927
diff changeset
  3924
    StubCodeMark mark(this, "StubRoutines", "large_array_equals");
2e4903f83295 8205421: AARCH64: StubCodeMark should be placed after alignment
dpochepk
parents: 52927
diff changeset
  3925
49724
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3926
    address entry = __ pc();
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3927
    __ enter();
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3928
    __ sub(cnt1, cnt1, wordSize);  // first 8 bytes were loaded outside of stub
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3929
    // also advance pointers to use post-increment instead of pre-increment
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3930
    __ add(a1, a1, wordSize);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3931
    __ add(a2, a2, wordSize);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3932
    if (AvoidUnalignedAccesses) {
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3933
      // both implementations (SIMD/nonSIMD) are using relatively large load
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3934
      // instructions (ld1/ldp), which has huge penalty (up to x2 exec time)
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3935
      // on some CPUs in case of address is not at least 16-byte aligned.
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3936
      // Arrays are 8-byte aligned currently, so, we can make additional 8-byte
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3937
      // load if needed at least for 1st address and make if 16-byte aligned.
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3938
      Label ALIGNED16;
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3939
      __ tbz(a1, 3, ALIGNED16);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3940
      __ ldr(tmp1, Address(__ post(a1, wordSize)));
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3941
      __ ldr(tmp2, Address(__ post(a2, wordSize)));
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3942
      __ sub(cnt1, cnt1, wordSize);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3943
      __ eor(tmp1, tmp1, tmp2);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3944
      __ cbnz(tmp1, NOT_EQUAL_NO_POP);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3945
      __ bind(ALIGNED16);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3946
    }
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3947
    if (UseSIMDForArrayEquals) {
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3948
      if (SoftwarePrefetchHintDistance >= 0) {
50242
9a87afc49148 8203041: AArch64: fix overflow in immediate cmp/cmn instruction
fyang
parents: 50179
diff changeset
  3949
        __ subs(tmp1, cnt1, prefetchLoopThreshold);
49724
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3950
        __ br(__ LE, NO_PREFETCH_LARGE_LOOP);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3951
        generate_large_array_equals_loop_simd(prefetchLoopThreshold,
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3952
            /* prfm = */ true, NOT_EQUAL);
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
  3953
        __ subs(zr, cnt1, nonPrefetchLoopThreshold);
49724
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3954
        __ br(__ LT, TAIL);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3955
      }
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3956
      __ bind(NO_PREFETCH_LARGE_LOOP);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3957
      generate_large_array_equals_loop_simd(nonPrefetchLoopThreshold,
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3958
          /* prfm = */ false, NOT_EQUAL);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3959
    } else {
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3960
      __ push(spilled_regs, sp);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3961
      if (SoftwarePrefetchHintDistance >= 0) {
50242
9a87afc49148 8203041: AArch64: fix overflow in immediate cmp/cmn instruction
fyang
parents: 50179
diff changeset
  3962
        __ subs(tmp1, cnt1, prefetchLoopThreshold);
49724
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3963
        __ br(__ LE, NO_PREFETCH_LARGE_LOOP);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3964
        generate_large_array_equals_loop_nonsimd(prefetchLoopThreshold,
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3965
            /* prfm = */ true, NOT_EQUAL);
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
  3966
        __ subs(zr, cnt1, nonPrefetchLoopThreshold);
49724
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3967
        __ br(__ LT, TAIL);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3968
      }
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3969
      __ bind(NO_PREFETCH_LARGE_LOOP);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3970
      generate_large_array_equals_loop_nonsimd(nonPrefetchLoopThreshold,
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3971
          /* prfm = */ false, NOT_EQUAL);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3972
    }
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3973
    __ bind(TAIL);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3974
      __ cbz(cnt1, EQUAL);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3975
      __ subs(cnt1, cnt1, wordSize);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3976
      __ br(__ LE, POST_LOOP);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3977
    __ bind(SMALL_LOOP);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3978
      __ ldr(tmp1, Address(__ post(a1, wordSize)));
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3979
      __ ldr(tmp2, Address(__ post(a2, wordSize)));
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3980
      __ subs(cnt1, cnt1, wordSize);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3981
      __ eor(tmp1, tmp1, tmp2);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3982
      __ cbnz(tmp1, NOT_EQUAL);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3983
      __ br(__ GT, SMALL_LOOP);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3984
    __ bind(POST_LOOP);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3985
      __ ldr(tmp1, Address(a1, cnt1));
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3986
      __ ldr(tmp2, Address(a2, cnt1));
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3987
      __ eor(tmp1, tmp1, tmp2);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3988
      __ cbnz(tmp1, NOT_EQUAL);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3989
    __ bind(EQUAL);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3990
      __ mov(result, true);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3991
    __ bind(NOT_EQUAL);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3992
      if (!UseSIMDForArrayEquals) {
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3993
        __ pop(spilled_regs, sp);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3994
      }
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3995
    __ bind(NOT_EQUAL_NO_POP);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3996
    __ leave();
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3997
    __ ret(lr);
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3998
    return entry;
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  3999
  }
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  4000
50754
ccb8aa083958 8189105: AARCH64: create intrinsic for sin and cos
dpochepk
parents: 50753
diff changeset
  4001
  // Generates the sin/cos stub and returns its entry address.
  //
  //   isCos - true generates the cosine variant (stub name "libmDcos"),
  //           false the sine variant (stub name "libmDsin")
  //
  // The code itself is produced by MacroAssembler::generate_dsin_dcos, which
  // is handed the addresses of the StubRoutines::aarch64 tables.
  address generate_dsin_dcos(bool isCos) {
    const char* stub_name = isCos ? "libmDcos" : "libmDsin";

    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", stub_name);
    address entry = __ pc();

    address npio2_hw    = (address)StubRoutines::aarch64::_npio2_hw;
    address two_over_pi = (address)StubRoutines::aarch64::_two_over_pi;
    address pio2        = (address)StubRoutines::aarch64::_pio2;
    address dsin_coef   = (address)StubRoutines::aarch64::_dsin_coef;
    address dcos_coef   = (address)StubRoutines::aarch64::_dcos_coef;

    __ generate_dsin_dcos(isCos, npio2_hw, two_over_pi, pio2, dsin_coef,
        dcos_coef);
    return entry;
  }
ccb8aa083958 8189105: AARCH64: create intrinsic for sin and cos
dpochepk
parents: 50753
diff changeset
  4012
50753
4449b45900f1 8196402: AARCH64: create intrinsic for Math.log
dpochepk
parents: 50728
diff changeset
  4013
  // Generates the "dlog" stub and returns its entry address.
  //
  // The code itself is produced by MacroAssembler::fast_log; this wrapper
  // only fixes the register assignment it works with.
  address generate_dlog() {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "dlog");
    address start = __ pc();
    __ fast_log(v0, v1, v2, v3, v4, v5,   // FP/SIMD temporaries
                v16, v17, v18, v19,       // further FP/SIMD registers,
                                          // presumably for constants -
                                          // confirm against fast_log
                r0, r1, r2, r3, r4);      // general-purpose temporaries
    return start;
  }
49724
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  4024
50756
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4025
  // code for comparing 16 bytes of strings with same encoding
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4026
  void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) {
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4027
    Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, tmp1 = r10, tmp2 = r11;
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4028
    __ ldr(rscratch1, Address(__ post(str1, 8)));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4029
    __ eor(rscratch2, tmp1, tmp2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4030
    __ ldr(cnt1, Address(__ post(str2, 8)));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4031
    __ cbnz(rscratch2, DIFF1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4032
    __ ldr(tmp1, Address(__ post(str1, 8)));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4033
    __ eor(rscratch2, rscratch1, cnt1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4034
    __ ldr(tmp2, Address(__ post(str2, 8)));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4035
    __ cbnz(rscratch2, DIFF2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4036
  }
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4037
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4038
  // code for comparing 16 characters of strings with Latin1 and Utf16 encoding
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4039
  void compare_string_16_x_LU(Register tmpL, Register tmpU, Label &DIFF1,
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4040
      Label &DIFF2) {
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4041
    Register cnt1 = r2, tmp1 = r10, tmp2 = r11, tmp3 = r12;
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4042
    FloatRegister vtmp = v1, vtmpZ = v0, vtmp3 = v2;
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4043
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4044
    __ ldrq(vtmp, Address(__ post(tmp2, 16)));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4045
    __ ldr(tmpU, Address(__ post(cnt1, 8)));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4046
    __ zip1(vtmp3, __ T16B, vtmp, vtmpZ);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4047
    // now we have 32 bytes of characters (converted to U) in vtmp:vtmp3
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4048
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4049
    __ fmovd(tmpL, vtmp3);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4050
    __ eor(rscratch2, tmp3, tmpL);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4051
    __ cbnz(rscratch2, DIFF2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4052
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4053
    __ ldr(tmp3, Address(__ post(cnt1, 8)));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4054
    __ umov(tmpL, vtmp3, __ D, 1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4055
    __ eor(rscratch2, tmpU, tmpL);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4056
    __ cbnz(rscratch2, DIFF1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4057
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4058
    __ zip2(vtmp, __ T16B, vtmp, vtmpZ);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4059
    __ ldr(tmpU, Address(__ post(cnt1, 8)));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4060
    __ fmovd(tmpL, vtmp);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4061
    __ eor(rscratch2, tmp3, tmpL);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4062
    __ cbnz(rscratch2, DIFF2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4063
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4064
    __ ldr(tmp3, Address(__ post(cnt1, 8)));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4065
    __ umov(tmpL, vtmp, __ D, 1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4066
    __ eor(rscratch2, tmpU, tmpL);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4067
    __ cbnz(rscratch2, DIFF1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4068
  }
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4069
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4070
  // r0  = result
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4071
  // r1  = str1
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4072
  // r2  = cnt1
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4073
  // r3  = str2
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4074
  // r4  = cnt2
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4075
  // r10 = tmp1
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4076
  // r11 = tmp2
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4077
  address generate_compare_long_string_different_encoding(bool isLU) {
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4078
    __ align(CodeEntryAlignment);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4079
    StubCodeMark mark(this, "StubRoutines", isLU
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4080
        ? "compare_long_string_different_encoding LU"
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4081
        : "compare_long_string_different_encoding UL");
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4082
    address entry = __ pc();
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4083
    Label SMALL_LOOP, TAIL, TAIL_LOAD_16, LOAD_LAST, DIFF1, DIFF2,
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4084
        DONE, CALCULATE_DIFFERENCE, LARGE_LOOP_PREFETCH, SMALL_LOOP_ENTER,
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4085
        LARGE_LOOP_PREFETCH_REPEAT1, LARGE_LOOP_PREFETCH_REPEAT2;
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4086
    Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, cnt2 = r4,
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4087
        tmp1 = r10, tmp2 = r11, tmp3 = r12, tmp4 = r14;
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4088
    FloatRegister vtmpZ = v0, vtmp = v1, vtmp3 = v2;
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4089
    RegSet spilled_regs = RegSet::of(tmp3, tmp4);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4090
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4091
    int prefetchLoopExitCondition = MAX(32, SoftwarePrefetchHintDistance/2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4092
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4093
    __ eor(vtmpZ, __ T16B, vtmpZ, vtmpZ);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4094
    // cnt2 == amount of characters left to compare
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4095
    // Check already loaded first 4 symbols(vtmp and tmp2(LU)/tmp1(UL))
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4096
    __ zip1(vtmp, __ T8B, vtmp, vtmpZ);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4097
    __ add(str1, str1, isLU ? wordSize/2 : wordSize);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4098
    __ add(str2, str2, isLU ? wordSize : wordSize/2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4099
    __ fmovd(isLU ? tmp1 : tmp2, vtmp);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4100
    __ subw(cnt2, cnt2, 8); // Already loaded 4 symbols. Last 4 is special case.
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4101
    __ add(str1, str1, cnt2, __ LSL, isLU ? 0 : 1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4102
    __ eor(rscratch2, tmp1, tmp2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4103
    __ add(str2, str2, cnt2, __ LSL, isLU ? 1 : 0);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4104
    __ mov(rscratch1, tmp2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4105
    __ cbnz(rscratch2, CALCULATE_DIFFERENCE);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4106
    Register strU = isLU ? str2 : str1,
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4107
             strL = isLU ? str1 : str2,
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4108
             tmpU = isLU ? rscratch1 : tmp1, // where to keep U for comparison
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4109
             tmpL = isLU ? tmp1 : rscratch1; // where to keep L for comparison
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4110
    __ push(spilled_regs, sp);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4111
    __ sub(tmp2, strL, cnt2); // strL pointer to load from
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4112
    __ sub(cnt1, strU, cnt2, __ LSL, 1); // strU pointer to load from
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4113
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4114
    __ ldr(tmp3, Address(__ post(cnt1, 8)));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4115
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4116
    if (SoftwarePrefetchHintDistance >= 0) {
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
  4117
      __ subs(rscratch2, cnt2, prefetchLoopExitCondition);
50756
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4118
      __ br(__ LT, SMALL_LOOP);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4119
      __ bind(LARGE_LOOP_PREFETCH);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4120
        __ prfm(Address(tmp2, SoftwarePrefetchHintDistance));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4121
        __ mov(tmp4, 2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4122
        __ prfm(Address(cnt1, SoftwarePrefetchHintDistance));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4123
        __ bind(LARGE_LOOP_PREFETCH_REPEAT1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4124
          compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4125
          __ subs(tmp4, tmp4, 1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4126
          __ br(__ GT, LARGE_LOOP_PREFETCH_REPEAT1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4127
          __ prfm(Address(cnt1, SoftwarePrefetchHintDistance));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4128
          __ mov(tmp4, 2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4129
        __ bind(LARGE_LOOP_PREFETCH_REPEAT2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4130
          compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4131
          __ subs(tmp4, tmp4, 1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4132
          __ br(__ GT, LARGE_LOOP_PREFETCH_REPEAT2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4133
          __ sub(cnt2, cnt2, 64);
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
  4134
          __ subs(rscratch2, cnt2, prefetchLoopExitCondition);
50756
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4135
          __ br(__ GE, LARGE_LOOP_PREFETCH);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4136
    }
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4137
    __ cbz(cnt2, LOAD_LAST); // no characters left except last load
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4138
    __ subs(cnt2, cnt2, 16);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4139
    __ br(__ LT, TAIL);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4140
    __ b(SMALL_LOOP_ENTER);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4141
    __ bind(SMALL_LOOP); // smaller loop
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4142
      __ subs(cnt2, cnt2, 16);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4143
    __ bind(SMALL_LOOP_ENTER);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4144
      compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4145
      __ br(__ GE, SMALL_LOOP);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4146
      __ cbz(cnt2, LOAD_LAST);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4147
    __ bind(TAIL); // 1..15 characters left
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
  4148
      __ subs(zr, cnt2, -8);
50756
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4149
      __ br(__ GT, TAIL_LOAD_16);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4150
      __ ldrd(vtmp, Address(tmp2));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4151
      __ zip1(vtmp3, __ T8B, vtmp, vtmpZ);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4152
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4153
      __ ldr(tmpU, Address(__ post(cnt1, 8)));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4154
      __ fmovd(tmpL, vtmp3);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4155
      __ eor(rscratch2, tmp3, tmpL);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4156
      __ cbnz(rscratch2, DIFF2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4157
      __ umov(tmpL, vtmp3, __ D, 1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4158
      __ eor(rscratch2, tmpU, tmpL);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4159
      __ cbnz(rscratch2, DIFF1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4160
      __ b(LOAD_LAST);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4161
    __ bind(TAIL_LOAD_16);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4162
      __ ldrq(vtmp, Address(tmp2));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4163
      __ ldr(tmpU, Address(__ post(cnt1, 8)));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4164
      __ zip1(vtmp3, __ T16B, vtmp, vtmpZ);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4165
      __ zip2(vtmp, __ T16B, vtmp, vtmpZ);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4166
      __ fmovd(tmpL, vtmp3);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4167
      __ eor(rscratch2, tmp3, tmpL);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4168
      __ cbnz(rscratch2, DIFF2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4169
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4170
      __ ldr(tmp3, Address(__ post(cnt1, 8)));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4171
      __ umov(tmpL, vtmp3, __ D, 1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4172
      __ eor(rscratch2, tmpU, tmpL);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4173
      __ cbnz(rscratch2, DIFF1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4174
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4175
      __ ldr(tmpU, Address(__ post(cnt1, 8)));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4176
      __ fmovd(tmpL, vtmp);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4177
      __ eor(rscratch2, tmp3, tmpL);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4178
      __ cbnz(rscratch2, DIFF2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4179
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4180
      __ umov(tmpL, vtmp, __ D, 1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4181
      __ eor(rscratch2, tmpU, tmpL);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4182
      __ cbnz(rscratch2, DIFF1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4183
      __ b(LOAD_LAST);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4184
    __ bind(DIFF2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4185
      __ mov(tmpU, tmp3);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4186
    __ bind(DIFF1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4187
      __ pop(spilled_regs, sp);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4188
      __ b(CALCULATE_DIFFERENCE);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4189
    __ bind(LOAD_LAST);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4190
      __ pop(spilled_regs, sp);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4191
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4192
      __ ldrs(vtmp, Address(strL));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4193
      __ ldr(tmpU, Address(strU));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4194
      __ zip1(vtmp, __ T8B, vtmp, vtmpZ);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4195
      __ fmovd(tmpL, vtmp);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4196
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4197
      __ eor(rscratch2, tmpU, tmpL);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4198
      __ cbz(rscratch2, DONE);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4199
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4200
    // Find the first different characters in the longwords and
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4201
    // compute their difference.
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4202
    __ bind(CALCULATE_DIFFERENCE);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4203
      __ rev(rscratch2, rscratch2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4204
      __ clz(rscratch2, rscratch2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4205
      __ andr(rscratch2, rscratch2, -16);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4206
      __ lsrv(tmp1, tmp1, rscratch2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4207
      __ uxthw(tmp1, tmp1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4208
      __ lsrv(rscratch1, rscratch1, rscratch2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4209
      __ uxthw(rscratch1, rscratch1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4210
      __ subw(result, tmp1, rscratch1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4211
    __ bind(DONE);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4212
      __ ret(lr);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4213
    return entry;
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4214
  }
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4215
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4216
  // r0  = result
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4217
  // r1  = str1
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4218
  // r2  = cnt1
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4219
  // r3  = str2
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4220
  // r4  = cnt2
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4221
  // r10 = tmp1
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4222
  // r11 = tmp2
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4223
  address generate_compare_long_string_same_encoding(bool isLL) {
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4224
    __ align(CodeEntryAlignment);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4225
    StubCodeMark mark(this, "StubRoutines", isLL
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4226
        ? "compare_long_string_same_encoding LL"
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4227
        : "compare_long_string_same_encoding UU");
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4228
    address entry = __ pc();
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4229
    Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, cnt2 = r4,
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4230
        tmp1 = r10, tmp2 = r11;
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4231
    Label SMALL_LOOP, LARGE_LOOP_PREFETCH, CHECK_LAST, DIFF2, TAIL,
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4232
        LENGTH_DIFF, DIFF, LAST_CHECK_AND_LENGTH_DIFF,
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4233
        DIFF_LAST_POSITION, DIFF_LAST_POSITION2;
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4234
    // exit from large loop when less than 64 bytes left to read or we're about
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4235
    // to prefetch memory behind array border
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4236
    int largeLoopExitCondition = MAX(64, SoftwarePrefetchHintDistance)/(isLL ? 1 : 2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4237
    // cnt1/cnt2 contains amount of characters to compare. cnt1 can be re-used
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4238
    // update cnt2 counter with already loaded 8 bytes
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4239
    __ sub(cnt2, cnt2, wordSize/(isLL ? 1 : 2));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4240
    // update pointers, because of previous read
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4241
    __ add(str1, str1, wordSize);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4242
    __ add(str2, str2, wordSize);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4243
    if (SoftwarePrefetchHintDistance >= 0) {
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4244
      __ bind(LARGE_LOOP_PREFETCH);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4245
        __ prfm(Address(str1, SoftwarePrefetchHintDistance));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4246
        __ prfm(Address(str2, SoftwarePrefetchHintDistance));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4247
        compare_string_16_bytes_same(DIFF, DIFF2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4248
        compare_string_16_bytes_same(DIFF, DIFF2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4249
        __ sub(cnt2, cnt2, isLL ? 64 : 32);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4250
        compare_string_16_bytes_same(DIFF, DIFF2);
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
  4251
        __ subs(rscratch2, cnt2, largeLoopExitCondition);
50756
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4252
        compare_string_16_bytes_same(DIFF, DIFF2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4253
        __ br(__ GT, LARGE_LOOP_PREFETCH);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4254
        __ cbz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); // no more chars left?
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4255
        // less than 16 bytes left?
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4256
        __ subs(cnt2, cnt2, isLL ? 16 : 8);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4257
        __ br(__ LT, TAIL);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4258
    }
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4259
    __ bind(SMALL_LOOP);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4260
      compare_string_16_bytes_same(DIFF, DIFF2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4261
      __ subs(cnt2, cnt2, isLL ? 16 : 8);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4262
      __ br(__ GE, SMALL_LOOP);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4263
    __ bind(TAIL);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4264
      __ adds(cnt2, cnt2, isLL ? 16 : 8);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4265
      __ br(__ EQ, LAST_CHECK_AND_LENGTH_DIFF);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4266
      __ subs(cnt2, cnt2, isLL ? 8 : 4);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4267
      __ br(__ LE, CHECK_LAST);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4268
      __ eor(rscratch2, tmp1, tmp2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4269
      __ cbnz(rscratch2, DIFF);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4270
      __ ldr(tmp1, Address(__ post(str1, 8)));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4271
      __ ldr(tmp2, Address(__ post(str2, 8)));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4272
      __ sub(cnt2, cnt2, isLL ? 8 : 4);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4273
    __ bind(CHECK_LAST);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4274
      if (!isLL) {
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4275
        __ add(cnt2, cnt2, cnt2); // now in bytes
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4276
      }
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4277
      __ eor(rscratch2, tmp1, tmp2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4278
      __ cbnz(rscratch2, DIFF);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4279
      __ ldr(rscratch1, Address(str1, cnt2));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4280
      __ ldr(cnt1, Address(str2, cnt2));
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4281
      __ eor(rscratch2, rscratch1, cnt1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4282
      __ cbz(rscratch2, LENGTH_DIFF);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4283
      // Find the first different characters in the longwords and
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4284
      // compute their difference.
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4285
    __ bind(DIFF2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4286
      __ rev(rscratch2, rscratch2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4287
      __ clz(rscratch2, rscratch2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4288
      __ andr(rscratch2, rscratch2, isLL ? -8 : -16);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4289
      __ lsrv(rscratch1, rscratch1, rscratch2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4290
      if (isLL) {
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4291
        __ lsrv(cnt1, cnt1, rscratch2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4292
        __ uxtbw(rscratch1, rscratch1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4293
        __ uxtbw(cnt1, cnt1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4294
      } else {
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4295
        __ lsrv(cnt1, cnt1, rscratch2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4296
        __ uxthw(rscratch1, rscratch1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4297
        __ uxthw(cnt1, cnt1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4298
      }
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4299
      __ subw(result, rscratch1, cnt1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4300
      __ b(LENGTH_DIFF);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4301
    __ bind(DIFF);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4302
      __ rev(rscratch2, rscratch2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4303
      __ clz(rscratch2, rscratch2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4304
      __ andr(rscratch2, rscratch2, isLL ? -8 : -16);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4305
      __ lsrv(tmp1, tmp1, rscratch2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4306
      if (isLL) {
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4307
        __ lsrv(tmp2, tmp2, rscratch2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4308
        __ uxtbw(tmp1, tmp1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4309
        __ uxtbw(tmp2, tmp2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4310
      } else {
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4311
        __ lsrv(tmp2, tmp2, rscratch2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4312
        __ uxthw(tmp1, tmp1);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4313
        __ uxthw(tmp2, tmp2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4314
      }
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4315
      __ subw(result, tmp1, tmp2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4316
      __ b(LENGTH_DIFF);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4317
    __ bind(LAST_CHECK_AND_LENGTH_DIFF);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4318
      __ eor(rscratch2, tmp1, tmp2);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4319
      __ cbnz(rscratch2, DIFF);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4320
    __ bind(LENGTH_DIFF);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4321
      __ ret(lr);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4322
    return entry;
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4323
  }
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4324
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4325
  void generate_compare_long_strings() {
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4326
      StubRoutines::aarch64::_compare_long_string_LL
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4327
          = generate_compare_long_string_same_encoding(true);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4328
      StubRoutines::aarch64::_compare_long_string_UU
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4329
          = generate_compare_long_string_same_encoding(false);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4330
      StubRoutines::aarch64::_compare_long_string_LU
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4331
          = generate_compare_long_string_different_encoding(true);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4332
      StubRoutines::aarch64::_compare_long_string_UL
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4333
          = generate_compare_long_string_different_encoding(false);
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4334
  }
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  4335
50757
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4336
  // R0 = result
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4337
  // R1 = str2
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4338
  // R2 = cnt1
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4339
  // R3 = str1
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4340
  // R4 = cnt2
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4341
  // This generic linear code uses a few additional ideas, which make it faster:
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4342
  // 1) we can safely keep at least 1st register of pattern(since length >= 8)
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4343
  // in order to skip initial loading(help in systems with 1 ld pipeline)
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4344
  // 2) we can use "fast" algorithm of finding single character to search for
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4345
  // first symbol with fewer branches (1 branch per loaded register instead
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4346
  // of branch for each symbol), so, this is where constants like
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4347
  // 0x0101...01, 0x00010001...0001, 0x7f7f...7f, 0x7fff7fff...7fff comes from
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4348
  // 3) after loading and analyzing 1st register of source string, it can be
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4349
  // used to search for every 1st character entry, saving few loads in
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4350
  // comparison with a "simpler-but-slower" implementation
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4351
  // 4) in order to avoid lots of push/pop operations, code below is heavily
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4352
  // re-using/re-initializing/compressing register values, which makes code
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4353
  // larger and a bit less readable, however, most of extra operations are
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4354
  // issued during loads or branches, so, penalty is minimal
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4355
  address generate_string_indexof_linear(bool str1_isL, bool str2_isL) {
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4356
    const char* stubName = str1_isL
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4357
        ? (str2_isL ? "indexof_linear_ll" : "indexof_linear_ul")
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4358
        : "indexof_linear_uu";
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4359
    __ align(CodeEntryAlignment);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4360
    StubCodeMark mark(this, "StubRoutines", stubName);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4361
    address entry = __ pc();
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4362
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4363
    int str1_chr_size = str1_isL ? 1 : 2;
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4364
    int str2_chr_size = str2_isL ? 1 : 2;
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4365
    int str1_chr_shift = str1_isL ? 0 : 1;
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4366
    int str2_chr_shift = str2_isL ? 0 : 1;
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4367
    bool isL = str1_isL && str2_isL;
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4368
   // parameters
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4369
    Register result = r0, str2 = r1, cnt1 = r2, str1 = r3, cnt2 = r4;
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4370
    // temporary registers
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4371
    Register tmp1 = r20, tmp2 = r21, tmp3 = r22, tmp4 = r23;
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4372
    RegSet spilled_regs = RegSet::range(tmp1, tmp4);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4373
    // redefinitions
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4374
    Register ch1 = rscratch1, ch2 = rscratch2, first = tmp3;
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4375
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4376
    __ push(spilled_regs, sp);
51756
4bd35a5ec694 8210676: Remove some unused Label variables
mikael
parents: 51619
diff changeset
  4377
    Label L_LOOP, L_LOOP_PROCEED, L_SMALL, L_HAS_ZERO,
50757
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4378
        L_HAS_ZERO_LOOP, L_CMP_LOOP, L_CMP_LOOP_NOMATCH, L_SMALL_PROCEED,
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4379
        L_SMALL_HAS_ZERO_LOOP, L_SMALL_CMP_LOOP_NOMATCH, L_SMALL_CMP_LOOP,
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4380
        L_POST_LOOP, L_CMP_LOOP_LAST_CMP, L_HAS_ZERO_LOOP_NOMATCH,
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4381
        L_SMALL_CMP_LOOP_LAST_CMP, L_SMALL_CMP_LOOP_LAST_CMP2,
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4382
        L_CMP_LOOP_LAST_CMP2, DONE, NOMATCH;
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4383
    // Read whole register from str1. It is safe, because length >=8 here
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4384
    __ ldr(ch1, Address(str1));
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4385
    // Read whole register from str2. It is safe, because length >=8 here
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4386
    __ ldr(ch2, Address(str2));
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4387
    __ andr(first, ch1, str1_isL ? 0xFF : 0xFFFF);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4388
    if (str1_isL != str2_isL) {
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4389
      __ eor(v0, __ T16B, v0, v0);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4390
    }
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4391
    __ mov(tmp1, str2_isL ? 0x0101010101010101 : 0x0001000100010001);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4392
    __ mul(first, first, tmp1);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4393
    // check if we have less than 1 register to check
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4394
    __ subs(cnt2, cnt2, wordSize/str2_chr_size - 1);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4395
    if (str1_isL != str2_isL) {
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4396
      __ fmovd(v1, ch1);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4397
    }
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4398
    __ br(__ LE, L_SMALL);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4399
    __ eor(ch2, first, ch2);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4400
    if (str1_isL != str2_isL) {
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4401
      __ zip1(v1, __ T16B, v1, v0);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4402
    }
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4403
    __ sub(tmp2, ch2, tmp1);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4404
    __ orr(ch2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4405
    __ bics(tmp2, tmp2, ch2);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4406
    if (str1_isL != str2_isL) {
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4407
      __ fmovd(ch1, v1);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4408
    }
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4409
    __ br(__ NE, L_HAS_ZERO);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4410
    __ subs(cnt2, cnt2, wordSize/str2_chr_size);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4411
    __ add(result, result, wordSize/str2_chr_size);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4412
    __ add(str2, str2, wordSize);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4413
    __ br(__ LT, L_POST_LOOP);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4414
    __ BIND(L_LOOP);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4415
      __ ldr(ch2, Address(str2));
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4416
      __ eor(ch2, first, ch2);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4417
      __ sub(tmp2, ch2, tmp1);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4418
      __ orr(ch2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4419
      __ bics(tmp2, tmp2, ch2);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4420
      __ br(__ NE, L_HAS_ZERO);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4421
    __ BIND(L_LOOP_PROCEED);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4422
      __ subs(cnt2, cnt2, wordSize/str2_chr_size);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4423
      __ add(str2, str2, wordSize);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4424
      __ add(result, result, wordSize/str2_chr_size);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4425
      __ br(__ GE, L_LOOP);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4426
    __ BIND(L_POST_LOOP);
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
  4427
      __ subs(zr, cnt2, -wordSize/str2_chr_size); // no extra characters to check
50757
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4428
      __ br(__ LE, NOMATCH);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4429
      __ ldr(ch2, Address(str2));
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4430
      __ sub(cnt2, zr, cnt2, __ LSL, LogBitsPerByte + str2_chr_shift);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4431
      __ eor(ch2, first, ch2);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4432
      __ sub(tmp2, ch2, tmp1);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4433
      __ orr(ch2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4434
      __ mov(tmp4, -1); // all bits set
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4435
      __ b(L_SMALL_PROCEED);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4436
    __ align(OptoLoopAlignment);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4437
    __ BIND(L_SMALL);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4438
      __ sub(cnt2, zr, cnt2, __ LSL, LogBitsPerByte + str2_chr_shift);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4439
      __ eor(ch2, first, ch2);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4440
      if (str1_isL != str2_isL) {
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4441
        __ zip1(v1, __ T16B, v1, v0);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4442
      }
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4443
      __ sub(tmp2, ch2, tmp1);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4444
      __ mov(tmp4, -1); // all bits set
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4445
      __ orr(ch2, ch2, str2_isL ? 0x7f7f7f7f7f7f7f7f : 0x7fff7fff7fff7fff);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4446
      if (str1_isL != str2_isL) {
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4447
        __ fmovd(ch1, v1); // move converted 4 symbols
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4448
      }
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4449
    __ BIND(L_SMALL_PROCEED);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4450
      __ lsrv(tmp4, tmp4, cnt2); // mask. zeroes on useless bits.
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4451
      __ bic(tmp2, tmp2, ch2);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4452
      __ ands(tmp2, tmp2, tmp4); // clear useless bits and check
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4453
      __ rbit(tmp2, tmp2);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4454
      __ br(__ EQ, NOMATCH);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4455
    __ BIND(L_SMALL_HAS_ZERO_LOOP);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4456
      __ clz(tmp4, tmp2); // potentially long. Up to 4 cycles on some cpu's
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
  4457
      __ cmp(cnt1, u1(wordSize/str2_chr_size));
50757
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4458
      __ br(__ LE, L_SMALL_CMP_LOOP_LAST_CMP2);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4459
      if (str2_isL) { // LL
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4460
        __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte); // address of "index"
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4461
        __ ldr(ch2, Address(str2)); // read whole register of str2. Safe.
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4462
        __ lslv(tmp2, tmp2, tmp4); // shift off leading zeroes from match info
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4463
        __ add(result, result, tmp4, __ LSR, LogBitsPerByte);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4464
        __ lsl(tmp2, tmp2, 1); // shift off leading "1" from match info
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4465
      } else {
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4466
        __ mov(ch2, 0xE); // all bits in byte set except last one
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4467
        __ andr(ch2, ch2, tmp4, __ LSR, LogBitsPerByte); // byte shift amount
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4468
        __ ldr(ch2, Address(str2, ch2)); // read whole register of str2. Safe.
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4469
        __ lslv(tmp2, tmp2, tmp4);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4470
        __ add(result, result, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4471
        __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4472
        __ lsl(tmp2, tmp2, 1); // shift off leading "1" from match info
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4473
        __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4474
      }
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4475
      __ cmp(ch1, ch2);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4476
      __ mov(tmp4, wordSize/str2_chr_size);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4477
      __ br(__ NE, L_SMALL_CMP_LOOP_NOMATCH);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4478
    __ BIND(L_SMALL_CMP_LOOP);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4479
      str1_isL ? __ ldrb(first, Address(str1, tmp4, Address::lsl(str1_chr_shift)))
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4480
               : __ ldrh(first, Address(str1, tmp4, Address::lsl(str1_chr_shift)));
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4481
      str2_isL ? __ ldrb(ch2, Address(str2, tmp4, Address::lsl(str2_chr_shift)))
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4482
               : __ ldrh(ch2, Address(str2, tmp4, Address::lsl(str2_chr_shift)));
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4483
      __ add(tmp4, tmp4, 1);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4484
      __ cmp(tmp4, cnt1);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4485
      __ br(__ GE, L_SMALL_CMP_LOOP_LAST_CMP);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4486
      __ cmp(first, ch2);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4487
      __ br(__ EQ, L_SMALL_CMP_LOOP);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4488
    __ BIND(L_SMALL_CMP_LOOP_NOMATCH);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4489
      __ cbz(tmp2, NOMATCH); // no more matches. exit
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4490
      __ clz(tmp4, tmp2);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4491
      __ add(result, result, 1); // advance index
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4492
      __ add(str2, str2, str2_chr_size); // advance pointer
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4493
      __ b(L_SMALL_HAS_ZERO_LOOP);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4494
    __ align(OptoLoopAlignment);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4495
    __ BIND(L_SMALL_CMP_LOOP_LAST_CMP);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4496
      __ cmp(first, ch2);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4497
      __ br(__ NE, L_SMALL_CMP_LOOP_NOMATCH);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4498
      __ b(DONE);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4499
    __ align(OptoLoopAlignment);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4500
    __ BIND(L_SMALL_CMP_LOOP_LAST_CMP2);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4501
      if (str2_isL) { // LL
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4502
        __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte); // address of "index"
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4503
        __ ldr(ch2, Address(str2)); // read whole register of str2. Safe.
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4504
        __ lslv(tmp2, tmp2, tmp4); // shift off leading zeroes from match info
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4505
        __ add(result, result, tmp4, __ LSR, LogBitsPerByte);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4506
        __ lsl(tmp2, tmp2, 1); // shift off leading "1" from match info
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4507
      } else {
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4508
        __ mov(ch2, 0xE); // all bits in byte set except last one
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4509
        __ andr(ch2, ch2, tmp4, __ LSR, LogBitsPerByte); // byte shift amount
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4510
        __ ldr(ch2, Address(str2, ch2)); // read whole register of str2. Safe.
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4511
        __ lslv(tmp2, tmp2, tmp4);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4512
        __ add(result, result, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4513
        __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4514
        __ lsl(tmp2, tmp2, 1); // shift off leading "1" from match info
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4515
        __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4516
      }
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4517
      __ cmp(ch1, ch2);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4518
      __ br(__ NE, L_SMALL_CMP_LOOP_NOMATCH);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4519
      __ b(DONE);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4520
    __ align(OptoLoopAlignment);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4521
    __ BIND(L_HAS_ZERO);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4522
      __ rbit(tmp2, tmp2);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4523
      __ clz(tmp4, tmp2); // potentially long. Up to 4 cycles on some CPU's
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4524
      // Now, perform compression of counters(cnt2 and cnt1) into one register.
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4525
      // It's fine because both counters are 32bit and are not changed in this
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4526
      // loop. Just restore it on exit. So, cnt1 can be re-used in this loop.
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4527
      __ orr(cnt2, cnt2, cnt1, __ LSL, BitsPerByte * wordSize / 2);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4528
      __ sub(result, result, 1);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4529
    __ BIND(L_HAS_ZERO_LOOP);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4530
      __ mov(cnt1, wordSize/str2_chr_size);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4531
      __ cmp(cnt1, cnt2, __ LSR, BitsPerByte * wordSize / 2);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4532
      __ br(__ GE, L_CMP_LOOP_LAST_CMP2); // case of 8 bytes only to compare
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4533
      if (str2_isL) {
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4534
        __ lsr(ch2, tmp4, LogBitsPerByte + str2_chr_shift); // char index
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4535
        __ ldr(ch2, Address(str2, ch2)); // read whole register of str2. Safe.
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4536
        __ lslv(tmp2, tmp2, tmp4);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4537
        __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4538
        __ add(tmp4, tmp4, 1);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4539
        __ add(result, result, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4540
        __ lsl(tmp2, tmp2, 1);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4541
        __ mov(tmp4, wordSize/str2_chr_size);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4542
      } else {
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4543
        __ mov(ch2, 0xE);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4544
        __ andr(ch2, ch2, tmp4, __ LSR, LogBitsPerByte); // byte shift amount
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4545
        __ ldr(ch2, Address(str2, ch2)); // read whole register of str2. Safe.
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4546
        __ lslv(tmp2, tmp2, tmp4);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4547
        __ add(tmp4, tmp4, 1);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4548
        __ add(result, result, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4549
        __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4550
        __ lsl(tmp2, tmp2, 1);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4551
        __ mov(tmp4, wordSize/str2_chr_size);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4552
        __ sub(str2, str2, str2_chr_size);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4553
      }
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4554
      __ cmp(ch1, ch2);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4555
      __ mov(tmp4, wordSize/str2_chr_size);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4556
      __ br(__ NE, L_CMP_LOOP_NOMATCH);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4557
    __ BIND(L_CMP_LOOP);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4558
      str1_isL ? __ ldrb(cnt1, Address(str1, tmp4, Address::lsl(str1_chr_shift)))
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4559
               : __ ldrh(cnt1, Address(str1, tmp4, Address::lsl(str1_chr_shift)));
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4560
      str2_isL ? __ ldrb(ch2, Address(str2, tmp4, Address::lsl(str2_chr_shift)))
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4561
               : __ ldrh(ch2, Address(str2, tmp4, Address::lsl(str2_chr_shift)));
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4562
      __ add(tmp4, tmp4, 1);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4563
      __ cmp(tmp4, cnt2, __ LSR, BitsPerByte * wordSize / 2);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4564
      __ br(__ GE, L_CMP_LOOP_LAST_CMP);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4565
      __ cmp(cnt1, ch2);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4566
      __ br(__ EQ, L_CMP_LOOP);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4567
    __ BIND(L_CMP_LOOP_NOMATCH);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4568
      // here we're not matched
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4569
      __ cbz(tmp2, L_HAS_ZERO_LOOP_NOMATCH); // no more matches. Proceed to main loop
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4570
      __ clz(tmp4, tmp2);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4571
      __ add(str2, str2, str2_chr_size); // advance pointer
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4572
      __ b(L_HAS_ZERO_LOOP);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4573
    __ align(OptoLoopAlignment);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4574
    __ BIND(L_CMP_LOOP_LAST_CMP);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4575
      __ cmp(cnt1, ch2);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4576
      __ br(__ NE, L_CMP_LOOP_NOMATCH);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4577
      __ b(DONE);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4578
    __ align(OptoLoopAlignment);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4579
    __ BIND(L_CMP_LOOP_LAST_CMP2);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4580
      if (str2_isL) {
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4581
        __ lsr(ch2, tmp4, LogBitsPerByte + str2_chr_shift); // char index
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4582
        __ ldr(ch2, Address(str2, ch2)); // read whole register of str2. Safe.
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4583
        __ lslv(tmp2, tmp2, tmp4);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4584
        __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4585
        __ add(tmp4, tmp4, 1);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4586
        __ add(result, result, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4587
        __ lsl(tmp2, tmp2, 1);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4588
      } else {
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4589
        __ mov(ch2, 0xE);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4590
        __ andr(ch2, ch2, tmp4, __ LSR, LogBitsPerByte); // byte shift amount
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4591
        __ ldr(ch2, Address(str2, ch2)); // read whole register of str2. Safe.
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4592
        __ lslv(tmp2, tmp2, tmp4);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4593
        __ add(tmp4, tmp4, 1);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4594
        __ add(result, result, tmp4, __ LSR, LogBitsPerByte + str2_chr_shift);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4595
        __ add(str2, str2, tmp4, __ LSR, LogBitsPerByte);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4596
        __ lsl(tmp2, tmp2, 1);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4597
        __ sub(str2, str2, str2_chr_size);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4598
      }
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4599
      __ cmp(ch1, ch2);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4600
      __ br(__ NE, L_CMP_LOOP_NOMATCH);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4601
      __ b(DONE);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4602
    __ align(OptoLoopAlignment);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4603
    __ BIND(L_HAS_ZERO_LOOP_NOMATCH);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4604
      // 1) Restore "result" index. Index was wordSize/str2_chr_size * N until
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4605
      // L_HAS_ZERO block. Byte octet was analyzed in L_HAS_ZERO_LOOP,
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4606
      // so, result was increased at max by wordSize/str2_chr_size - 1, so,
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4607
      // respective high bit wasn't changed. L_LOOP_PROCEED will increase
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4608
      // result by analyzed characters value, so, we can just reset lower bits
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4609
      // in result here. Clear 2 lower bits for UU/UL and 3 bits for LL
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4610
      // 2) restore cnt1 and cnt2 values from "compressed" cnt2
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4611
      // 3) advance str2 value to represent next str2 octet. result & 7/3 is
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4612
      // index of last analyzed substring inside current octet. So, str2 is at
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4613
      // respective start address. We need to advance it to next octet
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4614
      __ andr(tmp2, result, wordSize/str2_chr_size - 1); // symbols analyzed
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4615
      __ lsr(cnt1, cnt2, BitsPerByte * wordSize / 2);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4616
      __ bfm(result, zr, 0, 2 - str2_chr_shift);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4617
      __ sub(str2, str2, tmp2, __ LSL, str2_chr_shift); // restore str2
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4618
      __ movw(cnt2, cnt2);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4619
      __ b(L_LOOP_PROCEED);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4620
    __ align(OptoLoopAlignment);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4621
    __ BIND(NOMATCH);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4622
      __ mov(result, -1);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4623
    __ BIND(DONE);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4624
      __ pop(spilled_regs, sp);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4625
      __ ret(lr);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4626
    return entry;
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4627
  }
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4628
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4629
  // Generates the three linear String.indexOf stubs and stores the address
  // returned by each generate_string_indexof_linear() call in the matching
  // StubRoutines::aarch64 field (_ll, _uu, _ul).
  //
  // NOTE(review): the two bool arguments are presumably (str1_isL, str2_isL),
  // the flags the generator body branches on -- confirm against its signature.
  // Under that reading: ll = both flags set, uu = neither set, ul = only the
  // first flag set.
  void generate_string_indexof_stubs() {
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4630
    StubRoutines::aarch64::_string_indexof_linear_ll = generate_string_indexof_linear(true, true);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4631
    StubRoutines::aarch64::_string_indexof_linear_uu = generate_string_indexof_linear(false, false);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4632
    StubRoutines::aarch64::_string_indexof_linear_ul = generate_string_indexof_linear(true, false);
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4633
  }
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  4634
50758
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4635
  // Emits code that byte-interleaves src1 and src2 with v0 and stores the
  // four resulting 16-byte vectors (v1..v4) to the address in r1, then
  // post-increments r1 by 64.
  //
  //   generatePrfm - when true, additionally emits a PSTL1STRM prefetch for
  //                  the address r1 + SoftwarePrefetchHintDistance.
  //   src1, src2   - vector registers holding the bytes to interleave.
  //
  // Writes v1, v2, v3, v4 and r1. v0 is only read.
  // NOTE(review): v0 is presumably expected to hold zero, per the "V0 = 0"
  // register contract documented on generate_large_byte_array_inflate, so each
  // source byte becomes a zero-extended 16-bit value -- confirm with callers.
  void inflate_and_store_2_fp_registers(bool generatePrfm,
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4636
      FloatRegister src1, FloatRegister src2) {
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4637
    Register dst = r1; // destination pointer is hard-wired to r1
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4638
    __ zip1(v1, __ T16B, src1, v0); // interleave low halves of src1 and v0
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4639
    __ zip2(v2, __ T16B, src1, v0); // interleave high halves of src1 and v0
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4640
    if (generatePrfm) {
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4641
      __ prfm(Address(dst, SoftwarePrefetchHintDistance), PSTL1STRM);
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4642
    }
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4643
    __ zip1(v3, __ T16B, src2, v0); // same interleave for src2, low halves
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4644
    __ zip2(v4, __ T16B, src2, v0); // same interleave for src2, high halves
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4645
    // store v1..v4 (4 x 16 bytes) at dst, then advance dst by 64
    __ st1(v1, v2, v3, v4, __ T16B, Address(__ post(dst, 64)));
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4646
  }
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4647
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4648
  // R0 = src
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4649
  // R1 = dst
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4650
  // R2 = len
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4651
  // R3 = len >> 3
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4652
  // V0 = 0
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4653
  // v1 = loaded 8 bytes
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4654
  address generate_large_byte_array_inflate() {
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4655
    __ align(CodeEntryAlignment);
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4656
    StubCodeMark mark(this, "StubRoutines", "large_byte_array_inflate");
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4657
    address entry = __ pc();
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4658
    Label LOOP, LOOP_START, LOOP_PRFM, LOOP_PRFM_START, DONE;
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4659
    Register src = r0, dst = r1, len = r2, octetCounter = r3;
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4660
    const int large_loop_threshold = MAX(64, SoftwarePrefetchHintDistance)/8 + 4;
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4661
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4662
    // do one more 8-byte read to have address 16-byte aligned in most cases
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4663
    // also use single store instruction
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4664
    __ ldrd(v2, __ post(src, 8));
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4665
    __ sub(octetCounter, octetCounter, 2);
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4666
    __ zip1(v1, __ T16B, v1, v0);
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4667
    __ zip1(v2, __ T16B, v2, v0);
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4668
    __ st1(v1, v2, __ T16B, __ post(dst, 32));
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4669
    __ ld1(v3, v4, v5, v6, __ T16B, Address(__ post(src, 64)));
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
  4670
    __ subs(rscratch1, octetCounter, large_loop_threshold);
50758
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4671
    __ br(__ LE, LOOP_START);
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4672
    __ b(LOOP_PRFM_START);
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4673
    __ bind(LOOP_PRFM);
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4674
      __ ld1(v3, v4, v5, v6, __ T16B, Address(__ post(src, 64)));
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4675
    __ bind(LOOP_PRFM_START);
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4676
      __ prfm(Address(src, SoftwarePrefetchHintDistance));
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4677
      __ sub(octetCounter, octetCounter, 8);
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
  4678
      __ subs(rscratch1, octetCounter, large_loop_threshold);
50758
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4679
      inflate_and_store_2_fp_registers(true, v3, v4);
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4680
      inflate_and_store_2_fp_registers(true, v5, v6);
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4681
      __ br(__ GT, LOOP_PRFM);
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
  4682
      __ cmp(octetCounter, (u1)8);
50758
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4683
      __ br(__ LT, DONE);
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4684
    __ bind(LOOP);
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4685
      __ ld1(v3, v4, v5, v6, __ T16B, Address(__ post(src, 64)));
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4686
      __ bind(LOOP_START);
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4687
      __ sub(octetCounter, octetCounter, 8);
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
  4688
      __ cmp(octetCounter, (u1)8);
50758
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4689
      inflate_and_store_2_fp_registers(false, v3, v4);
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4690
      inflate_and_store_2_fp_registers(false, v5, v6);
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4691
      __ br(__ GE, LOOP);
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4692
    __ bind(DONE);
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4693
      __ ret(lr);
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4694
    return entry;
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4695
  }
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  4696
31961
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4697
  /**
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4698
   *  Arguments:
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4699
   *
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4700
   *  Input:
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4701
   *  c_rarg0   - current state address
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4702
   *  c_rarg1   - H key address
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4703
   *  c_rarg2   - data address
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4704
   *  c_rarg3   - number of blocks
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4705
   *
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4706
   *  Output:
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4707
   *  Updated state at c_rarg0
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4708
   */
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4709
  address generate_ghash_processBlocks() {
32574
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4710
    // Bafflingly, GCM uses little-endian for the byte order, but
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4711
    // big-endian for the bit order.  For example, the polynomial 1 is
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4712
    // represented as the 16-byte string 80 00 00 00 | 12 bytes of 00.
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4713
    //
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4714
    // So, we must either reverse the bytes in each word and do
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4715
    // everything big-endian or reverse the bits in each byte and do
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4716
    // it little-endian.  On AArch64 it's more idiomatic to reverse
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4717
    // the bits in each byte (we have an instruction, RBIT, to do
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4718
    // that) and keep the data in little-endian bit order throught the
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4719
    // calculation, bit-reversing the inputs and outputs.
31961
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4720
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4721
    StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
32574
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4722
    __ align(wordSize * 2);
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4723
    address p = __ pc();
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4724
    __ emit_int64(0x87);  // The low-order bits of the field
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4725
                          // polynomial (i.e. p = z^7+z^2+z+1)
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4726
                          // repeated in the low and high parts of a
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4727
                          // 128-bit vector
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4728
    __ emit_int64(0x87);
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4729
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4730
    __ align(CodeEntryAlignment);
31961
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4731
    address start = __ pc();
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4732
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4733
    Register state   = c_rarg0;
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4734
    Register subkeyH = c_rarg1;
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4735
    Register data    = c_rarg2;
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4736
    Register blocks  = c_rarg3;
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4737
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4738
    FloatRegister vzr = v30;
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4739
    __ eor(vzr, __ T16B, vzr, vzr); // zero register
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4740
32574
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4741
    __ ldrq(v0, Address(state));
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4742
    __ ldrq(v1, Address(subkeyH));
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4743
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4744
    __ rev64(v0, __ T16B, v0);          // Bit-reverse words in state and subkeyH
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4745
    __ rbit(v0, __ T16B, v0);
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4746
    __ rev64(v1, __ T16B, v1);
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4747
    __ rbit(v1, __ T16B, v1);
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4748
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4749
    __ ldrq(v26, p);
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4750
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4751
    __ ext(v16, __ T16B, v1, v1, 0x08); // long-swap subkeyH into v1
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4752
    __ eor(v16, __ T16B, v16, v1);      // xor subkeyH into subkeyL (Karatsuba: (A1+A0))
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4753
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4754
    {
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4755
      Label L_ghash_loop;
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4756
      __ bind(L_ghash_loop);
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4757
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4758
      __ ldrq(v2, Address(__ post(data, 0x10))); // Load the data, bit
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4759
                                                 // reversing each byte
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4760
      __ rbit(v2, __ T16B, v2);
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4761
      __ eor(v2, __ T16B, v0, v2);   // bit-swapped data ^ bit-swapped state
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4762
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4763
      // Multiply state in v2 by subkey in v1
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4764
      ghash_multiply(/*result_lo*/v5, /*result_hi*/v7,
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4765
                     /*a*/v1, /*b*/v2, /*a1_xor_a0*/v16,
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4766
                     /*temps*/v6, v20, v18, v21);
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4767
      // Reduce v7:v5 by the field polynomial
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4768
      ghash_reduce(v0, v5, v7, v26, vzr, v20);
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4769
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4770
      __ sub(blocks, blocks, 1);
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4771
      __ cbnz(blocks, L_ghash_loop);
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4772
    }
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4773
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4774
    // The bit-reversed result is at this point in v0
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4775
    __ rev64(v1, __ T16B, v0);
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4776
    __ rbit(v1, __ T16B, v1);
6c3b890aa5d9 8134869: AARCH64: GHASH intrinsic is not optimal
aph
parents: 31961
diff changeset
  4777
31961
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4778
    __ st1(v1, __ T16B, state);
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4779
    __ ret(lr);
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4780
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4781
    return start;
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4782
  }
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  4783
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4784
  // Continuation point for throwing of implicit exceptions that are
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4785
  // not handled in the current activation. Fabricates an exception
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4786
  // oop and initiates normal exception dispatching in this
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4787
  // frame. Since we need to preserve callee-saved values (currently
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4788
  // only for C2, but done for C1 as well) we need a callee-saved oop
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4789
  // map and therefore have to make these stubs into RuntimeStubs
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4790
  // rather than BufferBlobs.  If the compiler needs all registers to
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4791
  // be preserved between the fault point and the exception handler
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4792
  // then it must assume responsibility for that in
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4793
  // AbstractCompiler::continuation_for_implicit_null_exception or
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4794
  // continuation_for_implicit_division_by_zero_exception. All other
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4795
  // implicit exceptions (e.g., NullPointerException or
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4796
  // AbstractMethodError on entry) are either at call sites or
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4797
  // otherwise assume that stack unwinding will be initiated, so
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4798
  // caller saved registers were assumed volatile in the compiler.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4799
30225
e9722ea461d4 8077615: AARCH64: Add C2 intrinsic for BigInteger::multiplyToLen() method
aph
parents: 29695
diff changeset
  4800
#undef __
e9722ea461d4 8077615: AARCH64: Add C2 intrinsic for BigInteger::multiplyToLen() method
aph
parents: 29695
diff changeset
  4801
#define __ masm->
e9722ea461d4 8077615: AARCH64: Add C2 intrinsic for BigInteger::multiplyToLen() method
aph
parents: 29695
diff changeset
  4802
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4803
  address generate_throw_exception(const char* name,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4804
                                   address runtime_entry,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4805
                                   Register arg1 = noreg,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4806
                                   Register arg2 = noreg) {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4807
    // Information about frame layout at time of blocking runtime call.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4808
    // Note that we only have to preserve callee-saved registers since
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4809
    // the compilers are responsible for supplying a continuation point
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4810
    // if they expect all registers to be preserved.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4811
    // n.b. aarch64 asserts that frame::arg_reg_save_area_bytes == 0
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4812
    enum layout {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4813
      rfp_off = 0,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4814
      rfp_off2,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4815
      return_off,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4816
      return_off2,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4817
      framesize // inclusive of return address
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4818
    };
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4819
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4820
    int insts_size = 512;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4821
    int locs_size  = 64;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4822
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4823
    CodeBuffer code(name, insts_size, locs_size);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4824
    OopMapSet* oop_maps  = new OopMapSet();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4825
    MacroAssembler* masm = new MacroAssembler(&code);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4826
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4827
    address start = __ pc();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4828
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4829
    // This is an inlined and slightly modified version of call_VM
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4830
    // which has the ability to fetch the return PC out of
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4831
    // thread-local storage and also sets up last_Java_sp slightly
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4832
    // differently than the real call_VM
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4833
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4834
    __ enter(); // Save FP and LR before call
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4835
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4836
    assert(is_even(framesize/2), "sp not 16-byte aligned");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4837
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4838
    // lr and fp are already in place
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4839
    __ sub(sp, rfp, ((unsigned)framesize-4) << LogBytesPerInt); // prolog
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4840
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4841
    int frame_complete = __ pc() - start;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4842
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4843
    // Set up last_Java_sp and last_Java_fp
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4844
    address the_pc = __ pc();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4845
    __ set_last_Java_frame(sp, rfp, (address)NULL, rscratch1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4846
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4847
    // Call runtime
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4848
    if (arg1 != noreg) {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4849
      assert(arg2 != c_rarg1, "clobbered");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4850
      __ mov(c_rarg1, arg1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4851
    }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4852
    if (arg2 != noreg) {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4853
      __ mov(c_rarg2, arg2);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4854
    }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4855
    __ mov(c_rarg0, rthread);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4856
    BLOCK_COMMENT("call runtime_entry");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4857
    __ mov(rscratch1, runtime_entry);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4858
    __ blrt(rscratch1, 3 /* number_of_arguments */, 0, 1);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4859
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4860
    // Generate oop map
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4861
    OopMap* map = new OopMap(framesize, 0);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4862
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4863
    oop_maps->add_gc_map(the_pc - start, map);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4864
40643
49539fc14e5a 8164113: AArch64: follow-up the fix for 8161598
aph
parents: 40080
diff changeset
  4865
    __ reset_last_Java_frame(true);
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4866
    __ maybe_isb();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4867
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4868
    __ leave();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4869
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4870
    // check for pending exceptions
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4871
#ifdef ASSERT
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4872
    Label L;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4873
    __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4874
    __ cbnz(rscratch1, L);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4875
    __ should_not_reach_here();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4876
    __ bind(L);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4877
#endif // ASSERT
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4878
    __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4879
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4880
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4881
    // codeBlob framesize is in words (not VMRegImpl::slot_size)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4882
    RuntimeStub* stub =
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4883
      RuntimeStub::new_runtime_stub(name,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4884
                                    &code,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4885
                                    frame_complete,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4886
                                    (framesize >> (LogBytesPerWord - LogBytesPerInt)),
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4887
                                    oop_maps, false);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4888
    return stub->entry_point();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4889
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  4890
31955
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4891
  class MontgomeryMultiplyGenerator : public MacroAssembler {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4892
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4893
    Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Ra, Rb, Rm, Rn,
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4894
      Pa, Pb, Pn, Pm, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, t0, t1, t2, Ri, Rj;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4895
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4896
    RegSet _toSave;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4897
    bool _squaring;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4898
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4899
  public:
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4900
    MontgomeryMultiplyGenerator (Assembler *as, bool squaring)
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4901
      : MacroAssembler(as->code()), _squaring(squaring) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4902
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4903
      // Register allocation
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4904
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4905
      Register reg = c_rarg0;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4906
      Pa_base = reg;       // Argument registers
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4907
      if (squaring)
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4908
        Pb_base = Pa_base;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4909
      else
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4910
        Pb_base = ++reg;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4911
      Pn_base = ++reg;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4912
      Rlen= ++reg;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4913
      inv = ++reg;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4914
      Pm_base = ++reg;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4915
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4916
                          // Working registers:
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4917
      Ra =  ++reg;        // The current digit of a, b, n, and m.
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4918
      Rb =  ++reg;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4919
      Rm =  ++reg;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4920
      Rn =  ++reg;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4921
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4922
      Pa =  ++reg;        // Pointers to the current/next digit of a, b, n, and m.
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4923
      Pb =  ++reg;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4924
      Pm =  ++reg;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4925
      Pn =  ++reg;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4926
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4927
      t0 =  ++reg;        // Three registers which form a
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4928
      t1 =  ++reg;        // triple-precision accumulator.
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4929
      t2 =  ++reg;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4930
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4931
      Ri =  ++reg;        // Inner and outer loop indexes.
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4932
      Rj =  ++reg;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4933
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4934
      Rhi_ab = ++reg;     // Product registers: low and high parts
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4935
      Rlo_ab = ++reg;     // of a*b and m*n.
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4936
      Rhi_mn = ++reg;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4937
      Rlo_mn = ++reg;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4938
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4939
      // r19 and up are callee-saved.
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4940
      _toSave = RegSet::range(r19, reg) + Pm_base;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4941
    }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4942
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4943
  private:
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4944
    void save_regs() {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4945
      push(_toSave, sp);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4946
    }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4947
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4948
    void restore_regs() {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4949
      pop(_toSave, sp);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4950
    }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4951
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4952
    template <typename T>
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4953
    void unroll_2(Register count, T block) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4954
      Label loop, end, odd;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4955
      tbnz(count, 0, odd);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4956
      cbz(count, end);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4957
      align(16);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4958
      bind(loop);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4959
      (this->*block)();
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4960
      bind(odd);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4961
      (this->*block)();
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4962
      subs(count, count, 2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4963
      br(Assembler::GT, loop);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4964
      bind(end);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4965
    }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4966
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4967
    template <typename T>
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4968
    void unroll_2(Register count, T block, Register d, Register s, Register tmp) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4969
      Label loop, end, odd;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4970
      tbnz(count, 0, odd);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4971
      cbz(count, end);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4972
      align(16);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4973
      bind(loop);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4974
      (this->*block)(d, s, tmp);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4975
      bind(odd);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4976
      (this->*block)(d, s, tmp);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4977
      subs(count, count, 2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4978
      br(Assembler::GT, loop);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4979
      bind(end);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4980
    }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4981
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4982
    void pre1(RegisterOrConstant i) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4983
      block_comment("pre1");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4984
      // Pa = Pa_base;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4985
      // Pb = Pb_base + i;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4986
      // Pm = Pm_base;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4987
      // Pn = Pn_base + i;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4988
      // Ra = *Pa;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4989
      // Rb = *Pb;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4990
      // Rm = *Pm;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4991
      // Rn = *Pn;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4992
      ldr(Ra, Address(Pa_base));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4993
      ldr(Rb, Address(Pb_base, i, Address::uxtw(LogBytesPerWord)));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4994
      ldr(Rm, Address(Pm_base));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4995
      ldr(Rn, Address(Pn_base, i, Address::uxtw(LogBytesPerWord)));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4996
      lea(Pa, Address(Pa_base));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4997
      lea(Pb, Address(Pb_base, i, Address::uxtw(LogBytesPerWord)));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4998
      lea(Pm, Address(Pm_base));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  4999
      lea(Pn, Address(Pn_base, i, Address::uxtw(LogBytesPerWord)));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5000
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5001
      // Zero the m*n result.
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5002
      mov(Rhi_mn, zr);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5003
      mov(Rlo_mn, zr);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5004
    }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5005
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5006
    // The core multiply-accumulate step of a Montgomery
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5007
    // multiplication.  The idea is to schedule operations as a
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5008
    // pipeline so that instructions with long latencies (loads and
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5009
    // multiplies) have time to complete before their results are
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5010
    // used.  This most benefits in-order implementations of the
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5011
    // architecture but out-of-order ones also benefit.
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5012
    void step() {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5013
      block_comment("step");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5014
      // MACC(Ra, Rb, t0, t1, t2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5015
      // Ra = *++Pa;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5016
      // Rb = *--Pb;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5017
      umulh(Rhi_ab, Ra, Rb);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5018
      mul(Rlo_ab, Ra, Rb);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5019
      ldr(Ra, pre(Pa, wordSize));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5020
      ldr(Rb, pre(Pb, -wordSize));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5021
      acc(Rhi_mn, Rlo_mn, t0, t1, t2); // The pending m*n from the
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5022
                                       // previous iteration.
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5023
      // MACC(Rm, Rn, t0, t1, t2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5024
      // Rm = *++Pm;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5025
      // Rn = *--Pn;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5026
      umulh(Rhi_mn, Rm, Rn);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5027
      mul(Rlo_mn, Rm, Rn);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5028
      ldr(Rm, pre(Pm, wordSize));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5029
      ldr(Rn, pre(Pn, -wordSize));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5030
      acc(Rhi_ab, Rlo_ab, t0, t1, t2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5031
    }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5032
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5033
    void post1() {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5034
      block_comment("post1");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5035
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5036
      // MACC(Ra, Rb, t0, t1, t2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5037
      // Ra = *++Pa;   (pseudocode only: post1 emits no load into Ra)
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5038
      // Rb = *--Pb;   (pseudocode only: post1 emits no load into Rb)
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5039
      umulh(Rhi_ab, Ra, Rb);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5040
      mul(Rlo_ab, Ra, Rb);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5041
      acc(Rhi_mn, Rlo_mn, t0, t1, t2);  // The pending m*n
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5042
      acc(Rhi_ab, Rlo_ab, t0, t1, t2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5043
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5044
      // *Pm = Rm = t0 * inv;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5045
      mul(Rm, t0, inv);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5046
      str(Rm, Address(Pm));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5047
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5048
      // MACC(Rm, Rn, t0, t1, t2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5049
      // t0 = t1; t1 = t2; t2 = 0;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5050
      umulh(Rhi_mn, Rm, Rn);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5051
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5052
#ifndef PRODUCT
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5053
      // assert(m[i] * n[0] + t0 == 0, "broken Montgomery multiply");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5054
      {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5055
        mul(Rlo_mn, Rm, Rn);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5056
        add(Rlo_mn, t0, Rlo_mn);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5057
        Label ok;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5058
        cbz(Rlo_mn, ok); {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5059
          stop("broken Montgomery multiply");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5060
        } bind(ok);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5061
      }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5062
#endif
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5063
      // We have very carefully set things up so that
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5064
      // m[i]*n[0] + t0 == 0 (mod b), so we don't have to calculate
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5065
      // the lower half of Rm * Rn because we know the result already:
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5066
      // it must be -t0.  t0 + (-t0) must generate a carry iff
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5067
      // t0 != 0.  So, rather than do a mul and an adds we just set
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5068
      // the carry flag iff t0 is nonzero.
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5069
      //
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5070
      // mul(Rlo_mn, Rm, Rn);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5071
      // adds(zr, t0, Rlo_mn);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5072
      subs(zr, t0, 1); // Set carry iff t0 is nonzero
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5073
      adcs(t0, t1, Rhi_mn);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5074
      adc(t1, t2, zr);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5075
      mov(t2, zr);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5076
    }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5077
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5078
    void pre2(RegisterOrConstant i, RegisterOrConstant len) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5079
      block_comment("pre2");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5080
      // Pa = Pa_base + i-len;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5081
      // Pb = Pb_base + len;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5082
      // Pm = Pm_base + i-len;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5083
      // Pn = Pn_base + len;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5084
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5085
      if (i.is_register()) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5086
        sub(Rj, i.as_register(), len);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5087
      } else {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5088
        mov(Rj, i.as_constant());
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5089
        sub(Rj, Rj, len);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5090
      }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5091
      // Rj == i-len
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5092
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5093
      lea(Pa, Address(Pa_base, Rj, Address::uxtw(LogBytesPerWord)));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5094
      lea(Pb, Address(Pb_base, len, Address::uxtw(LogBytesPerWord)));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5095
      lea(Pm, Address(Pm_base, Rj, Address::uxtw(LogBytesPerWord)));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5096
      lea(Pn, Address(Pn_base, len, Address::uxtw(LogBytesPerWord)));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5097
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5098
      // Ra = *++Pa;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5099
      // Rb = *--Pb;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5100
      // Rm = *++Pm;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5101
      // Rn = *--Pn;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5102
      ldr(Ra, pre(Pa, wordSize));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5103
      ldr(Rb, pre(Pb, -wordSize));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5104
      ldr(Rm, pre(Pm, wordSize));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5105
      ldr(Rn, pre(Pn, -wordSize));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5106
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5107
      mov(Rhi_mn, zr);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5108
      mov(Rlo_mn, zr);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5109
    }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5110
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5111
    // Emits the "post2" step: writes out the finished low accumulator word
    // and shifts the three-word accumulator t2:t1:t0 down by one word while
    // folding in the pending m*n product.
    //
    //   i   - current index; either a constant known when the stub is
    //         generated, or a register
    //   len - length; read with as_constant() when i is a constant, otherwise
    //         handed directly to sub()
    //
    // Emitted effect, in order:
    //   Rj = i - len;  t0 += Rlo_mn;  Pm_base[Rj] = t0;
    //   t0 = t1 + Rhi_mn + carry;  t1 = t2 + carry;  t2 = 0;
    void post2(RegisterOrConstant i, RegisterOrConstant len) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5112
      block_comment("post2");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5113
      // Rj = i - len;
      if (i.is_constant()) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5114
        // The difference is computed at generation time and emitted as an
        // immediate.  NOTE(review): this branch calls len.as_constant(), so it
        // assumes len is a constant whenever i is -- confirm against callers.
        mov(Rj, i.as_constant()-len.as_constant());
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5115
      } else {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5116
        sub(Rj, i.as_register(), len);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5117
      }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5118
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5119
      adds(t0, t0, Rlo_mn); // The pending m*n, low part
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5120
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5121
      // As soon as we know the least significant digit of our result,
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5122
      // store it.
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5123
      // Pm_base[i-len] = t0;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5124
      str(t0, Address(Pm_base, Rj, Address::uxtw(LogBytesPerWord)));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5125
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5126
      // t0 = t1; t1 = t2; t2 = 0;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5127
      // The adcs below consumes the carry produced by the adds above, so
      // nothing emitted in between may change the flags.
      adcs(t0, t1, Rhi_mn); // The pending m*n, high part
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5128
      adc(t1, t2, zr);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5129
      mov(t2, zr);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5130
    }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5131
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5132
    // A carry in t0 after Montgomery multiplication means that we
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5133
    // should subtract multiples of n from our result in m.  We'll
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5134
    // keep doing that until there is no carry.
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5135
    //
    //   len - number of words processed per subtraction pass (it is copied
    //         into the loop counter at the start of every pass)
    //
    // Each pass emits a word-by-word "m[i] -= n[i]" with borrow over the
    // arrays at Pm_base and Pn_base, then subtracts the final borrow from t0.
    // Passes repeat while t0 is nonzero.  t1 and t2 are reused as the loop
    // counter and index; Rm and Rn are overwritten.
    void normalize(RegisterOrConstant len) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5136
      block_comment("normalize");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5137
      // while (t0)
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5138
      //   t0 = sub(Pm_base, Pn_base, t0, len);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5139
      Label loop, post, again;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5140
      Register cnt = t1, i = t2; // Re-use registers; we're done with them now
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5141
      // Skip everything when t0 is already zero.
      cbz(t0, post); {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5142
        bind(again); {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5143
          // Start of a pass: i = 0, cnt = len, and preload word 0 of m and n.
          mov(i, zr);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5144
          mov(cnt, len);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5145
          ldr(Rm, Address(Pm_base, i, Address::uxtw(LogBytesPerWord)));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5146
          ldr(Rn, Address(Pn_base, i, Address::uxtw(LogBytesPerWord)));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5147
          subs(zr, zr, zr); // set carry flag, i.e. no borrow
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5148
          align(16);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5149
          bind(loop); {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5150
            // m[i] = m[i] - n[i] - borrow; the new borrow stays in the flags.
            sbcs(Rm, Rm, Rn);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5151
            str(Rm, Address(Pm_base, i, Address::uxtw(LogBytesPerWord)));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5152
            // The index/counter updates use add/sub (not adds/subs) and the
            // loop closes with cbnz, so the borrow from sbcs is carried into
            // the next iteration.
            add(i, i, 1);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5153
            // NOTE(review): on the last iteration i == len here, so these two
            // loads read index len of each array; the loaded values are not
            // used afterwards in this method.
            ldr(Rm, Address(Pm_base, i, Address::uxtw(LogBytesPerWord)));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5154
            ldr(Rn, Address(Pn_base, i, Address::uxtw(LogBytesPerWord)));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5155
            sub(cnt, cnt, 1);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5156
          } cbnz(cnt, loop);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5157
          // Fold the final borrow of this pass into t0: t0 = t0 - 0 - borrow.
          sbc(t0, t0, zr);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5158
        } cbnz(t0, again);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5159
      } bind(post);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5160
    }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5161
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5162
    // Move memory at s to d, reversing words.
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5163
    //    Increments d to end of copied memory
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5164
    //    Destroys tmp1, tmp2
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5165
    //    Preserves len
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5166
    //    Leaves s pointing to the address which was in d at start
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5167
    //
    //   d    - destination pointer register
    //   s    - source pointer register
    //   len  - register holding the number of words to move
    //   tmp1 - scratch; receives a copy of len and is passed to unroll_2 as
    //          the count
    //   tmp2 - scratch; used by reverse1 to hold the word being moved
    void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5168
      // NOTE(review): presumably this rejects scratch registers from r19
      // upwards because they would be clobbered before being saved -- confirm.
      assert(tmp1 < r19 && tmp2 < r19, "register corruption");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5169
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5170
      // Point s just past the last source word; reverse1 pre-decrements s, so
      // the source is read from its end towards its start.
      lea(s, Address(s, len, Address::uxtw(LogBytesPerWord)));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5171
      // Work on a copy of the count so that len itself is left untouched.
      mov(tmp1, len);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5172
      // unroll_2 is defined outside this view; presumably it emits a loop
      // that runs reverse1(d, s, tmp2) tmp1 times -- TODO confirm.
      unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5173
      // s = d - len words, i.e. the value d had on entry.
      sub(s, d, len, ext::uxtw, LogBytesPerWord);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5174
    }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5175
    // where
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5176
    // Emits the move of a single word for reverse(): steps s back by one word
    // and loads from it, swaps the two 32-bit halves of the loaded value, and
    // stores the result at d, then steps d forward by one word.
    //
    //   d   - destination pointer register (post-incremented by wordSize)
    //   s   - source pointer register (pre-decremented by wordSize)
    //   tmp - scratch register holding the word in flight
    void reverse1(Register d, Register s, Register tmp) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5177
      ldr(tmp, pre(s, -wordSize));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5178
      // Rotating a 64-bit value by 32 exchanges its upper and lower halves.
      ror(tmp, tmp, 32);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5179
      str(tmp, post(d, wordSize));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5180
    }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5181
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5182
    // Emits step() followed by one additional accumulation of the pair
    // Rhi_ab:Rlo_ab into the three-word accumulator t2:t1:t0.
    //
    // step() is defined outside this view.  NOTE(review): presumably it
    // leaves the current a*b product in Rhi_ab:Rlo_ab after accumulating it
    // once, so that product is counted twice here -- TODO confirm.
    void step_squaring() {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5183
      // An extra ACC
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5184
      step();
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5185
      acc(Rhi_ab, Rlo_ab, t0, t1, t2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5186
    }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5187
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5188
    // Emits a conditional multiply-accumulate: when bit 0 of i is clear
    // (i is even), t2:t1:t0 += Ra * Rb; when it is set, nothing is added.
    //
    //   i - index; must be held in a register, because as_register() is
    //       called unconditionally
    //
    // Overwrites Rhi_ab and Rlo_ab when the product is computed.
    void last_squaring(RegisterOrConstant i) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5189
      Label dont;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5190
      // if ((i & 1) == 0) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5191
      // Branch past the body when bit 0 of i is set.
      tbnz(i.as_register(), 0, dont); {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5192
        // MACC(Ra, Rb, t0, t1, t2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5193
        // Ra = *++Pa;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5194
        // Rb = *--Pb;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5195
        // NOTE(review): the pseudo-code above mentions reloading Ra and Rb,
        // but no loads are emitted in this method -- only the product and
        // the accumulation.
        umulh(Rhi_ab, Ra, Rb);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5196
        mul(Rlo_ab, Ra, Rb);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5197
        acc(Rhi_ab, Rlo_ab, t0, t1, t2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5198
      } bind(dont);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5199
    }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5200
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5201
    // Emits one m*n step: adds the pending product held in Rhi_mn:Rlo_mn to
    // the accumulator t2:t1:t0, computes the next product Rm * Rn into those
    // same registers without accumulating it (it becomes the new pending
    // product), then moves Pm up and Pn down by one word and loads the next
    // Rm and Rn.
    void extra_step_squaring() {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5202
      acc(Rhi_mn, Rlo_mn, t0, t1, t2);  // The pending m*n
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5203
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5204
      // MACC(Rm, Rn, t0, t1, t2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5205
      // Rm = *++Pm;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5206
      // Rn = *--Pn;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5207
      // High and low halves of the product; both are computed before Rm and
      // Rn are overwritten by the loads below.
      umulh(Rhi_mn, Rm, Rn);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5208
      mul(Rlo_mn, Rm, Rn);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5209
      ldr(Rm, pre(Pm, wordSize));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5210
      ldr(Rn, pre(Pn, -wordSize));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5211
    }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5212
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5213
    // Emits the "post1" step for squaring:
    //   1. adds the pending m*n product (Rhi_mn:Rlo_mn) to t2:t1:t0;
    //   2. computes Rm = t0 * inv and stores it at the address held in Pm;
    //   3. adds Rm * Rn to the accumulator and shifts it down one word
    //      (t0 = t1 + high(Rm*Rn) + carry; t1 = t2 + carry; t2 = 0).
    // The low half of Rm * Rn is only computed inside the #ifndef PRODUCT
    // check; the main path derives the carry from t0 instead.  That check
    // overwrites Rlo_mn and stops with "broken Montgomery multiply" if
    // low(Rm*Rn) + t0 is not zero.
    void post1_squaring() {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5214
      acc(Rhi_mn, Rlo_mn, t0, t1, t2);  // The pending m*n
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5215
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5216
      // *Pm = Rm = t0 * inv;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5217
      mul(Rm, t0, inv);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5218
      str(Rm, Address(Pm));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5219
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5220
      // MACC(Rm, Rn, t0, t1, t2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5221
      // t0 = t1; t1 = t2; t2 = 0;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5222
      // Only the high half of the product is needed on the main path.
      umulh(Rhi_mn, Rm, Rn);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5223
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5224
#ifndef PRODUCT
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5225
      // assert(m[i] * n[0] + t0 == 0, "broken Montgomery multiply");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5226
      {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5227
        // Debug-only: Rlo_mn = low(Rm * Rn) + t0, which must be zero.
        mul(Rlo_mn, Rm, Rn);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5228
        add(Rlo_mn, t0, Rlo_mn);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5229
        Label ok;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5230
        cbz(Rlo_mn, ok); {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5231
          stop("broken Montgomery multiply");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5232
        } bind(ok);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5233
      }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5234
#endif
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5235
      // We have very carefully set things up so that
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5236
      // m[i]*n[0] + t0 == 0 (mod b), so we don't have to calculate
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5237
      // the lower half of Rm * Rn because we know the result already:
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5238
      // it must be -t0.  t0 + (-t0) must generate a carry iff
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5239
      // t0 != 0.  So, rather than do a mul and an adds we just set
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5240
      // the carry flag iff t0 is nonzero.
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5241
      //
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5242
      // mul(Rlo_mn, Rm, Rn);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5243
      // adds(zr, t0, Rlo_mn);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5244
      subs(zr, t0, 1); // Set carry iff t0 is nonzero
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5245
      // Shift the accumulator down one word, adding the high half of the
      // product and the carry set just above.
      adcs(t0, t1, Rhi_mn);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5246
      adc(t1, t2, zr);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5247
      mov(t2, zr);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5248
    }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5249
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5250
    // Emits a three-word add with carry propagation:
    //   (t2:t1:t0) += (Rhi:Rlo)
    //
    //   Rhi, Rlo   - high and low words of the value to add
    //   t0, t1, t2 - low, middle and high words of the accumulator; inside
    //                this method these names refer to the arguments passed in
    void acc(Register Rhi, Register Rlo,
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5251
             Register t0, Register t1, Register t2) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5252
      // Low word first; each following add takes the carry of the previous one.
      adds(t0, t0, Rlo);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5253
      adcs(t1, t1, Rhi);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5254
      adc(t2, t2, zr);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5255
    }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5256
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5257
  public:
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5258
    /**
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5259
     * Fast Montgomery multiplication.  The derivation of the
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5260
     * algorithm is in A Cryptographic Library for the Motorola
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5261
     * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237.
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5262
     *
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5263
     * Arguments:
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5264
     *
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5265
     * Inputs for multiplication:
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5266
     *   c_rarg0   - int array elements a
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5267
     *   c_rarg1   - int array elements b
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5268
     *   c_rarg2   - int array elements n (the modulus)
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5269
     *   c_rarg3   - int length
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5270
     *   c_rarg4   - int inv
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5271
     *   c_rarg5   - int array elements m (the result)
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5272
     *
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5273
     * Inputs for squaring:
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5274
     *   c_rarg0   - int array elements a
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5275
     *   c_rarg1   - int array elements n (the modulus)
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5276
     *   c_rarg2   - int length
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5277
     *   c_rarg3   - int inv
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5278
     *   c_rarg4   - int array elements m (the result)
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5279
     *
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5280
     */
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5281
    address generate_multiply() {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5282
      Label argh, nothing;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5283
      bind(argh);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5284
      stop("MontgomeryMultiply total_allocation must be <= 8192");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5285
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5286
      align(CodeEntryAlignment);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5287
      address entry = pc();
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5288
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5289
      cbzw(Rlen, nothing);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5290
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5291
      enter();
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5292
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5293
      // Make room.
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5294
      cmpw(Rlen, 512);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5295
      br(Assembler::HI, argh);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5296
      sub(Ra, sp, Rlen, ext::uxtw, exact_log2(4 * sizeof (jint)));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5297
      andr(sp, Ra, -2 * wordSize);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5298
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5299
      lsrw(Rlen, Rlen, 1);  // length in longwords = len/2
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5300
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5301
      {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5302
        // Copy input args, reversing as we go.  We use Ra as a
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5303
        // temporary variable.
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5304
        reverse(Ra, Pa_base, Rlen, t0, t1);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5305
        if (!_squaring)
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5306
          reverse(Ra, Pb_base, Rlen, t0, t1);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5307
        reverse(Ra, Pn_base, Rlen, t0, t1);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5308
      }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5309
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5310
      // Push all call-saved registers and also Pm_base which we'll need
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5311
      // at the end.
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5312
      save_regs();
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5313
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5314
#ifndef PRODUCT
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5315
      // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5316
      {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5317
        ldr(Rn, Address(Pn_base, 0));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5318
        mul(Rlo_mn, Rn, inv);
51374
7be0084191ed 8206895: aarch64: rework error-prone cmp instuction
bulasevich
parents: 50758
diff changeset
  5319
        subs(zr, Rlo_mn, -1);
31955
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5320
        Label ok;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5321
        br(EQ, ok); {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5322
          stop("broken inverse in Montgomery multiply");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5323
        } bind(ok);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5324
      }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5325
#endif
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5326
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5327
      mov(Pm_base, Ra);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5328
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5329
      mov(t0, zr);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5330
      mov(t1, zr);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5331
      mov(t2, zr);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5332
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5333
      block_comment("for (int i = 0; i < len; i++) {");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5334
      mov(Ri, zr); {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5335
        Label loop, end;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5336
        cmpw(Ri, Rlen);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5337
        br(Assembler::GE, end);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5338
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5339
        bind(loop);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5340
        pre1(Ri);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5341
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5342
        block_comment("  for (j = i; j; j--) {"); {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5343
          movw(Rj, Ri);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5344
          unroll_2(Rj, &MontgomeryMultiplyGenerator::step);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5345
        } block_comment("  } // j");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5346
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5347
        post1();
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5348
        addw(Ri, Ri, 1);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5349
        cmpw(Ri, Rlen);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5350
        br(Assembler::LT, loop);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5351
        bind(end);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5352
        block_comment("} // i");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5353
      }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5354
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5355
      block_comment("for (int i = len; i < 2*len; i++) {");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5356
      mov(Ri, Rlen); {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5357
        Label loop, end;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5358
        cmpw(Ri, Rlen, Assembler::LSL, 1);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5359
        br(Assembler::GE, end);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5360
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5361
        bind(loop);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5362
        pre2(Ri, Rlen);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5363
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5364
        block_comment("  for (j = len*2-i-1; j; j--) {"); {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5365
          lslw(Rj, Rlen, 1);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5366
          subw(Rj, Rj, Ri);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5367
          subw(Rj, Rj, 1);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5368
          unroll_2(Rj, &MontgomeryMultiplyGenerator::step);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5369
        } block_comment("  } // j");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5370
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5371
        post2(Ri, Rlen);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5372
        addw(Ri, Ri, 1);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5373
        cmpw(Ri, Rlen, Assembler::LSL, 1);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5374
        br(Assembler::LT, loop);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5375
        bind(end);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5376
      }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5377
      block_comment("} // i");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5378
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5379
      normalize(Rlen);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5380
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5381
      mov(Ra, Pm_base);  // Save Pm_base in Ra
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5382
      restore_regs();  // Restore caller's Pm_base
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5383
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5384
      // Copy our result into caller's Pm_base
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5385
      reverse(Pm_base, Ra, Rlen, t0, t1);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5386
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5387
      leave();
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5388
      bind(nothing);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5389
      ret(lr);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5390
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5391
      return entry;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5392
    }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5393
    // In C, approximately:
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5394
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5395
    // void
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5396
    // montgomery_multiply(unsigned long Pa_base[], unsigned long Pb_base[],
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5397
    //                     unsigned long Pn_base[], unsigned long Pm_base[],
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5398
    //                     unsigned long inv, int len) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5399
    //   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5400
    //   unsigned long *Pa, *Pb, *Pn, *Pm;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5401
    //   unsigned long Ra, Rb, Rn, Rm;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5402
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5403
    //   int i;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5404
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5405
    //   assert(inv * Pn_base[0] == -1UL, "broken inverse in Montgomery multiply");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5406
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5407
    //   for (i = 0; i < len; i++) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5408
    //     int j;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5409
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5410
    //     Pa = Pa_base;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5411
    //     Pb = Pb_base + i;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5412
    //     Pm = Pm_base;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5413
    //     Pn = Pn_base + i;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5414
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5415
    //     Ra = *Pa;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5416
    //     Rb = *Pb;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5417
    //     Rm = *Pm;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5418
    //     Rn = *Pn;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5419
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5420
    //     int iters = i;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5421
    //     for (j = 0; iters--; j++) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5422
    //       assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5423
    //       MACC(Ra, Rb, t0, t1, t2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5424
    //       Ra = *++Pa;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5425
    //       Rb = *--Pb;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5426
    //       assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5427
    //       MACC(Rm, Rn, t0, t1, t2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5428
    //       Rm = *++Pm;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5429
    //       Rn = *--Pn;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5430
    //     }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5431
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5432
    //     assert(Ra == Pa_base[i] && Rb == Pb_base[0], "must be");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5433
    //     MACC(Ra, Rb, t0, t1, t2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5434
    //     *Pm = Rm = t0 * inv;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5435
    //     assert(Rm == Pm_base[i] && Rn == Pn_base[0], "must be");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5436
    //     MACC(Rm, Rn, t0, t1, t2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5437
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5438
    //     assert(t0 == 0, "broken Montgomery multiply");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5439
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5440
    //     t0 = t1; t1 = t2; t2 = 0;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5441
    //   }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5442
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5443
    //   for (i = len; i < 2*len; i++) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5444
    //     int j;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5445
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5446
    //     Pa = Pa_base + i-len;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5447
    //     Pb = Pb_base + len;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5448
    //     Pm = Pm_base + i-len;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5449
    //     Pn = Pn_base + len;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5450
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5451
    //     Ra = *++Pa;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5452
    //     Rb = *--Pb;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5453
    //     Rm = *++Pm;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5454
    //     Rn = *--Pn;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5455
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5456
    //     int iters = len*2-i-1;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5457
    //     for (j = i-len+1; iters--; j++) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5458
    //       assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5459
    //       MACC(Ra, Rb, t0, t1, t2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5460
    //       Ra = *++Pa;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5461
    //       Rb = *--Pb;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5462
    //       assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5463
    //       MACC(Rm, Rn, t0, t1, t2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5464
    //       Rm = *++Pm;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5465
    //       Rn = *--Pn;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5466
    //     }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5467
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5468
    //     Pm_base[i-len] = t0;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5469
    //     t0 = t1; t1 = t2; t2 = 0;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5470
    //   }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5471
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5472
    //   while (t0)
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5473
    //     t0 = sub(Pm_base, Pn_base, t0, len);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5474
    // }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5475
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5476
    /**
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5477
     * Fast Montgomery squaring.  This uses asymptotically 25% fewer
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5478
     * multiplies than Montgomery multiplication so it should be up to
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5479
     * 25% faster.  However, its loop control is more complex and it
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5480
     * may actually run slower on some machines.
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5481
     *
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5482
     * Arguments:
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5483
     *
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5484
     * Inputs:
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5485
     *   c_rarg0   - int array elements a
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5486
     *   c_rarg1   - int array elements n (the modulus)
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5487
     *   c_rarg2   - int length
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5488
     *   c_rarg3   - long inv
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5489
     *   c_rarg4   - int array elements m (the result)
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5490
     *
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5491
     */
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5492
    address generate_square() {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5493
      Label argh;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5494
      bind(argh);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5495
      stop("MontgomeryMultiply total_allocation must be <= 8192");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5496
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5497
      align(CodeEntryAlignment);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5498
      address entry = pc();
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5499
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5500
      enter();
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5501
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5502
      // Make room.
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5503
      cmpw(Rlen, 512);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5504
      br(Assembler::HI, argh);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5505
      sub(Ra, sp, Rlen, ext::uxtw, exact_log2(4 * sizeof (jint)));
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5506
      andr(sp, Ra, -2 * wordSize);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5507
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5508
      lsrw(Rlen, Rlen, 1);  // length in longwords = len/2
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5509
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5510
      {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5511
        // Copy input args, reversing as we go.  We use Ra as a
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5512
        // temporary variable.
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5513
        reverse(Ra, Pa_base, Rlen, t0, t1);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5514
        reverse(Ra, Pn_base, Rlen, t0, t1);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5515
      }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5516
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5517
      // Push all call-saved registers and also Pm_base which we'll need
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5518
      // at the end.
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5519
      save_regs();
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5520
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5521
      mov(Pm_base, Ra);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5522
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5523
      mov(t0, zr);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5524
      mov(t1, zr);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5525
      mov(t2, zr);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5526
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5527
      block_comment("for (int i = 0; i < len; i++) {");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5528
      mov(Ri, zr); {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5529
        Label loop, end;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5530
        bind(loop);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5531
        cmp(Ri, Rlen);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5532
        br(Assembler::GE, end);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5533
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5534
        pre1(Ri);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5535
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5536
        block_comment("for (j = (i+1)/2; j; j--) {"); {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5537
          add(Rj, Ri, 1);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5538
          lsr(Rj, Rj, 1);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5539
          unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5540
        } block_comment("  } // j");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5541
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5542
        last_squaring(Ri);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5543
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5544
        block_comment("  for (j = i/2; j; j--) {"); {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5545
          lsr(Rj, Ri, 1);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5546
          unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5547
        } block_comment("  } // j");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5548
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5549
        post1_squaring();
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5550
        add(Ri, Ri, 1);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5551
        cmp(Ri, Rlen);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5552
        br(Assembler::LT, loop);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5553
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5554
        bind(end);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5555
        block_comment("} // i");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5556
      }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5557
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5558
      block_comment("for (int i = len; i < 2*len; i++) {");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5559
      mov(Ri, Rlen); {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5560
        Label loop, end;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5561
        bind(loop);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5562
        cmp(Ri, Rlen, Assembler::LSL, 1);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5563
        br(Assembler::GE, end);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5564
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5565
        pre2(Ri, Rlen);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5566
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5567
        block_comment("  for (j = (2*len-i-1)/2; j; j--) {"); {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5568
          lsl(Rj, Rlen, 1);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5569
          sub(Rj, Rj, Ri);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5570
          sub(Rj, Rj, 1);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5571
          lsr(Rj, Rj, 1);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5572
          unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5573
        } block_comment("  } // j");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5574
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5575
        last_squaring(Ri);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5576
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5577
        block_comment("  for (j = (2*len-i)/2; j; j--) {"); {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5578
          lsl(Rj, Rlen, 1);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5579
          sub(Rj, Rj, Ri);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5580
          lsr(Rj, Rj, 1);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5581
          unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5582
        } block_comment("  } // j");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5583
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5584
        post2(Ri, Rlen);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5585
        add(Ri, Ri, 1);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5586
        cmp(Ri, Rlen, Assembler::LSL, 1);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5587
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5588
        br(Assembler::LT, loop);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5589
        bind(end);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5590
        block_comment("} // i");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5591
      }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5592
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5593
      normalize(Rlen);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5594
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5595
      mov(Ra, Pm_base);  // Save Pm_base in Ra
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5596
      restore_regs();  // Restore caller's Pm_base
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5597
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5598
      // Copy our result into caller's Pm_base
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5599
      reverse(Pm_base, Ra, Rlen, t0, t1);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5600
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5601
      leave();
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5602
      ret(lr);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5603
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5604
      return entry;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5605
    }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5606
    // In C, approximately:
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5607
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5608
    // void
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5609
    // montgomery_square(unsigned long Pa_base[], unsigned long Pn_base[],
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5610
    //                   unsigned long Pm_base[], unsigned long inv, int len) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5611
    //   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5612
    //   unsigned long *Pa, *Pb, *Pn, *Pm;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5613
    //   unsigned long Ra, Rb, Rn, Rm;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5614
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5615
    //   int i;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5616
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5617
    //   assert(inv * Pn_base[0] == -1UL, "broken inverse in Montgomery multiply");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5618
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5619
    //   for (i = 0; i < len; i++) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5620
    //     int j;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5621
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5622
    //     Pa = Pa_base;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5623
    //     Pb = Pa_base + i;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5624
    //     Pm = Pm_base;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5625
    //     Pn = Pn_base + i;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5626
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5627
    //     Ra = *Pa;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5628
    //     Rb = *Pb;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5629
    //     Rm = *Pm;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5630
    //     Rn = *Pn;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5631
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5632
    //     int iters = (i+1)/2;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5633
    //     for (j = 0; iters--; j++) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5634
    //       assert(Ra == Pa_base[j] && Rb == Pa_base[i-j], "must be");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5635
    //       MACC2(Ra, Rb, t0, t1, t2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5636
    //       Ra = *++Pa;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5637
    //       Rb = *--Pb;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5638
    //       assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5639
    //       MACC(Rm, Rn, t0, t1, t2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5640
    //       Rm = *++Pm;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5641
    //       Rn = *--Pn;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5642
    //     }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5643
    //     if ((i & 1) == 0) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5644
    //       assert(Ra == Pa_base[j], "must be");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5645
    //       MACC(Ra, Ra, t0, t1, t2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5646
    //     }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5647
    //     iters = i/2;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5648
    //     assert(iters == i-j, "must be");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5649
    //     for (; iters--; j++) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5650
    //       assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5651
    //       MACC(Rm, Rn, t0, t1, t2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5652
    //       Rm = *++Pm;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5653
    //       Rn = *--Pn;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5654
    //     }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5655
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5656
    //     *Pm = Rm = t0 * inv;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5657
    //     assert(Rm == Pm_base[i] && Rn == Pn_base[0], "must be");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5658
    //     MACC(Rm, Rn, t0, t1, t2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5659
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5660
    //     assert(t0 == 0, "broken Montgomery multiply");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5661
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5662
    //     t0 = t1; t1 = t2; t2 = 0;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5663
    //   }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5664
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5665
    //   for (i = len; i < 2*len; i++) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5666
    //     int start = i-len+1;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5667
    //     int end = start + (len - start)/2;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5668
    //     int j;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5669
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5670
    //     Pa = Pa_base + i-len;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5671
    //     Pb = Pa_base + len;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5672
    //     Pm = Pm_base + i-len;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5673
    //     Pn = Pn_base + len;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5674
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5675
    //     Ra = *++Pa;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5676
    //     Rb = *--Pb;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5677
    //     Rm = *++Pm;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5678
    //     Rn = *--Pn;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5679
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5680
    //     int iters = (2*len-i-1)/2;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5681
    //     assert(iters == end-start, "must be");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5682
    //     for (j = start; iters--; j++) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5683
    //       assert(Ra == Pa_base[j] && Rb == Pa_base[i-j], "must be");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5684
    //       MACC2(Ra, Rb, t0, t1, t2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5685
    //       Ra = *++Pa;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5686
    //       Rb = *--Pb;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5687
    //       assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5688
    //       MACC(Rm, Rn, t0, t1, t2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5689
    //       Rm = *++Pm;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5690
    //       Rn = *--Pn;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5691
    //     }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5692
    //     if ((i & 1) == 0) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5693
    //       assert(Ra == Pa_base[j], "must be");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5694
    //       MACC(Ra, Ra, t0, t1, t2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5695
    //     }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5696
    //     iters =  (2*len-i)/2;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5697
    //     assert(iters == len-j, "must be");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5698
    //     for (; iters--; j++) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5699
    //       assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5700
    //       MACC(Rm, Rn, t0, t1, t2);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5701
    //       Rm = *++Pm;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5702
    //       Rn = *--Pn;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5703
    //     }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5704
    //     Pm_base[i-len] = t0;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5705
    //     t0 = t1; t1 = t2; t2 = 0;
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5706
    //   }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5707
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5708
    //   while (t0)
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5709
    //     t0 = sub(Pm_base, Pn_base, t0, len);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5710
    // }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5711
  };
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5712
46814
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  5713
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5714
  // Initialization
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5715
  // Generates the initial set of stubs and stores each returned entry point
  // in the matching StubRoutines field. Takes no arguments and returns
  // nothing; the visible effects are the StubRoutines assignments below.
  // The statement order is deliberate (see the ordering remarks in the body),
  // so do not reorder the generator calls.
  void generate_initial() {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5716
    // Generate the initial stubs and initialize their entry points.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5717
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5718
    // Entry points that exist on all platforms. Note: this is code
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5719
    // that could be shared among different platforms; however, the
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5720
    // benefit seems to be smaller than the disadvantage of having a
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5721
    // much more complicated generator structure. See also the comment in
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5722
    // stubRoutines.hpp.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5723
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5724
    StubRoutines::_forward_exception_entry = generate_forward_exception();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5725
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5726
    // The call-stub generator is handed StubRoutines::_call_stub_return_address;
    // NOTE(review): presumably it fills that field in as a side effect -- confirm
    // against generate_call_stub().
    StubRoutines::_call_stub_entry =
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5727
      generate_call_stub(StubRoutines::_call_stub_return_address);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5728
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5729
    // The catch-exception entry is referenced by megamorphic calls.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5730
    StubRoutines::_catch_exception_entry = generate_catch_exception();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5731
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5732
    // Build this early so it's available for the interpreter.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5733
    StubRoutines::_throw_StackOverflowError_entry =
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5734
      generate_throw_exception("StackOverflowError throw_exception",
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5735
                               CAST_FROM_FN_PTR(address,
43439
5e03c9ba74f3 8172144: AArch64: Implement "JEP 270: Reserved Stack Areas for Critical Sections"
aph
parents: 42577
diff changeset
  5736
                                                SharedRuntime::throw_StackOverflowError));
5e03c9ba74f3 8172144: AArch64: Implement "JEP 270: Reserved Stack Areas for Critical Sections"
aph
parents: 42577
diff changeset
  5737
    // Same generator as above, but the stub targets
    // SharedRuntime::throw_delayed_StackOverflowError instead.
    StubRoutines::_throw_delayed_StackOverflowError_entry =
5e03c9ba74f3 8172144: AArch64: Implement "JEP 270: Reserved Stack Areas for Critical Sections"
aph
parents: 42577
diff changeset
  5738
      generate_throw_exception("delayed StackOverflowError throw_exception",
5e03c9ba74f3 8172144: AArch64: Implement "JEP 270: Reserved Stack Areas for Critical Sections"
aph
parents: 42577
diff changeset
  5739
                               CAST_FROM_FN_PTR(address,
5e03c9ba74f3 8172144: AArch64: Implement "JEP 270: Reserved Stack Areas for Critical Sections"
aph
parents: 42577
diff changeset
  5740
                                                SharedRuntime::throw_delayed_StackOverflowError));
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5741
    // The CRC32 table address and stub are published only when
    // UseCRC32Intrinsics is set.
    if (UseCRC32Intrinsics) {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5742
      // Set the table address before generating the stub that uses it.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5743
      StubRoutines::_crc_table_adr = (address)StubRoutines::aarch64::_crc_table;
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5744
      StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5745
    }
47767
107622f2695c 8189745: AARCH64: Use CRC32C intrinsic code in interpreter and C1
dchuyko
parents: 47571
diff changeset
  5746
107622f2695c 8189745: AARCH64: Use CRC32C intrinsic code in interpreter and C1
dchuyko
parents: 47571
diff changeset
  5747
    // The CRC32C stub is likewise guarded by its own flag.
    if (UseCRC32CIntrinsics) {
107622f2695c 8189745: AARCH64: Use CRC32C intrinsic code in interpreter and C1
dchuyko
parents: 47571
diff changeset
  5748
      StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
107622f2695c 8189745: AARCH64: Use CRC32C intrinsic code in interpreter and C1
dchuyko
parents: 47571
diff changeset
  5749
    }
50753
4449b45900f1 8196402: AARCH64: create intrinsic for Math.log
dpochepk
parents: 50728
diff changeset
  5750
52927
226c451bd954 8215133: AARCH64: disable Math.log intrinsic publishing
dpochepk
parents: 51756
diff changeset
  5751
    // Disabled until JDK-8210858 is fixed: StubRoutines::_dlog is deliberately
    // not assigned here, so the block below must stay commented out.
226c451bd954 8215133: AARCH64: disable Math.log intrinsic publishing
dpochepk
parents: 51756
diff changeset
  5752
    // if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dlog)) {
226c451bd954 8215133: AARCH64: disable Math.log intrinsic publishing
dpochepk
parents: 51756
diff changeset
  5753
    //   StubRoutines::_dlog = generate_dlog();
226c451bd954 8215133: AARCH64: disable Math.log intrinsic publishing
dpochepk
parents: 51756
diff changeset
  5754
    // }
50755
680d04ae76e9 8204289: AARCH64: enable math intrinsics usage in interpreter and C1
dpochepk
parents: 50754
diff changeset
  5755
680d04ae76e9 8204289: AARCH64: enable math intrinsics usage in interpreter and C1
dpochepk
parents: 50754
diff changeset
  5756
    // sin and cos share one generator; the isCos argument selects which stub
    // is produced. Each is generated only if its intrinsic is available.
    if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin)) {
680d04ae76e9 8204289: AARCH64: enable math intrinsics usage in interpreter and C1
dpochepk
parents: 50754
diff changeset
  5757
      StubRoutines::_dsin = generate_dsin_dcos(/* isCos = */ false);
680d04ae76e9 8204289: AARCH64: enable math intrinsics usage in interpreter and C1
dpochepk
parents: 50754
diff changeset
  5758
    }
680d04ae76e9 8204289: AARCH64: enable math intrinsics usage in interpreter and C1
dpochepk
parents: 50754
diff changeset
  5759
680d04ae76e9 8204289: AARCH64: enable math intrinsics usage in interpreter and C1
dpochepk
parents: 50754
diff changeset
  5760
    if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) {
680d04ae76e9 8204289: AARCH64: enable math intrinsics usage in interpreter and C1
dpochepk
parents: 50754
diff changeset
  5761
      StubRoutines::_dcos = generate_dsin_dcos(/* isCos = */ true);
680d04ae76e9 8204289: AARCH64: enable math intrinsics usage in interpreter and C1
dpochepk
parents: 50754
diff changeset
  5762
    }
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5763
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5764
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5765
  void generate_all() {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5766
    // support for verify_oop (must happen after universe_init)
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5767
    StubRoutines::_verify_oop_subroutine_entry     = generate_verify_oop();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5768
    StubRoutines::_throw_AbstractMethodError_entry =
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5769
      generate_throw_exception("AbstractMethodError throw_exception",
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5770
                               CAST_FROM_FN_PTR(address,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5771
                                                SharedRuntime::
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5772
                                                throw_AbstractMethodError));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5773
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5774
    StubRoutines::_throw_IncompatibleClassChangeError_entry =
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5775
      generate_throw_exception("IncompatibleClassChangeError throw_exception",
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5776
                               CAST_FROM_FN_PTR(address,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5777
                                                SharedRuntime::
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5778
                                                throw_IncompatibleClassChangeError));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5779
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5780
    StubRoutines::_throw_NullPointerException_at_call_entry =
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5781
      generate_throw_exception("NullPointerException at call throw_exception",
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5782
                               CAST_FROM_FN_PTR(address,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5783
                                                SharedRuntime::
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5784
                                                throw_NullPointerException_at_call));
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5785
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5786
    // arraycopy stubs used by compilers
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5787
    generate_arraycopy_stubs();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5788
46814
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  5789
    // has negatives stub for large arrays.
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  5790
    StubRoutines::aarch64::_has_negatives = generate_has_negatives(StubRoutines::aarch64::_has_negatives_long);
2e45cd2fdcb6 8184943: AARCH64: Intrinsify hasNegatives
dpochepk
parents: 46720
diff changeset
  5791
49724
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  5792
    // array equals stub for large arrays.
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  5793
    if (!UseSimpleArrayEquals) {
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  5794
      StubRoutines::aarch64::_large_array_equals = generate_large_array_equals();
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  5795
    }
bf7f42f2f025 8187472: AARCH64: array_equals intrinsic doesn't use prefetch for large arrays
dpochepk
parents: 49484
diff changeset
  5796
50756
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  5797
    generate_compare_long_strings();
7ad092f40454 8202326: AARCH64: optimize string compare intrinsic
dpochepk
parents: 50755
diff changeset
  5798
50757
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  5799
    generate_string_indexof_stubs();
866c9aa29ee4 8189103: AARCH64: optimize String indexOf intrinsic
dpochepk
parents: 50756
diff changeset
  5800
50758
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  5801
    // byte_array_inflate stub for large arrays.
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  5802
    StubRoutines::aarch64::_large_byte_array_inflate = generate_large_byte_array_inflate();
afca3c78ea0f 8189113: AARCH64: StringLatin1 inflate intrinsic doesn't use prefetch instruction
dpochepk
parents: 50757
diff changeset
  5803
51619
dca697c71e5d 8207247: AARCH64: Enable Minimal and Client VM builds
avoitylov
parents: 51374
diff changeset
  5804
#ifdef COMPILER2
30225
e9722ea461d4 8077615: AARCH64: Add C2 intrinsic for BigInteger::multiplyToLen() method
aph
parents: 29695
diff changeset
  5805
    if (UseMultiplyToLenIntrinsic) {
e9722ea461d4 8077615: AARCH64: Add C2 intrinsic for BigInteger::multiplyToLen() method
aph
parents: 29695
diff changeset
  5806
      StubRoutines::_multiplyToLen = generate_multiplyToLen();
e9722ea461d4 8077615: AARCH64: Add C2 intrinsic for BigInteger::multiplyToLen() method
aph
parents: 29695
diff changeset
  5807
    }
e9722ea461d4 8077615: AARCH64: Add C2 intrinsic for BigInteger::multiplyToLen() method
aph
parents: 29695
diff changeset
  5808
47571
c19054f06c14 8186915: AARCH64: Intrinsify squareToLen and mulAdd
dpochepk
parents: 47216
diff changeset
  5809
    if (UseSquareToLenIntrinsic) {
c19054f06c14 8186915: AARCH64: Intrinsify squareToLen and mulAdd
dpochepk
parents: 47216
diff changeset
  5810
      StubRoutines::_squareToLen = generate_squareToLen();
c19054f06c14 8186915: AARCH64: Intrinsify squareToLen and mulAdd
dpochepk
parents: 47216
diff changeset
  5811
    }
c19054f06c14 8186915: AARCH64: Intrinsify squareToLen and mulAdd
dpochepk
parents: 47216
diff changeset
  5812
c19054f06c14 8186915: AARCH64: Intrinsify squareToLen and mulAdd
dpochepk
parents: 47216
diff changeset
  5813
    if (UseMulAddIntrinsic) {
c19054f06c14 8186915: AARCH64: Intrinsify squareToLen and mulAdd
dpochepk
parents: 47216
diff changeset
  5814
      StubRoutines::_mulAdd = generate_mulAdd();
c19054f06c14 8186915: AARCH64: Intrinsify squareToLen and mulAdd
dpochepk
parents: 47216
diff changeset
  5815
    }
c19054f06c14 8186915: AARCH64: Intrinsify squareToLen and mulAdd
dpochepk
parents: 47216
diff changeset
  5816
31955
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5817
    if (UseMontgomeryMultiplyIntrinsic) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5818
      StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5819
      MontgomeryMultiplyGenerator g(_masm, /*squaring*/false);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5820
      StubRoutines::_montgomeryMultiply = g.generate_multiply();
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5821
    }
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5822
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5823
    if (UseMontgomerySquareIntrinsic) {
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5824
      StubCodeMark mark(this, "StubRoutines", "montgomerySquare");
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5825
      MontgomeryMultiplyGenerator g(_masm, /*squaring*/true);
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5826
      // We use generate_multiply() rather than generate_square()
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5827
      // because it's faster for the sizes of modulus we care about.
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5828
      StubRoutines::_montgomerySquare = g.generate_multiply();
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5829
    }
51619
dca697c71e5d 8207247: AARCH64: Enable Minimal and Client VM builds
avoitylov
parents: 51374
diff changeset
  5830
#endif // COMPILER2
31955
c6ac18ab3d6b 8131779: AARCH64: add Montgomery multiply intrinsic
aph
parents: 31591
diff changeset
  5831
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5832
#ifndef BUILTIN_SIM
31961
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  5833
    // generate GHASH intrinsics code
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  5834
    if (UseGHASHIntrinsics) {
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  5835
      StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  5836
    }
70adcff5840c 8131062: aarch64: add support for GHASH acceleration
enevill
parents: 31955
diff changeset
  5837
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5838
    if (UseAESIntrinsics) {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5839
      StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5840
      StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5841
      StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5842
      StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5843
    }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5844
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5845
    if (UseSHA1Intrinsics) {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5846
      StubRoutines::_sha1_implCompress     = generate_sha1_implCompress(false,   "sha1_implCompress");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5847
      StubRoutines::_sha1_implCompressMB   = generate_sha1_implCompress(true,    "sha1_implCompressMB");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5848
    }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5849
    if (UseSHA256Intrinsics) {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5850
      StubRoutines::_sha256_implCompress   = generate_sha256_implCompress(false, "sha256_implCompress");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5851
      StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true,  "sha256_implCompressMB");
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5852
    }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5853
33176
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  5854
    // generate Adler32 intrinsics code
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  5855
    if (UseAdler32Intrinsics) {
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  5856
      StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  5857
    }
54393049bf1e 8139043: aarch64: add support for adler32 intrinsic
enevill
parents: 32627
diff changeset
  5858
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5859
    // Safefetch stubs.
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5860
    generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5861
                                                       &StubRoutines::_safefetch32_fault_pc,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5862
                                                       &StubRoutines::_safefetch32_continuation_pc);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5863
    generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5864
                                                       &StubRoutines::_safefetchN_fault_pc,
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5865
                                                       &StubRoutines::_safefetchN_continuation_pc);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5866
#endif
45054
c09733aaf97f 8179444: AArch64: Put zero_words on a diet
aph
parents: 43439
diff changeset
  5867
    StubRoutines::aarch64::set_completed();
29183
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5868
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5869
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5870
 public:
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5871
  // Constructor: forwards 'code' to the StubCodeGenerator base and then
  // immediately runs one of the two generation passes, selected by 'all':
  //   all == true  -> generate_all()
  //   all == false -> generate_initial()
  // All work happens during construction; the constructor body does nothing
  // else.
  StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5872
    if (all) {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5873
      generate_all();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5874
    } else {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5875
      generate_initial();
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5876
    }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5877
  }
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5878
}; // end class declaration
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5879
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5880
// Free-function entry point for stub generation.
// Builds a stack-local StubGenerator over 'code'; the StubGenerator
// constructor itself performs the generation pass chosen by 'all'
// (generate_all() when true, generate_initial() when false), so the local
// object 'g' is intentionally never used after it is constructed.
void StubGenerator_generate(CodeBuffer* code, bool all) {
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5881
  StubGenerator g(code, all);
0cc8699f7372 8068054: AARCH64: Assembler interpreter, shared runtime
aph
parents:
diff changeset
  5882
}