hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp
changeset 14132 3c1437abcefd
parent 13952 e3cf184080bc
child 14626 0cf4eccf130f
child 14583 d70ee55535f4
equal deleted inserted replaced
14131:e376e3d428c9 14132:3c1437abcefd
  2135       __ pow_with_fallback(0);
  2135       __ pow_with_fallback(0);
  2136       __ ret(0);
  2136       __ ret(0);
  2137     }
  2137     }
  2138   }
  2138   }
  2139 
  2139 
       
  // AES intrinsic stubs
  // AES operates on 128-bit (16-byte) blocks; every stub below consumes and
  // produces data in multiples of this size.
  enum {AESBlockSize = 16};
       
  2142 
       
  2143   address generate_key_shuffle_mask() {
       
  2144     __ align(16);
       
  2145     StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask");
       
  2146     address start = __ pc();
       
  2147     __ emit_data(0x00010203, relocInfo::none, 0 );
       
  2148     __ emit_data(0x04050607, relocInfo::none, 0 );
       
  2149     __ emit_data(0x08090a0b, relocInfo::none, 0 );
       
  2150     __ emit_data(0x0c0d0e0f, relocInfo::none, 0 );
       
  2151     return start;
       
  2152   }
       
  2153 
       
  2154   // Utility routine for loading a 128-bit key word in little endian format
       
  2155   // can optionally specify that the shuffle mask is already in an xmmregister
       
  2156   void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
       
  2157     __ movdqu(xmmdst, Address(key, offset));
       
  2158     if (xmm_shuf_mask != NULL) {
       
  2159       __ pshufb(xmmdst, xmm_shuf_mask);
       
  2160     } else {
       
  2161       __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
       
  2162     }
       
  2163   }
       
  2164 
       
  // aesenc using specified key+offset
  // can optionally specify that the shuffle mask is already in an xmmregister
  // Performs one AES encryption round on xmmdst, clobbering xmmtmp with the
  // byte-swapped round key loaded from (key + offset).
  void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
    load_key(xmmtmp, key, offset, xmm_shuf_mask);
    __ aesenc(xmmdst, xmmtmp);
  }
       
  2171 
       
  // aesdec using specified key+offset
  // can optionally specify that the shuffle mask is already in an xmmregister
  // Performs one AES decryption round on xmmdst, clobbering xmmtmp with the
  // byte-swapped round key loaded from (key + offset).
  void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
    load_key(xmmtmp, key, offset, xmm_shuf_mask);
    __ aesdec(xmmdst, xmmtmp);
  }
       
  2178 
       
  2179 
       
  // Arguments:
  //
  // Inputs (on x86_32 these arrive on the stack; they are loaded below from
  // rbp-relative addresses after enter()):
  //   c_rarg0   - source byte array address
  //   c_rarg1   - destination byte array address
  //   c_rarg2   - K (key) in little endian int array
  //
  // Encrypts a single 16-byte block. Supports 128-, 192- and 256-bit keys;
  // the key size is recovered from the length of the java expanded key array
  // (44, 52 or 60 ints respectively).
  address generate_aescrypt_encryptBlock() {
    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
    Label L_doLast;
    address start = __ pc();

    const Register from        = rsi;      // source array address
    const Register to          = rdx;      // destination array address
    const Register key         = rcx;      // key array address
    const Register keylen      = rax;      // holds extra 128-bit words beyond the common 11
    const Address  from_param(rbp, 8+0);
    const Address  to_param  (rbp, 8+4);
    const Address  key_param (rbp, 8+8);

    const XMMRegister xmm_result = xmm0;
    const XMMRegister xmm_temp   = xmm1;
    const XMMRegister xmm_key_shuf_mask = xmm2;

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    __ push(rsi);                 // rsi is callee-saved and used as 'from' below
    __ movptr(from , from_param);
    __ movptr(to   , to_param);
    __ movptr(key  , key_param);

    // Read the int-array length field of the expanded key (44/52/60).
    __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
    // keylen = # of 32-bit words, convert to 128-bit words
    __ shrl(keylen, 2);
    __ subl(keylen, 11);   // every key has at least 11 128-bit words, some have more
                           // keylen is now 0 (128-bit), 2 (192-bit) or 4 (256-bit)

    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
    __ movdqu(xmm_result, Address(from, 0));  // get 16 bytes of input

    // For encryption, the java expanded key ordering is just what we need

    load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
    __ pxor(xmm_result, xmm_temp);                       // initial whitening (AddRoundKey)
    // nine full rounds common to all key sizes (round keys 0x10..0x90)
    for (int offset = 0x10; offset <= 0x90; offset += 0x10) {
      aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
    }
    load_key  (xmm_temp, key, 0xa0, xmm_key_shuf_mask);
    __ cmpl(keylen, 0);
    __ jcc(Assembler::equal, L_doLast);                // 128-bit key: 0xa0 is the last round key
    __ aesenc(xmm_result, xmm_temp);                   // only in 192 and 256 bit keys
    aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
    load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask);
    __ subl(keylen, 2);
    __ jcc(Assembler::equal, L_doLast);                // 192-bit key: 0xc0 is the last round key
    __ aesenc(xmm_result, xmm_temp);                   // only in 256 bit keys
    aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
    load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask);

    __ BIND(L_doLast);
    // Final round: AESENCLAST with the last round key left in xmm_temp above.
    __ aesenclast(xmm_result, xmm_temp);
    __ movdqu(Address(to, 0), xmm_result);        // store the result
    __ xorptr(rax, rax); // return 0
    __ pop(rsi);
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }
       
  2249 
       
  2250 
       
  // Arguments:
  //
  // Inputs (on x86_32 these arrive on the stack; they are loaded below from
  // rbp-relative addresses after enter()):
  //   c_rarg0   - source byte array address
  //   c_rarg1   - destination byte array address
  //   c_rarg2   - K (key) in little endian int array
  //
  // Decrypts a single 16-byte block. Mirrors generate_aescrypt_encryptBlock,
  // except the java expanded key is consumed in rotated order (see below).
  address generate_aescrypt_decryptBlock() {
    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
    Label L_doLast;
    address start = __ pc();

    const Register from        = rsi;      // source array address
    const Register to          = rdx;      // destination array address
    const Register key         = rcx;      // key array address
    const Register keylen      = rax;      // holds extra 128-bit words beyond the common 11
    const Address  from_param(rbp, 8+0);
    const Address  to_param  (rbp, 8+4);
    const Address  key_param (rbp, 8+8);

    const XMMRegister xmm_result = xmm0;
    const XMMRegister xmm_temp   = xmm1;
    const XMMRegister xmm_key_shuf_mask = xmm2;

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    __ push(rsi);                 // rsi is callee-saved and used as 'from' below
    __ movptr(from , from_param);
    __ movptr(to   , to_param);
    __ movptr(key  , key_param);

    // Read the int-array length field of the expanded key (44/52/60).
    __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
    // keylen = # of 32-bit words, convert to 128-bit words
    __ shrl(keylen, 2);
    __ subl(keylen, 11);   // every key has at least 11 128-bit words, some have more
                           // keylen is now 0 (128-bit), 2 (192-bit) or 4 (256-bit)

    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
    __ movdqu(xmm_result, Address(from, 0));

    // for decryption java expanded key ordering is rotated one position from what we want
    // so we start from 0x10 here and hit 0x00 last
    // we don't know if the key is aligned, hence not using load-execute form
    load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask);
    __ pxor  (xmm_result, xmm_temp);                     // initial whitening (AddRoundKey)
    // nine full rounds common to all key sizes (round keys 0x20..0xa0)
    for (int offset = 0x20; offset <= 0xa0; offset += 0x10) {
      aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
    }
    __ cmpl(keylen, 0);
    __ jcc(Assembler::equal, L_doLast);
    // only in 192 and 256 bit keys
    aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
    aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask);
    __ subl(keylen, 2);
    __ jcc(Assembler::equal, L_doLast);
    // only in 256 bit keys
    aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
    aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask);

    __ BIND(L_doLast);
    // for decryption the aesdeclast operation is always on key+0x00
    load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
    __ aesdeclast(xmm_result, xmm_temp);

    __ movdqu(Address(to, 0), xmm_result);  // store the result

    __ xorptr(rax, rax); // return 0
    __ pop(rsi);
    __ leave(); // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

    return start;
  }
       
  2324 
       
  2325   void handleSOERegisters(bool saving) {
       
  2326     const int saveFrameSizeInBytes = 4 * wordSize;
       
  2327     const Address saved_rbx     (rbp, -3 * wordSize);
       
  2328     const Address saved_rsi     (rbp, -2 * wordSize);
       
  2329     const Address saved_rdi     (rbp, -1 * wordSize);
       
  2330 
       
  2331     if (saving) {
       
  2332       __ subptr(rsp, saveFrameSizeInBytes);
       
  2333       __ movptr(saved_rsi, rsi);
       
  2334       __ movptr(saved_rdi, rdi);
       
  2335       __ movptr(saved_rbx, rbx);
       
  2336     } else {
       
  2337       // restoring
       
  2338       __ movptr(rsi, saved_rsi);
       
  2339       __ movptr(rdi, saved_rdi);
       
  2340       __ movptr(rbx, saved_rbx);
       
  2341     }
       
  2342   }
       
  2343 
       
  // Arguments:
  //
  // Inputs (on x86_32 these arrive on the stack; they are loaded below from
  // rbp-relative addresses after enter()):
  //   c_rarg0   - source byte array address
  //   c_rarg1   - destination byte array address
  //   c_rarg2   - K (key) in little endian int array
  //   c_rarg3   - r vector byte array address
  //   c_rarg4   - input length
  //
  // CBC-mode AES encryption over a whole buffer. Each block is XORed with
  // the previous ciphertext block (initially the r vector) before being
  // encrypted. Dispatches to a separate loop per key size.
  address generate_cipherBlockChaining_encryptAESCrypt() {
    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
    address start = __ pc();

    Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
    const Register from        = rsi;      // source array address
    const Register to          = rdx;      // destination array address
    const Register key         = rcx;      // key array address
    const Register rvec        = rdi;      // r byte array initialized from initvector array address
                                           // and left with the results of the last encryption block
    const Register len_reg     = rbx;      // src len (must be multiple of blocksize 16)
    const Register pos         = rax;

    // xmm register assignments for the loops below
    const XMMRegister xmm_result = xmm0;
    const XMMRegister xmm_temp   = xmm1;
    // first 6 keys preloaded into xmm2-xmm7
    const int XMM_REG_NUM_KEY_FIRST = 2;
    const int XMM_REG_NUM_KEY_LAST  = 7;
    const XMMRegister xmm_key0   = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    handleSOERegisters(true /*saving*/);   // rsi/rdi/rbx are all used below

    // load registers from incoming parameters
    const Address  from_param(rbp, 8+0);
    const Address  to_param  (rbp, 8+4);
    const Address  key_param (rbp, 8+8);
    const Address  rvec_param (rbp, 8+12);
    const Address  len_param  (rbp, 8+16);
    __ movptr(from , from_param);
    __ movptr(to   , to_param);
    __ movptr(key  , key_param);
    __ movptr(rvec , rvec_param);
    __ movptr(len_reg , len_param);

    // NOTE: xmm_key_shuf_mask aliases xmm_temp (xmm1); the mask is only
    // needed during key preloading, before xmm_temp is used as a scratch reg.
    const XMMRegister xmm_key_shuf_mask = xmm_temp;  // used temporarily to swap key bytes up front
    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
    // load up xmm regs 2 thru 7 with keys 0-5
    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
      load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
      offset += 0x10;
    }

    __ movdqu(xmm_result, Address(rvec, 0x00));   // initialize xmm_result with r vec

    // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
    __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
    __ cmpl(rax, 44);
    __ jcc(Assembler::notEqual, L_key_192_256);

    // 128 bit code follows here
    __ movptr(pos, 0);
    __ align(OptoLoopAlignment);
    __ BIND(L_loopTop_128);
    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
    __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector

    __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
      __ aesenc(xmm_result, as_XMMRegister(rnum));
    }
    // remaining round keys don't fit in registers; load them per round
    for (int key_offset = 0x60; key_offset <= 0x90; key_offset += 0x10) {
      aes_enc_key(xmm_result, xmm_temp, key, key_offset);
    }
    load_key(xmm_temp, key, 0xa0);
    __ aesenclast(xmm_result, xmm_temp);

    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
    // no need to store r to memory until we exit
    __ addptr(pos, AESBlockSize);
    __ subptr(len_reg, AESBlockSize);
    // loop assumes len is a nonzero multiple of 16, so subptr hits exactly 0
    __ jcc(Assembler::notEqual, L_loopTop_128);

    __ BIND(L_exit);
    __ movdqu(Address(rvec, 0), xmm_result);     // final value of r stored in rvec of CipherBlockChaining object

    handleSOERegisters(false /*restoring*/);
    __ movl(rax, 0);                             // return 0 (why?)
    __ leave();                                  // required for proper stackwalking of RuntimeStub frame
    __ ret(0);

  __ BIND(L_key_192_256);
  // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
    __ cmpl(rax, 52);
    __ jcc(Assembler::notEqual, L_key_256);

    // 192-bit code follows here (could be changed to use more xmm registers)
    __ movptr(pos, 0);
  __ align(OptoLoopAlignment);
  __ BIND(L_loopTop_192);
    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
    __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector

    __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
      __ aesenc(xmm_result, as_XMMRegister(rnum));
    }
    // 192-bit keys have two extra rounds: run the memory keys up to 0xb0
    for (int key_offset = 0x60; key_offset <= 0xb0; key_offset += 0x10) {
      aes_enc_key(xmm_result, xmm_temp, key, key_offset);
    }
    load_key(xmm_temp, key, 0xc0);
    __ aesenclast(xmm_result, xmm_temp);

    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
    // no need to store r to memory until we exit
    __ addptr(pos, AESBlockSize);
    __ subptr(len_reg, AESBlockSize);
    __ jcc(Assembler::notEqual, L_loopTop_192);
    __ jmp(L_exit);

  __ BIND(L_key_256);
    // 256-bit code follows here (could be changed to use more xmm registers)
    __ movptr(pos, 0);
  __ align(OptoLoopAlignment);
  __ BIND(L_loopTop_256);
    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
    __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector

    __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
      __ aesenc(xmm_result, as_XMMRegister(rnum));
    }
    // 256-bit keys have four extra rounds: run the memory keys up to 0xd0
    for (int key_offset = 0x60; key_offset <= 0xd0; key_offset += 0x10) {
      aes_enc_key(xmm_result, xmm_temp, key, key_offset);
    }
    load_key(xmm_temp, key, 0xe0);
    __ aesenclast(xmm_result, xmm_temp);

    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
    // no need to store r to memory until we exit
    __ addptr(pos, AESBlockSize);
    __ subptr(len_reg, AESBlockSize);
    __ jcc(Assembler::notEqual, L_loopTop_256);
    __ jmp(L_exit);

    return start;
  }
       
  2493 
       
  2494 
       
  // CBC AES Decryption.
  // In 32-bit stub, because of lack of registers we do not try to parallelize 4 blocks at a time.
  //
  // Arguments:
  //
  // Inputs (on x86_32 these arrive on the stack; they are loaded below from
  // rbp-relative addresses after enter()):
  //   c_rarg0   - source byte array address
  //   c_rarg1   - destination byte array address
  //   c_rarg2   - K (key) in little endian int array
  //   c_rarg3   - r vector byte array address
  //   c_rarg4   - input length
  //
  // Each decrypted block is XORed with the previous *ciphertext* block
  // (initially the r vector), so rvec is repurposed inside the loops as a
  // pointer to the previous cipher block and restored from the parameter
  // slot at exit.
  address generate_cipherBlockChaining_decryptAESCrypt() {
    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
    address start = __ pc();

    Label L_exit, L_key_192_256, L_key_256;
    Label L_singleBlock_loopTop_128;
    Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256;
    const Register from        = rsi;      // source array address
    const Register to          = rdx;      // destination array address
    const Register key         = rcx;      // key array address
    const Register rvec        = rdi;      // r byte array initialized from initvector array address
                                           // and left with the results of the last encryption block
    const Register len_reg     = rbx;      // src len (must be multiple of blocksize 16)
    const Register pos         = rax;

    // xmm register assignments for the loops below
    const XMMRegister xmm_result = xmm0;
    const XMMRegister xmm_temp   = xmm1;
    // first 6 keys preloaded into xmm2-xmm7
    const int XMM_REG_NUM_KEY_FIRST = 2;
    const int XMM_REG_NUM_KEY_LAST  = 7;
    const int FIRST_NON_REG_KEY_offset = 0x70;  // keys 0x10..0x60 live in xmm2..xmm7
    const XMMRegister xmm_key_first   = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);

    __ enter(); // required for proper stackwalking of RuntimeStub frame
    handleSOERegisters(true /*saving*/);   // rsi/rdi/rbx are all used below

    // load registers from incoming parameters
    const Address  from_param(rbp, 8+0);
    const Address  to_param  (rbp, 8+4);
    const Address  key_param (rbp, 8+8);
    const Address  rvec_param (rbp, 8+12);
    const Address  len_param  (rbp, 8+16);
    __ movptr(from , from_param);
    __ movptr(to   , to_param);
    __ movptr(key  , key_param);
    __ movptr(rvec , rvec_param);
    __ movptr(len_reg , len_param);

    // the java expanded key ordering is rotated one position from what we want
    // so we start from 0x10 here and hit 0x00 last
    // NOTE: xmm_key_shuf_mask aliases xmm_temp (xmm1); the mask is only
    // needed during key preloading, before xmm_temp is used as a scratch reg.
    const XMMRegister xmm_key_shuf_mask = xmm1;  // used temporarily to swap key bytes up front
    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
    // load up xmm regs 2 thru 7 with the first 6 keys (offsets 0x10..0x60)
    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
      load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
      offset += 0x10;
    }

    // inside here, use the rvec register to point to previous block cipher
    // with which we xor at the end of each newly decrypted block
    const Register  prev_block_cipher_ptr = rvec;

    // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
    __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
    __ cmpl(rax, 44);
    __ jcc(Assembler::notEqual, L_key_192_256);


    // 128-bit code follows here (single block per iteration; see note at top)
    __ movptr(pos, 0);
  __ align(OptoLoopAlignment);
  __ BIND(L_singleBlock_loopTop_128);
    __ cmpptr(len_reg, 0);           // any blocks left??
    __ jcc(Assembler::equal, L_exit);
    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
    __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
      __ aesdec(xmm_result, as_XMMRegister(rnum));
    }
    for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xa0; key_offset += 0x10) {   // 128-bit runs up to key offset a0
      aes_dec_key(xmm_result, xmm_temp, key, key_offset);
    }
    load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
    __ aesdeclast(xmm_result, xmm_temp);
    __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
    __ pxor  (xmm_result, xmm_temp);                                  // xor with the current r vector
    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
    // no need to store r to memory until we exit
    __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));     // set up new ptr
    __ addptr(pos, AESBlockSize);
    __ subptr(len_reg, AESBlockSize);
    __ jmp(L_singleBlock_loopTop_128);


    __ BIND(L_exit);
    __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
    __ movptr(rvec , rvec_param);                                     // restore this since used in loop
    __ movdqu(Address(rvec, 0), xmm_temp);                            // final value of r stored in rvec of CipherBlockChaining object
    handleSOERegisters(false /*restoring*/);
    __ movl(rax, 0);                                                  // return 0 (why?)
    __ leave();                                                       // required for proper stackwalking of RuntimeStub frame
    __ ret(0);


    __ BIND(L_key_192_256);
    // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
    __ cmpl(rax, 52);
    __ jcc(Assembler::notEqual, L_key_256);

    // 192-bit code follows here (could be optimized to use parallelism)
    // NOTE(review): unlike the 128-bit loop, this loop tests len at the
    // bottom, so it assumes len_reg != 0 on entry — presumably guaranteed
    // by the java caller; verify against the intrinsic's checks.
    __ movptr(pos, 0);
    __ align(OptoLoopAlignment);
    __ BIND(L_singleBlock_loopTop_192);
    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
    __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
      __ aesdec(xmm_result, as_XMMRegister(rnum));
    }
    for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xc0; key_offset += 0x10) {   // 192-bit runs up to key offset c0
      aes_dec_key(xmm_result, xmm_temp, key, key_offset);
    }
    load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
    __ aesdeclast(xmm_result, xmm_temp);
    __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
    __ pxor  (xmm_result, xmm_temp);                                  // xor with the current r vector
    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
    // no need to store r to memory until we exit
    __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));     // set up new ptr
    __ addptr(pos, AESBlockSize);
    __ subptr(len_reg, AESBlockSize);
    __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192);
    __ jmp(L_exit);

    __ BIND(L_key_256);
    // 256-bit code follows here (could be optimized to use parallelism)
    // NOTE(review): same bottom-tested loop as the 192-bit path; assumes
    // len_reg != 0 on entry.
    __ movptr(pos, 0);
    __ align(OptoLoopAlignment);
    __ BIND(L_singleBlock_loopTop_256);
    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
    __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
      __ aesdec(xmm_result, as_XMMRegister(rnum));
    }
    for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) {   // 256-bit runs up to key offset e0
      aes_dec_key(xmm_result, xmm_temp, key, key_offset);
    }
    load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
    __ aesdeclast(xmm_result, xmm_temp);
    __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
    __ pxor  (xmm_result, xmm_temp);                                  // xor with the current r vector
    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
    // no need to store r to memory until we exit
    __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));     // set up new ptr
    __ addptr(pos, AESBlockSize);
    __ subptr(len_reg, AESBlockSize);
    __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);
    __ jmp(L_exit);

    return start;
  }
       
  2661 
       
  2662 
  2140  public:
  2663  public:
  2141   // Information about frame layout at time of blocking runtime call.
  2664   // Information about frame layout at time of blocking runtime call.
  2142   // Note that we only have to preserve callee-saved registers since
  2665   // Note that we only have to preserve callee-saved registers since
  2143   // the compilers are responsible for supplying a continuation point
  2666   // the compilers are responsible for supplying a continuation point
  2144   // if they expect all registers to be preserved.
  2667   // if they expect all registers to be preserved.
  2330 
  2853 
  2331     // arraycopy stubs used by compilers
  2854     // arraycopy stubs used by compilers
  2332     generate_arraycopy_stubs();
  2855     generate_arraycopy_stubs();
  2333 
  2856 
  2334     generate_math_stubs();
  2857     generate_math_stubs();
       
  2858 
       
  2859     // don't bother generating these AES intrinsic stubs unless global flag is set
       
  2860     if (UseAESIntrinsics) {
       
  2861       StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask();  // might be needed by the others
       
  2862 
       
  2863       StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
       
  2864       StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
       
  2865       StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
       
  2866       StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
       
  2867     }
  2335   }
  2868   }
  2336 
  2869 
  2337 
  2870 
  2338  public:
  2871  public:
  2339   StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {
  2872   StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {