2135 __ pow_with_fallback(0); |
2135 __ pow_with_fallback(0); |
2136 __ ret(0); |
2136 __ ret(0); |
2137 } |
2137 } |
2138 } |
2138 } |
2139 |
2139 |
|
// AES intrinsic stubs
// Block size of the AES cipher in bytes (fixed at 128 bits for all AES key sizes).
enum {AESBlockSize = 16};
|
2142 |
|
2143 address generate_key_shuffle_mask() { |
|
2144 __ align(16); |
|
2145 StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask"); |
|
2146 address start = __ pc(); |
|
2147 __ emit_data(0x00010203, relocInfo::none, 0 ); |
|
2148 __ emit_data(0x04050607, relocInfo::none, 0 ); |
|
2149 __ emit_data(0x08090a0b, relocInfo::none, 0 ); |
|
2150 __ emit_data(0x0c0d0e0f, relocInfo::none, 0 ); |
|
2151 return start; |
|
2152 } |
|
2153 |
|
2154 // Utility routine for loading a 128-bit key word in little endian format |
|
2155 // can optionally specify that the shuffle mask is already in an xmmregister |
|
2156 void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { |
|
2157 __ movdqu(xmmdst, Address(key, offset)); |
|
2158 if (xmm_shuf_mask != NULL) { |
|
2159 __ pshufb(xmmdst, xmm_shuf_mask); |
|
2160 } else { |
|
2161 __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); |
|
2162 } |
|
2163 } |
|
2164 |
|
// Performs one AES encryption round on xmmdst using the round key at
// key+offset (loaded into xmmtmp, which is clobbered).
// can optionally specify that the shuffle mask is already in an xmmregister
void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
  load_key(xmmtmp, key, offset, xmm_shuf_mask);
  __ aesenc(xmmdst, xmmtmp);
}
|
2171 |
|
// Performs one AES decryption round on xmmdst using the round key at
// key+offset (loaded into xmmtmp, which is clobbered).
// can optionally specify that the shuffle mask is already in an xmmregister
void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
  load_key(xmmtmp, key, offset, xmm_shuf_mask);
  __ aesdec(xmmdst, xmmtmp);
}
|
2178 |
|
2179 |
|
// Single-block AES encryption stub (AESCrypt.implEncryptBlock intrinsic).
//
// Arguments:
//
// Inputs:
//   c_rarg0   - source byte array address
//   c_rarg1   - destination byte array address
//   c_rarg2   - K (key) in little endian int array
//
address generate_aescrypt_encryptBlock() {
  assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
  Label L_doLast;
  address start = __ pc();

  const Register from = rsi;      // source array address
  const Register to = rdx;        // destination array address
  const Register key = rcx;       // key array address
  const Register keylen = rax;
  // 32-bit calling convention: incoming args live on the stack above saved rbp.
  const Address from_param(rbp, 8+0);
  const Address to_param (rbp, 8+4);
  const Address key_param (rbp, 8+8);

  const XMMRegister xmm_result = xmm0;
  const XMMRegister xmm_temp = xmm1;
  const XMMRegister xmm_key_shuf_mask = xmm2;

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  __ push(rsi); // rsi is callee-saved; preserve it around its use as 'from'
  __ movptr(from , from_param);
  __ movptr(to , to_param);
  __ movptr(key , key_param);

  // Read the Java int[] key length from the array header.
  __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
  // keylen = # of 32-bit words, convert to 128-bit words
  __ shrl(keylen, 2);
  __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more
  // keylen is now 0 (128-bit key, 44 ints), 2 (192-bit, 52 ints) or 4 (256-bit, 60 ints)

  __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
  __ movdqu(xmm_result, Address(from, 0));  // get 16 bytes of input

  // For encryption, the java expanded key ordering is just what we need

  // Initial AddRoundKey with key 0x00, then rounds 1-9 (common to all key sizes).
  load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
  __ pxor(xmm_result, xmm_temp);
  for (int offset = 0x10; offset <= 0x90; offset += 0x10) {
    aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
  }
  // Key 0xa0 is the final round key for 128-bit keys, a middle round otherwise.
  load_key (xmm_temp, key, 0xa0, xmm_key_shuf_mask);
  __ cmpl(keylen, 0);
  __ jcc(Assembler::equal, L_doLast);
  __ aesenc(xmm_result, xmm_temp);                      // only in 192 and 256 bit keys
  aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
  load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask);     // final round key for 192-bit keys
  __ subl(keylen, 2);
  __ jcc(Assembler::equal, L_doLast);
  __ aesenc(xmm_result, xmm_temp);                      // only in 256 bit keys
  aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
  load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask);     // final round key for 256-bit keys

  __ BIND(L_doLast);
  // xmm_temp holds the appropriate final round key on every path here.
  __ aesenclast(xmm_result, xmm_temp);
  __ movdqu(Address(to, 0), xmm_result);  // store the result
  __ xorptr(rax, rax); // return 0
  __ pop(rsi);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return start;
}
|
2249 |
|
2250 |
|
// Single-block AES decryption stub (AESCrypt.implDecryptBlock intrinsic).
//
// Arguments:
//
// Inputs:
//   c_rarg0   - source byte array address
//   c_rarg1   - destination byte array address
//   c_rarg2   - K (key) in little endian int array
//
address generate_aescrypt_decryptBlock() {
  assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
  Label L_doLast;
  address start = __ pc();

  const Register from = rsi;      // source array address
  const Register to = rdx;        // destination array address
  const Register key = rcx;       // key array address
  const Register keylen = rax;
  // 32-bit calling convention: incoming args live on the stack above saved rbp.
  const Address from_param(rbp, 8+0);
  const Address to_param (rbp, 8+4);
  const Address key_param (rbp, 8+8);

  const XMMRegister xmm_result = xmm0;
  const XMMRegister xmm_temp = xmm1;
  const XMMRegister xmm_key_shuf_mask = xmm2;

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  __ push(rsi); // rsi is callee-saved; preserve it around its use as 'from'
  __ movptr(from , from_param);
  __ movptr(to , to_param);
  __ movptr(key , key_param);

  // Read the Java int[] key length from the array header.
  __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
  // keylen = # of 32-bit words, convert to 128-bit words
  __ shrl(keylen, 2);
  __ subl(keylen, 11); // every key has at least 11 128-bit words, some have more
  // keylen is now 0 (128-bit key), 2 (192-bit) or 4 (256-bit)

  __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
  __ movdqu(xmm_result, Address(from, 0));

  // for decryption java expanded key ordering is rotated one position from what we want
  // so we start from 0x10 here and hit 0x00 last
  // we don't know if the key is aligned, hence not using load-execute form
  load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask);
  __ pxor (xmm_result, xmm_temp);
  // Decryption rounds with keys 0x20 - 0xa0 (common to all key sizes).
  for (int offset = 0x20; offset <= 0xa0; offset += 0x10) {
    aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
  }
  __ cmpl(keylen, 0);
  __ jcc(Assembler::equal, L_doLast);
  // only in 192 and 256 bit keys
  aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
  aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask);
  __ subl(keylen, 2);
  __ jcc(Assembler::equal, L_doLast);
  // only in 256 bit keys
  aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
  aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask);

  __ BIND(L_doLast);
  // for decryption the aesdeclast operation is always on key+0x00
  load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
  __ aesdeclast(xmm_result, xmm_temp);

  __ movdqu(Address(to, 0), xmm_result);  // store the result

  __ xorptr(rax, rax); // return 0
  __ pop(rsi);
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return start;
}
|
2324 |
|
2325 void handleSOERegisters(bool saving) { |
|
2326 const int saveFrameSizeInBytes = 4 * wordSize; |
|
2327 const Address saved_rbx (rbp, -3 * wordSize); |
|
2328 const Address saved_rsi (rbp, -2 * wordSize); |
|
2329 const Address saved_rdi (rbp, -1 * wordSize); |
|
2330 |
|
2331 if (saving) { |
|
2332 __ subptr(rsp, saveFrameSizeInBytes); |
|
2333 __ movptr(saved_rsi, rsi); |
|
2334 __ movptr(saved_rdi, rdi); |
|
2335 __ movptr(saved_rbx, rbx); |
|
2336 } else { |
|
2337 // restoring |
|
2338 __ movptr(rsi, saved_rsi); |
|
2339 __ movptr(rdi, saved_rdi); |
|
2340 __ movptr(rbx, saved_rbx); |
|
2341 } |
|
2342 } |
|
2343 |
|
// Multi-block CBC-mode AES encryption stub (CipherBlockChaining.implEncrypt intrinsic).
//
// Arguments:
//
// Inputs:
//   c_rarg0   - source byte array address
//   c_rarg1   - destination byte array address
//   c_rarg2   - K (key) in little endian int array
//   c_rarg3   - r vector byte array address
//   c_rarg4   - input length
//
address generate_cipherBlockChaining_encryptAESCrypt() {
  assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
  address start = __ pc();

  Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
  const Register from = rsi;      // source array address
  const Register to = rdx;        // destination array address
  const Register key = rcx;       // key array address
  const Register rvec = rdi;      // r byte array initialized from initvector array address
                                  // and left with the results of the last encryption block
  const Register len_reg = rbx;   // src len (must be multiple of blocksize 16)
  const Register pos = rax;

  // xmm register assignments for the loops below
  const XMMRegister xmm_result = xmm0;
  const XMMRegister xmm_temp = xmm1;
  // first 6 keys preloaded into xmm2-xmm7
  const int XMM_REG_NUM_KEY_FIRST = 2;
  const int XMM_REG_NUM_KEY_LAST = 7;
  const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  handleSOERegisters(true /*saving*/); // preserve rsi/rdi/rbx, used as locals below

  // load registers from incoming parameters
  // 32-bit calling convention: args live on the stack above the saved rbp.
  const Address from_param(rbp, 8+0);
  const Address to_param (rbp, 8+4);
  const Address key_param (rbp, 8+8);
  const Address rvec_param (rbp, 8+12);
  const Address len_param (rbp, 8+16);
  __ movptr(from , from_param);
  __ movptr(to , to_param);
  __ movptr(key , key_param);
  __ movptr(rvec , rvec_param);
  __ movptr(len_reg , len_param);

  const XMMRegister xmm_key_shuf_mask = xmm_temp;  // used temporarily to swap key bytes up front
  __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
  // load up xmm regs 2 thru 7 with keys 0-5 (offsets 0x00 - 0x50)
  for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
    load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
    offset += 0x10;
  }

  __ movdqu(xmm_result, Address(rvec, 0x00));   // initialize xmm_result with r vec

  // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (44=128, 52=192, or 60=256))
  __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
  __ cmpl(rax, 44);
  __ jcc(Assembler::notEqual, L_key_192_256);

  // 128 bit code follows here
  __ movptr(pos, 0);
  __ align(OptoLoopAlignment);
  __ BIND(L_loopTop_128);
  __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
  __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector

  __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
  // rounds 1-5 use the preloaded keys in xmm3-xmm7
  for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
    __ aesenc(xmm_result, as_XMMRegister(rnum));
  }
  // remaining middle rounds load their keys from memory
  for (int key_offset = 0x60; key_offset <= 0x90; key_offset += 0x10) {
    aes_enc_key(xmm_result, xmm_temp, key, key_offset);
  }
  load_key(xmm_temp, key, 0xa0);   // final round key for 128-bit keys
  __ aesenclast(xmm_result, xmm_temp);

  __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
  // no need to store r to memory until we exit
  __ addptr(pos, AESBlockSize);
  __ subptr(len_reg, AESBlockSize);
  __ jcc(Assembler::notEqual, L_loopTop_128);   // len is a multiple of 16, so equal means done

  __ BIND(L_exit);
  __ movdqu(Address(rvec, 0), xmm_result);   // final value of r stored in rvec of CipherBlockChaining object

  handleSOERegisters(false /*restoring*/);
  __ movl(rax, 0);   // return 0 (why?)
  __ leave();        // required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  __ BIND(L_key_192_256);
  // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
  __ cmpl(rax, 52);
  __ jcc(Assembler::notEqual, L_key_256);

  // 192-bit code follows here (could be changed to use more xmm registers)
  __ movptr(pos, 0);
  __ align(OptoLoopAlignment);
  __ BIND(L_loopTop_192);
  __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
  __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector

  __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
  for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
    __ aesenc(xmm_result, as_XMMRegister(rnum));
  }
  // 192-bit keys have two extra middle rounds (up to 0xb0)
  for (int key_offset = 0x60; key_offset <= 0xb0; key_offset += 0x10) {
    aes_enc_key(xmm_result, xmm_temp, key, key_offset);
  }
  load_key(xmm_temp, key, 0xc0);   // final round key for 192-bit keys
  __ aesenclast(xmm_result, xmm_temp);

  __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
  // no need to store r to memory until we exit
  __ addptr(pos, AESBlockSize);
  __ subptr(len_reg, AESBlockSize);
  __ jcc(Assembler::notEqual, L_loopTop_192);
  __ jmp(L_exit);

  __ BIND(L_key_256);
  // 256-bit code follows here (could be changed to use more xmm registers)
  __ movptr(pos, 0);
  __ align(OptoLoopAlignment);
  __ BIND(L_loopTop_256);
  __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
  __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector

  __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
  for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
    __ aesenc(xmm_result, as_XMMRegister(rnum));
  }
  // 256-bit keys have four extra middle rounds (up to 0xd0)
  for (int key_offset = 0x60; key_offset <= 0xd0; key_offset += 0x10) {
    aes_enc_key(xmm_result, xmm_temp, key, key_offset);
  }
  load_key(xmm_temp, key, 0xe0);   // final round key for 256-bit keys
  __ aesenclast(xmm_result, xmm_temp);

  __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
  // no need to store r to memory until we exit
  __ addptr(pos, AESBlockSize);
  __ subptr(len_reg, AESBlockSize);
  __ jcc(Assembler::notEqual, L_loopTop_256);
  __ jmp(L_exit);

  return start;
}
|
2493 |
|
2494 |
|
// CBC AES Decryption.
// In 32-bit stub, because of lack of registers we do not try to parallelize 4 blocks at a time.
//
// Arguments:
//
// Inputs:
//   c_rarg0   - source byte array address
//   c_rarg1   - destination byte array address
//   c_rarg2   - K (key) in little endian int array
//   c_rarg3   - r vector byte array address
//   c_rarg4   - input length
//

address generate_cipherBlockChaining_decryptAESCrypt() {
  assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
  address start = __ pc();

  Label L_exit, L_key_192_256, L_key_256;
  Label L_singleBlock_loopTop_128;
  Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256;
  const Register from = rsi;      // source array address
  const Register to = rdx;        // destination array address
  const Register key = rcx;       // key array address
  const Register rvec = rdi;      // r byte array initialized from initvector array address
                                  // and left with the results of the last encryption block
  const Register len_reg = rbx;   // src len (must be multiple of blocksize 16)
  const Register pos = rax;

  // xmm register assignments for the loops below
  const XMMRegister xmm_result = xmm0;
  const XMMRegister xmm_temp = xmm1;
  // first 6 keys preloaded into xmm2-xmm7
  const int XMM_REG_NUM_KEY_FIRST = 2;
  const int XMM_REG_NUM_KEY_LAST = 7;
  // first key offset that is NOT preloaded into an xmm register
  const int FIRST_NON_REG_KEY_offset = 0x70;
  const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);

  __ enter(); // required for proper stackwalking of RuntimeStub frame
  handleSOERegisters(true /*saving*/); // preserve rsi/rdi/rbx, used as locals below

  // load registers from incoming parameters
  // 32-bit calling convention: args live on the stack above the saved rbp.
  const Address from_param(rbp, 8+0);
  const Address to_param (rbp, 8+4);
  const Address key_param (rbp, 8+8);
  const Address rvec_param (rbp, 8+12);
  const Address len_param (rbp, 8+16);
  __ movptr(from , from_param);
  __ movptr(to , to_param);
  __ movptr(key , key_param);
  __ movptr(rvec , rvec_param);
  __ movptr(len_reg , len_param);

  // the java expanded key ordering is rotated one position from what we want
  // so we start from 0x10 here and hit 0x00 last
  const XMMRegister xmm_key_shuf_mask = xmm1;  // used temporarily to swap key bytes up front (aliases xmm_temp)
  __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
  // load up xmm regs 2 thru 7 with the first 6 keys (offsets 0x10 - 0x60)
  for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
    load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
    offset += 0x10;
  }

  // inside here, use the rvec register to point to previous block cipher
  // with which we xor at the end of each newly decrypted block
  const Register prev_block_cipher_ptr = rvec;

  // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (44=128, 52=192, or 60=256))
  __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
  __ cmpl(rax, 44);
  __ jcc(Assembler::notEqual, L_key_192_256);


  // 128-bit code follows here, parallelized
  __ movptr(pos, 0);
  __ align(OptoLoopAlignment);
  __ BIND(L_singleBlock_loopTop_128);
  __ cmpptr(len_reg, 0);   // any blocks left??
  __ jcc(Assembler::equal, L_exit);
  __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
  __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
  // rounds with the preloaded keys in xmm3-xmm7
  for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
    __ aesdec(xmm_result, as_XMMRegister(rnum));
  }
  for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xa0; key_offset += 0x10) {   // 128-bit runs up to key offset a0
    aes_dec_key(xmm_result, xmm_temp, key, key_offset);
  }
  load_key(xmm_temp, key, 0x00);                       // final key is stored in java expanded array at offset 0
  __ aesdeclast(xmm_result, xmm_temp);
  __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
  __ pxor  (xmm_result, xmm_temp);                     // xor with the current r vector
  __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
  // no need to store r to memory until we exit
  __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));   // set up new ptr
  __ addptr(pos, AESBlockSize);
  __ subptr(len_reg, AESBlockSize);
  __ jmp(L_singleBlock_loopTop_128);


  __ BIND(L_exit);
  // prev_block_cipher_ptr holds the last ciphertext block (the new r vector)
  __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
  __ movptr(rvec , rvec_param);                        // restore this since used in loop
  __ movdqu(Address(rvec, 0), xmm_temp);               // final value of r stored in rvec of CipherBlockChaining object
  handleSOERegisters(false /*restoring*/);
  __ movl(rax, 0);   // return 0 (why?)
  __ leave();        // required for proper stackwalking of RuntimeStub frame
  __ ret(0);


  __ BIND(L_key_192_256);
  // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
  __ cmpl(rax, 52);
  __ jcc(Assembler::notEqual, L_key_256);

  // 192-bit code follows here (could be optimized to use parallelism)
  __ movptr(pos, 0);
  __ align(OptoLoopAlignment);
  __ BIND(L_singleBlock_loopTop_192);
  __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
  __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
  for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
    __ aesdec(xmm_result, as_XMMRegister(rnum));
  }
  for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xc0; key_offset += 0x10) {   // 192-bit runs up to key offset c0
    aes_dec_key(xmm_result, xmm_temp, key, key_offset);
  }
  load_key(xmm_temp, key, 0x00);                       // final key is stored in java expanded array at offset 0
  __ aesdeclast(xmm_result, xmm_temp);
  __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
  __ pxor  (xmm_result, xmm_temp);                     // xor with the current r vector
  __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
  // no need to store r to memory until we exit
  __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));   // set up new ptr
  __ addptr(pos, AESBlockSize);
  __ subptr(len_reg, AESBlockSize);
  __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192);
  __ jmp(L_exit);

  __ BIND(L_key_256);
  // 256-bit code follows here (could be optimized to use parallelism)
  __ movptr(pos, 0);
  __ align(OptoLoopAlignment);
  __ BIND(L_singleBlock_loopTop_256);
  __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
  __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
  for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
    __ aesdec(xmm_result, as_XMMRegister(rnum));
  }
  for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) {   // 256-bit runs up to key offset e0
    aes_dec_key(xmm_result, xmm_temp, key, key_offset);
  }
  load_key(xmm_temp, key, 0x00);                       // final key is stored in java expanded array at offset 0
  __ aesdeclast(xmm_result, xmm_temp);
  __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
  __ pxor  (xmm_result, xmm_temp);                     // xor with the current r vector
  __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
  // no need to store r to memory until we exit
  __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));   // set up new ptr
  __ addptr(pos, AESBlockSize);
  __ subptr(len_reg, AESBlockSize);
  __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);
  __ jmp(L_exit);

  return start;
}
|
2661 |
|
2662 |
2140 public: |
2663 public: |
2141 // Information about frame layout at time of blocking runtime call. |
2664 // Information about frame layout at time of blocking runtime call. |
2142 // Note that we only have to preserve callee-saved registers since |
2665 // Note that we only have to preserve callee-saved registers since |
2143 // the compilers are responsible for supplying a continuation point |
2666 // the compilers are responsible for supplying a continuation point |
2144 // if they expect all registers to be preserved. |
2667 // if they expect all registers to be preserved. |