# HG changeset patch # User kvn # Date 1441317792 25200 # Node ID 632402f18fe649b2c652bb245a45c57e39aff17c # Parent b4817ffce063b1c9cce425a78d8785eef0529d40 8132081: C2 support for Adler32 on SPARC Summary: Add C2 instrinsic support for Adler32 checksum on SPARC. Reviewed-by: kvn Contributed-by: ahmed.khawaja@oracle.com diff -r b4817ffce063 -r 632402f18fe6 hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp --- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp Thu Sep 03 14:29:08 2015 -0700 +++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp Thu Sep 03 15:03:12 2015 -0700 @@ -177,6 +177,12 @@ if (UseCRC32 && (auxv & HWCAP_CRC32) == 0) { warning("UseCRC32 specified, but not supported on this CPU"); } + + if (UseAdler32Intrinsics) { + warning("Adler32Intrinsics not available on this CPU."); + FLAG_SET_DEFAULT(UseAdler32Intrinsics, false); + } + if (auxv & HWCAP_AES) { UseAES = UseAES || FLAG_IS_DEFAULT(UseAES); UseAESIntrinsics = diff -r b4817ffce063 -r 632402f18fe6 hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp --- a/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp Thu Sep 03 14:29:08 2015 -0700 +++ b/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp Thu Sep 03 15:03:12 2015 -0700 @@ -200,6 +200,11 @@ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); } + if (UseAdler32Intrinsics) { + warning("Adler32Intrinsics not available on this CPU."); + FLAG_SET_DEFAULT(UseAdler32Intrinsics, false); + } + if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { UseMultiplyToLenIntrinsic = true; } diff -r b4817ffce063 -r 632402f18fe6 hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp --- a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp Thu Sep 03 14:29:08 2015 -0700 +++ b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp Thu Sep 03 15:03:12 2015 -0700 @@ -5110,6 +5110,188 @@ return start; } +#define ADLER32_NUM_TEMPS 16 + + /** + * Arguments: + * + * Inputs: + * O0 - int adler + * O1 - byte* buff + * O2 - int len + * + * Output: + * O0 - int adler result + */ + address generate_updateBytesAdler32() { + __ 
align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "updateBytesAdler32"); + address start = __ pc(); + + Label L_cleanup_loop, L_cleanup_loop_check; + Label L_main_loop_check, L_main_loop, L_inner_loop, L_inner_loop_check; + Label L_nmax_check_done; + + // Aliases + Register s1 = O0; + Register s2 = O3; + Register buff = O1; + Register len = O2; + Register temp[ADLER32_NUM_TEMPS] = {L0, L1, L2, L3, L4, L5, L6, L7, I0, I1, I2, I3, I4, I5, G3, I7}; + + // Max number of bytes we can process before having to take the mod + // 0x15B0 is 5552 in decimal, the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 + unsigned long NMAX = 0x15B0; + + // Zero-out the upper bits of len + __ clruwu(len); + + // Create the mask 0xFFFF + __ set64(0x00FFFF, O4, O5); // O5 is the temp register + + // s1 is initialized to the lower 16 bits of adler + // s2 is initialized to the upper 16 bits of adler + __ srlx(O0, 16, O5); // adler >> 16 + __ and3(O0, O4, s1); // s1 = (adler & 0xFFFF) + __ and3(O5, O4, s2); // s2 = ((adler >> 16) & 0xFFFF) + + // The pipelined loop needs at least 16 elements for 1 iteration + // It does check this, but it is more effective to skip to the cleanup loop + // Setup the constant for cutoff checking + __ mov(15, O4); + + // Check if we are above the cutoff, if not go to the cleanup loop immediately + __ cmp_and_br_short(len, O4, Assembler::lessEqualUnsigned, Assembler::pt, L_cleanup_loop_check); + + // Free up some registers for our use + for (int i = 0; i < ADLER32_NUM_TEMPS; i++) { + __ movxtod(temp[i], as_FloatRegister(2*i)); + } + + // Loop maintenance stuff is done at the end of the loop, so skip to there + __ ba_short(L_main_loop_check); + + __ BIND(L_main_loop); + + // Prologue for inner loop + __ ldub(buff, 0, L0); + __ dec(O5); + + for (int i = 1; i < 8; i++) { + __ ldub(buff, i, temp[i]); + } + + __ inc(buff, 8); + + // Inner loop processes 16 elements at a time, might never execute if only 16 elements + // to be processed 
by the outer loop + __ ba_short(L_inner_loop_check); + + __ BIND(L_inner_loop); + + for (int i = 0; i < 8; i++) { + __ ldub(buff, (2*i), temp[(8+(2*i)) % ADLER32_NUM_TEMPS]); + __ add(s1, temp[i], s1); + __ ldub(buff, (2*i)+1, temp[(8+(2*i)+1) % ADLER32_NUM_TEMPS]); + __ add(s2, s1, s2); + } + + // Original temp 0-7 used and new loads to temp 0-7 issued + // temp 8-15 ready to be consumed + __ add(s1, I0, s1); + __ dec(O5); + __ add(s2, s1, s2); + __ add(s1, I1, s1); + __ inc(buff, 16); + __ add(s2, s1, s2); + + for (int i = 0; i < 6; i++) { + __ add(s1, temp[10+i], s1); + __ add(s2, s1, s2); + } + + __ BIND(L_inner_loop_check); + __ nop(); + __ cmp_and_br_short(O5, 0, Assembler::notEqual, Assembler::pt, L_inner_loop); + + // Epilogue + for (int i = 0; i < 4; i++) { + __ ldub(buff, (2*i), temp[8+(2*i)]); + __ add(s1, temp[i], s1); + __ ldub(buff, (2*i)+1, temp[8+(2*i)+1]); + __ add(s2, s1, s2); + } + + __ add(s1, temp[4], s1); + __ inc(buff, 8); + + for (int i = 0; i < 11; i++) { + __ add(s2, s1, s2); + __ add(s1, temp[5+i], s1); + } + + __ add(s2, s1, s2); + + // Take the mod for s1 and s2 + __ set64(0xFFF1, L0, L1); + __ udivx(s1, L0, L1); + __ udivx(s2, L0, L2); + __ mulx(L0, L1, L1); + __ mulx(L0, L2, L2); + __ sub(s1, L1, s1); + __ sub(s2, L2, s2); + + // Make sure there is something left to process + __ BIND(L_main_loop_check); + __ set64(NMAX, L0, L1); + // k = len < NMAX ? 
len : NMAX + __ cmp_and_br_short(len, L0, Assembler::greaterEqualUnsigned, Assembler::pt, L_nmax_check_done); + __ andn(len, 0x0F, L0); // k = len & ~0xF: only whole 16-byte blocks are handled here + __ BIND(L_nmax_check_done); + __ mov(L0, O5); + __ sub(len, L0, len); // len -= k + + __ srlx(O5, 4, O5); // O5 = k / 16, the number of 16-byte blocks + __ cmp_and_br_short(O5, 0, Assembler::notEqual, Assembler::pt, L_main_loop); + + // Restore anything we used, take the mod one last time, combine and return + // Restore any registers we saved + for (int i = 0; i < ADLER32_NUM_TEMPS; i++) { + __ movdtox(as_FloatRegister(2*i), temp[i]); + } + + // There might be nothing left to process + __ ba_short(L_cleanup_loop_check); + + __ BIND(L_cleanup_loop); + __ ldub(buff, 0, O4); // load single byte from buffer + __ inc(buff); // buff++ + __ add(s1, O4, s1); // s1 += *buff++; + __ dec(len); // len-- + __ add(s1, s2, s2); // s2 += s1; + __ BIND(L_cleanup_loop_check); + __ nop(); + __ cmp_and_br_short(len, 0, Assembler::notEqual, Assembler::pt, L_cleanup_loop); + + // Take the mod one last time + __ set64(0xFFF1, O1, O2); + __ udivx(s1, O1, O2); + __ udivx(s2, O1, O5); + __ mulx(O1, O2, O2); + __ mulx(O1, O5, O5); + __ sub(s1, O2, s1); + __ sub(s2, O5, s2); + + // Combine lower bits and higher bits + __ sllx(s2, 16, s2); // s2 = s2 << 16 + __ or3(s1, s2, s1); // adler = s2 | s1 + // Final return value is in O0 + __ retl(); + __ delayed()->nop(); + + return start; + } + void generate_initial() { // Generates all stubs and initializes the entry points @@ -5206,6 +5388,11 @@ if (UseCRC32CIntrinsics) { StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(); } + + // generate Adler32 intrinsics code + if (UseAdler32Intrinsics) { + StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32(); + } } diff -r b4817ffce063 -r 632402f18fe6 hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp --- a/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp Thu Sep 03 14:29:08 2015 -0700 +++ b/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp 
Thu Sep 03 15:03:12 2015 -0700 @@ -41,7 +41,7 @@ enum /* platform_dependent_constants */ { // %%%%%%%% May be able to shrink this a lot code_size1 = 20000, // simply increase if too small (assembler will crash if too small) - code_size2 = 24000 // simply increase if too small (assembler will crash if too small) + code_size2 = 27000 // simply increase if too small (assembler will crash if too small) }; class Sparc { diff -r b4817ffce063 -r 632402f18fe6 hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp --- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp Thu Sep 03 14:29:08 2015 -0700 +++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp Thu Sep 03 15:03:12 2015 -0700 @@ -377,6 +377,15 @@ FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); } + if (UseVIS > 2) { + if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) { + FLAG_SET_DEFAULT(UseAdler32Intrinsics, true); + } + } else if (UseAdler32Intrinsics) { + warning("SPARC Adler32 intrinsics require VIS3 instruction support. Intrinsics will be disabled."); + FLAG_SET_DEFAULT(UseAdler32Intrinsics, false); + } + if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && (cache_line_size > ContendedPaddingWidth)) ContendedPaddingWidth = cache_line_size; diff -r b4817ffce063 -r 632402f18fe6 hotspot/src/cpu/x86/vm/vm_version_x86.cpp --- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Thu Sep 03 14:29:08 2015 -0700 +++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Thu Sep 03 15:03:12 2015 -0700 @@ -714,6 +714,11 @@ FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); } + if (UseAdler32Intrinsics) { + warning("Adler32Intrinsics not available on this CPU."); + FLAG_SET_DEFAULT(UseAdler32Intrinsics, false); + } + // Adjust RTM (Restricted Transactional Memory) flags if (!supports_rtm() && UseRTMLocking) { // Can't continue because UseRTMLocking affects UseBiasedLocking flag diff -r b4817ffce063 -r 632402f18fe6 hotspot/src/share/vm/classfile/vmSymbols.cpp --- a/hotspot/src/share/vm/classfile/vmSymbols.cpp Thu Sep 03 14:29:08 2015 -0700 +++ 
b/hotspot/src/share/vm/classfile/vmSymbols.cpp Thu Sep 03 15:03:12 2015 -0700 @@ -625,6 +625,10 @@ case vmIntrinsics::_updateDirectByteBufferCRC32C: if (!UseCRC32CIntrinsics) return true; break; + case vmIntrinsics::_updateBytesAdler32: + case vmIntrinsics::_updateByteBufferAdler32: + if (!UseAdler32Intrinsics) return true; + break; case vmIntrinsics::_copyMemory: if (!InlineArrayCopy || !InlineUnsafeOps) return true; break; diff -r b4817ffce063 -r 632402f18fe6 hotspot/src/share/vm/classfile/vmSymbols.hpp --- a/hotspot/src/share/vm/classfile/vmSymbols.hpp Thu Sep 03 14:29:08 2015 -0700 +++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp Thu Sep 03 15:03:12 2015 -0700 @@ -927,6 +927,12 @@ do_intrinsic(_updateDirectByteBufferCRC32C, java_util_zip_CRC32C, updateDirectByteBuffer_C_name, updateByteBuffer_signature, F_S) \ do_name( updateDirectByteBuffer_C_name, "updateDirectByteBuffer") \ \ + /* support for java.util.zip.Adler32 */ \ + do_class(java_util_zip_Adler32, "java/util/zip/Adler32") \ + do_intrinsic(_updateBytesAdler32, java_util_zip_Adler32, updateBytes_C_name, updateBytes_signature, F_SN) \ + do_intrinsic(_updateByteBufferAdler32, java_util_zip_Adler32, updateByteBuffer_A_name, updateByteBuffer_signature, F_SN) \ + do_name( updateByteBuffer_A_name, "updateByteBuffer") \ + \ /* support for sun.misc.Unsafe */ \ do_class(sun_misc_Unsafe, "sun/misc/Unsafe") \ \ diff -r b4817ffce063 -r 632402f18fe6 hotspot/src/share/vm/opto/c2compiler.cpp --- a/hotspot/src/share/vm/opto/c2compiler.cpp Thu Sep 03 14:29:08 2015 -0700 +++ b/hotspot/src/share/vm/opto/c2compiler.cpp Thu Sep 03 15:03:12 2015 -0700 @@ -419,6 +419,8 @@ case vmIntrinsics::_updateByteBufferCRC32: case vmIntrinsics::_updateBytesCRC32C: case vmIntrinsics::_updateDirectByteBufferCRC32C: + case vmIntrinsics::_updateBytesAdler32: + case vmIntrinsics::_updateByteBufferAdler32: case vmIntrinsics::_profileBoolean: case vmIntrinsics::_isCompileConstant: break; diff -r b4817ffce063 -r 632402f18fe6 
hotspot/src/share/vm/opto/escape.cpp --- a/hotspot/src/share/vm/opto/escape.cpp Thu Sep 03 14:29:08 2015 -0700 +++ b/hotspot/src/share/vm/opto/escape.cpp Thu Sep 03 15:03:12 2015 -0700 @@ -963,6 +963,7 @@ strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 || strcmp(call->as_CallLeaf()->_name, "updateBytesCRC32") == 0 || strcmp(call->as_CallLeaf()->_name, "updateBytesCRC32C") == 0 || + strcmp(call->as_CallLeaf()->_name, "updateBytesAdler32") == 0 || strcmp(call->as_CallLeaf()->_name, "aescrypt_encryptBlock") == 0 || strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 || strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 || diff -r b4817ffce063 -r 632402f18fe6 hotspot/src/share/vm/opto/library_call.cpp --- a/hotspot/src/share/vm/opto/library_call.cpp Thu Sep 03 14:29:08 2015 -0700 +++ b/hotspot/src/share/vm/opto/library_call.cpp Thu Sep 03 15:03:12 2015 -0700 @@ -296,6 +296,8 @@ Node* get_table_from_crc32c_class(ciInstanceKlass *crc32c_class); bool inline_updateBytesCRC32C(); bool inline_updateDirectByteBufferCRC32C(); + bool inline_updateBytesAdler32(); + bool inline_updateByteBufferAdler32(); bool inline_multiplyToLen(); bool inline_squareToLen(); bool inline_mulAdd(); @@ -699,6 +701,11 @@ case vmIntrinsics::_updateDirectByteBufferCRC32C: return inline_updateDirectByteBufferCRC32C(); + case vmIntrinsics::_updateBytesAdler32: + return inline_updateBytesAdler32(); + case vmIntrinsics::_updateByteBufferAdler32: + return inline_updateByteBufferAdler32(); + case vmIntrinsics::_profileBoolean: return inline_profileBoolean(); case vmIntrinsics::_isCompileConstant: @@ -5547,6 +5554,87 @@ return true; } +//------------------------------inline_updateBytesAdler32---------------------- +// +// Calculate Adler32 checksum for byte[] array. 
+// int java.util.zip.Adler32.updateBytes(int crc, byte[] buf, int off, int len) +// +bool LibraryCallKit::inline_updateBytesAdler32() { + assert(UseAdler32Intrinsics, "Adler32 Instrinsic support need"); // TODO: confirm UseAdler32Intrinsics is the right flag to assert on here + assert(callee()->signature()->size() == 4, "updateBytes has 4 parameters"); + assert(callee()->holder()->is_loaded(), "Adler32 class must be loaded"); + // no receiver since it is a static method + Node* crc = argument(0); // type: int + Node* src = argument(1); // type: oop + Node* offset = argument(2); // type: int + Node* length = argument(3); // type: int + + const Type* src_type = src->Value(&_gvn); + const TypeAryPtr* top_src = src_type->isa_aryptr(); + if (top_src == NULL || top_src->klass() == NULL) { + // failed array check + return false; + } + + // Only byte[] sources are handled; decline to intrinsify any other element type. + BasicType src_elem = src_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); + if (src_elem != T_BYTE) { + return false; + } + + // 'src_start' points to src array + scaled offset + Node* src_start = array_element_address(src, offset, src_elem); + + // We assume that range check is done by caller. + // TODO: generate range check (offset+length <= src.length) in debug VM. + + // Call the stub. + address stubAddr = StubRoutines::updateBytesAdler32(); + const char *stubName = "updateBytesAdler32"; + + Node* call = make_runtime_call(RC_LEAF, OptoRuntime::updateBytesAdler32_Type(), + stubAddr, stubName, TypePtr::BOTTOM, + crc, src_start, length); + Node* result = _gvn.transform(new ProjNode(call, TypeFunc::Parms)); + set_result(result); + return true; +} + +//------------------------------inline_updateByteBufferAdler32--------------- +// +// Calculate Adler32 checksum for DirectByteBuffer. 
+// int java.util.zip.Adler32.updateByteBuffer(int crc, long buf, int off, int len) +// +bool LibraryCallKit::inline_updateByteBufferAdler32() { + assert(UseAdler32Intrinsics, "Adler32 Instrinsic support need"); // TODO: confirm UseAdler32Intrinsics is the right flag to assert on here + assert(callee()->signature()->size() == 5, "updateByteBuffer has 4 parameters and one is long"); + assert(callee()->holder()->is_loaded(), "Adler32 class must be loaded"); + // no receiver since it is a static method + Node* crc = argument(0); // type: int + Node* src = argument(1); // type: long (takes two argument slots, so offset is argument(3)) + Node* offset = argument(3); // type: int + Node* length = argument(4); // type: int + + src = ConvL2X(src); // adjust Java long to machine word + Node* base = _gvn.transform(new CastX2PNode(src)); + offset = ConvI2X(offset); + + // 'src_start' is the raw buffer address plus the byte offset + Node* src_start = basic_plus_adr(top(), base, offset); + + // Call the stub. + address stubAddr = StubRoutines::updateBytesAdler32(); + const char *stubName = "updateBytesAdler32"; + + Node* call = make_runtime_call(RC_LEAF, OptoRuntime::updateBytesAdler32_Type(), + stubAddr, stubName, TypePtr::BOTTOM, + crc, src_start, length); + + Node* result = _gvn.transform(new ProjNode(call, TypeFunc::Parms)); + set_result(result); + return true; +} + //----------------------------inline_reference_get---------------------------- // public T java.lang.ref.Reference.get(); bool LibraryCallKit::inline_reference_get() { diff -r b4817ffce063 -r 632402f18fe6 hotspot/src/share/vm/opto/runtime.cpp --- a/hotspot/src/share/vm/opto/runtime.cpp Thu Sep 03 14:29:08 2015 -0700 +++ b/hotspot/src/share/vm/opto/runtime.cpp Thu Sep 03 15:03:12 2015 -0700 @@ -921,6 +921,28 @@ return TypeFunc::make(domain, range); } +/** +* int updateBytesAdler32(int adler, byte* buf, int len) -- buf already includes the offset +*/ +const TypeFunc* OptoRuntime::updateBytesAdler32_Type() { + // create input type (domain) + int num_args = 3; + int argcnt = num_args; + const Type** fields = 
TypeTuple::fields(argcnt); + int argp = TypeFunc::Parms; + fields[argp++] = TypeInt::INT; // crc + fields[argp++] = TypePtr::NOTNULL; // src + offset + fields[argp++] = TypeInt::INT; // len + assert(argp == TypeFunc::Parms+argcnt, "correct decoding"); + const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields); + + // result type needed + fields = TypeTuple::fields(1); + fields[TypeFunc::Parms+0] = TypeInt::INT; // crc result + const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields); + return TypeFunc::make(domain, range); +} + // for cipherBlockChaining calls of aescrypt encrypt/decrypt, four pointers and a length, returning int const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() { // create input type (domain) diff -r b4817ffce063 -r 632402f18fe6 hotspot/src/share/vm/opto/runtime.hpp --- a/hotspot/src/share/vm/opto/runtime.hpp Thu Sep 03 14:29:08 2015 -0700 +++ b/hotspot/src/share/vm/opto/runtime.hpp Thu Sep 03 15:03:12 2015 -0700 @@ -331,6 +331,8 @@ static const TypeFunc* updateBytesCRC32_Type(); static const TypeFunc* updateBytesCRC32C_Type(); + static const TypeFunc* updateBytesAdler32_Type(); + // leaf on stack replacement interpreter accessor types static const TypeFunc* osr_end_Type(); diff -r b4817ffce063 -r 632402f18fe6 hotspot/src/share/vm/runtime/globals.hpp --- a/hotspot/src/share/vm/runtime/globals.hpp Thu Sep 03 14:29:08 2015 -0700 +++ b/hotspot/src/share/vm/runtime/globals.hpp Thu Sep 03 15:03:12 2015 -0700 @@ -836,6 +836,9 @@ product(bool, UseCRC32CIntrinsics, false, \ "use intrinsics for java.util.zip.CRC32C") \ \ + product(bool, UseAdler32Intrinsics, false, \ + "use intrinsics for java.util.zip.Adler32") \ + \ diagnostic(ccstrlist, DisableIntrinsic, "", \ "do not expand intrinsics whose (internal) names appear here") \ \ diff -r b4817ffce063 -r 632402f18fe6 hotspot/src/share/vm/runtime/stubRoutines.cpp --- a/hotspot/src/share/vm/runtime/stubRoutines.cpp Thu Sep 03 14:29:08 2015 -0700 +++ 
b/hotspot/src/share/vm/runtime/stubRoutines.cpp Thu Sep 03 15:03:12 2015 -0700 @@ -139,6 +139,7 @@ address StubRoutines::_crc_table_adr = NULL; address StubRoutines::_updateBytesCRC32C = NULL; +address StubRoutines::_updateBytesAdler32 = NULL; address StubRoutines::_multiplyToLen = NULL; address StubRoutines::_squareToLen = NULL; diff -r b4817ffce063 -r 632402f18fe6 hotspot/src/share/vm/runtime/stubRoutines.hpp --- a/hotspot/src/share/vm/runtime/stubRoutines.hpp Thu Sep 03 14:29:08 2015 -0700 +++ b/hotspot/src/share/vm/runtime/stubRoutines.hpp Thu Sep 03 15:03:12 2015 -0700 @@ -198,6 +198,7 @@ static address _crc_table_adr; static address _updateBytesCRC32C; + static address _updateBytesAdler32; static address _multiplyToLen; static address _squareToLen; @@ -364,6 +365,7 @@ static address crc_table_addr() { return _crc_table_adr; } static address updateBytesCRC32C() { return _updateBytesCRC32C; } + static address updateBytesAdler32() { return _updateBytesAdler32; } static address multiplyToLen() {return _multiplyToLen; } static address squareToLen() {return _squareToLen; } diff -r b4817ffce063 -r 632402f18fe6 hotspot/test/compiler/intrinsics/adler32/TestAdler32.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hotspot/test/compiler/intrinsics/adler32/TestAdler32.java Thu Sep 03 15:03:12 2015 -0700 @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * @test + * @bug 8132081 + * @summary C2 support for Adler32 on SPARC + * + * @run main/othervm/timeout=600 -Xbatch TestAdler32 -m + */ + +import java.nio.ByteBuffer; +import java.util.zip.Checksum; +import java.util.zip.Adler32; + +public class TestAdler32 { + public static void main(String[] args) { + int offset = Integer.getInteger("offset", 0); + int msgSize = Integer.getInteger("msgSize", 512); + boolean multi = false; + int iters = 20000; + int warmupIters = 20000; + + if (args.length > 0) { + if (args[0].equals("-m")) { + multi = true; + } else { + iters = Integer.valueOf(args[0]); + } + if (args.length > 1) { + warmupIters = Integer.valueOf(args[1]); + } + } + + if (multi) { + test_multi(warmupIters); + return; + } + + System.out.println(" offset = " + offset); + System.out.println("msgSize = " + msgSize + " bytes"); + System.out.println(" iters = " + iters); + + byte[] b = initializedBytes(msgSize, offset); + + Adler32 adler0 = new Adler32(); + Adler32 adler1 = new Adler32(); + Adler32 adler2 = new Adler32(); + + adler0.update(b, offset, msgSize); + + System.out.println("-------------------------------------------------------"); + + /* warm up */ + for (int i = 0; i < warmupIters; i++) { + adler1.reset(); + adler1.update(b, offset, msgSize); + } + + /* measure performance */ + long start = System.nanoTime(); + for (int i = 0; i < iters; i++) { + adler1.reset(); + adler1.update(b, offset, msgSize); + } + long end = 
System.nanoTime(); + double total = (double)(end - start)/1e9; // in seconds + double thruput = (double)msgSize*iters/1e6/total; // in MB/s + System.out.println("Adler32.update(byte[]) runtime = " + total + " seconds"); + System.out.println("Adler32.update(byte[]) throughput = " + thruput + " MB/s"); + + /* check correctness */ + for (int i = 0; i < iters; i++) { + adler1.reset(); + adler1.update(b, offset, msgSize); + if (!check(adler0, adler1)) break; + } + report("Adlers", adler0, adler1); + + System.out.println("-------------------------------------------------------"); + + ByteBuffer buf = ByteBuffer.allocateDirect(msgSize); + buf.put(b, offset, msgSize); + buf.flip(); + + /* warm up */ + for (int i = 0; i < warmupIters; i++) { + adler2.reset(); + adler2.update(buf); + buf.rewind(); + } + + /* measure performance */ + start = System.nanoTime(); + for (int i = 0; i < iters; i++) { + adler2.reset(); + adler2.update(buf); + buf.rewind(); + } + end = System.nanoTime(); + total = (double)(end - start)/1e9; // in seconds + thruput = (double)msgSize*iters/1e6/total; // in MB/s + System.out.println("Adler32.update(ByteBuffer) runtime = " + total + " seconds"); + System.out.println("Adler32.update(ByteBuffer) throughput = " + thruput + " MB/s"); + + /* check correctness */ + for (int i = 0; i < iters; i++) { + adler2.reset(); + adler2.update(buf); + buf.rewind(); + if (!check(adler0, adler2)) break; + } + report("Adlers", adler0, adler1); + + System.out.println("-------------------------------------------------------"); + } + + private static void report(String s, Checksum adler0, Checksum adler1) { + System.out.printf("%s: adler0 = %08x, adler1 = %08x\n", + s, adler0.getValue(), adler1.getValue()); + } + + private static boolean check(Checksum adler0, Checksum adler1) { + if (adler0.getValue() != adler1.getValue()) { + System.err.printf("ERROR: adler0 = %08x, adler1 = %08x\n", + adler0.getValue(), adler1.getValue()); + return false; + } + return true; + } + + private 
static byte[] initializedBytes(int M, int offset) { + byte[] bytes = new byte[M + offset]; + for (int i = 0; i < offset; i++) { + bytes[i] = (byte) i; + } + for (int i = offset; i < bytes.length; i++) { + bytes[i] = (byte) (i - offset); + } + return bytes; + } + + private static void test_multi(int iters) { + int len1 = 8; // the 8B/iteration loop + int len2 = 32; // the 32B/iteration loop + int len3 = 4096; // the 4KB/iteration loop + + byte[] b = initializedBytes(len3*16, 0); + int[] offsets = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 16, 32, 64, 128, 256, 512 }; + int[] sizes = { 0, 1, 2, 3, 4, 5, 6, 7, + len1, len1+1, len1+2, len1+3, len1+4, len1+5, len1+6, len1+7, + len1*2, len1*2+1, len1*2+3, len1*2+5, len1*2+7, + len2, len2+1, len2+3, len2+5, len2+7, + len2*2, len2*4, len2*8, len2*16, len2*32, len2*64, + len3, len3+1, len3+3, len3+5, len3+7, + len3*2, len3*4, len3*8, + len1+len2, len1+len2+1, len1+len2+3, len1+len2+5, len1+len2+7, + len1+len3, len1+len3+1, len1+len3+3, len1+len3+5, len1+len3+7, + len2+len3, len2+len3+1, len2+len3+3, len2+len3+5, len2+len3+7, + len1+len2+len3, len1+len2+len3+1, len1+len2+len3+3, + len1+len2+len3+5, len1+len2+len3+7, + (len1+len2+len3)*2, (len1+len2+len3)*2+1, (len1+len2+len3)*2+3, + (len1+len2+len3)*2+5, (len1+len2+len3)*2+7, + (len1+len2+len3)*3, (len1+len2+len3)*3-1, (len1+len2+len3)*3-3, + (len1+len2+len3)*3-5, (len1+len2+len3)*3-7 }; + Adler32[] adler0 = new Adler32[offsets.length*sizes.length]; + Adler32[] adler1 = new Adler32[offsets.length*sizes.length]; + int i, j, k; + + System.out.printf("testing %d cases ...\n", offsets.length*sizes.length); + + /* set the result from interpreter as reference */ + for (i = 0; i < offsets.length; i++) { + for (j = 0; j < sizes.length; j++) { + adler0[i*sizes.length + j] = new Adler32(); + adler1[i*sizes.length + j] = new Adler32(); + adler0[i*sizes.length + j].update(b, offsets[i], sizes[j]); + } + } + + /* warm up the JIT compiler and get result */ + for (k = 0; k < iters; k++) { + for (i = 0; 
i < offsets.length; i++) { + for (j = 0; j < sizes.length; j++) { + adler1[i*sizes.length + j].reset(); + adler1[i*sizes.length + j].update(b, offsets[i], sizes[j]); + } + } + } + + /* check correctness */ + for (i = 0; i < offsets.length; i++) { + for (j = 0; j < sizes.length; j++) { + if (!check(adler0[i*sizes.length + j], adler1[i*sizes.length + j])) { + System.out.printf("offsets[%d] = %d", i, offsets[i]); + System.out.printf("\tsizes[%d] = %d\n", j, sizes[j]); + } + } + } + } +}