8132081: C2 support for Adler32 on SPARC
Summary: Add C2 intrinsic support for Adler32 checksum on SPARC.
Reviewed-by: kvn
Contributed-by: ahmed.khawaja@oracle.com
--- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp Thu Sep 03 15:03:12 2015 -0700
@@ -177,6 +177,12 @@
if (UseCRC32 && (auxv & HWCAP_CRC32) == 0) {
warning("UseCRC32 specified, but not supported on this CPU");
}
+
+ if (UseAdler32Intrinsics) {
+ warning("Adler32Intrinsics not available on this CPU.");
+ FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
+ }
+
if (auxv & HWCAP_AES) {
UseAES = UseAES || FLAG_IS_DEFAULT(UseAES);
UseAESIntrinsics =
--- a/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp Thu Sep 03 15:03:12 2015 -0700
@@ -200,6 +200,11 @@
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
}
+ if (UseAdler32Intrinsics) {
+ warning("Adler32Intrinsics not available on this CPU.");
+ FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
+ }
+
if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
UseMultiplyToLenIntrinsic = true;
}
--- a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp Thu Sep 03 15:03:12 2015 -0700
@@ -5110,6 +5110,188 @@
return start;
}
+#define ADLER32_NUM_TEMPS 16
+
+ /**
+ * Generates the updateBytesAdler32 stub (Adler-32 over a byte buffer).
+ *
+ * Inputs:
+ * O0 - int adler (s1 in the low 16 bits, s2 in the next 16 bits)
+ * O1 - byte* buff
+ * O2 - int len (upper 32 bits are cleared before use)
+ *
+ * Output:
+ * O0 - int adler result ((s2 << 16) | s1, each reduced mod 0xFFF1)
+ */
+ address generate_updateBytesAdler32() {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "updateBytesAdler32");
+ address start = __ pc();
+
+ Label L_cleanup_loop, L_cleanup_loop_check;
+ Label L_main_loop_check, L_main_loop, L_inner_loop, L_inner_loop_check;
+ Label L_nmax_check_done;
+
+ // Aliases
+ Register s1 = O0;
+ Register s2 = O3;
+ Register buff = O1;
+ Register len = O2;
+ Register temp[ADLER32_NUM_TEMPS] = {L0, L1, L2, L3, L4, L5, L6, L7, I0, I1, I2, I3, I4, I5, G3, I7};
+
+ // Max number of bytes we can process before having to take the mod
+ // 0x15B0 is 5552 in decimal, the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
+ unsigned long NMAX = 0x15B0;
+
+ // Zero-out the upper bits of len
+ __ clruwu(len);
+
+ // Create the mask 0xFFFF
+ __ set64(0x00FFFF, O4, O5); // O5 is the temp register
+
+ // s1 is initialized to the lower 16 bits of adler
+ // s2 is initialized to the upper 16 bits of adler
+ __ srlx(O0, 16, O5); // adler >> 16
+ __ and3(O0, O4, s1); // s1 = (adler & 0xFFFF)
+ __ and3(O5, O4, s2); // s2 = ((adler >> 16) & 0xFFFF)
+
+ // The pipelined loop needs at least 16 elements for 1 iteration
+ // It does check this, but it is more effective to skip to the cleanup loop
+ // Setup the constant for cutoff checking
+ __ mov(15, O4);
+
+ // Check if we are above the cutoff, if not go to the cleanup loop immediately
+ __ cmp_and_br_short(len, O4, Assembler::lessEqualUnsigned, Assembler::pt, L_cleanup_loop_check);
+
+ // Free up some registers for our use: temp[i] is saved into float register 2*i
+ for (int i = 0; i < ADLER32_NUM_TEMPS; i++) {
+ __ movxtod(temp[i], as_FloatRegister(2*i));
+ }
+
+ // Loop maintenance stuff is done at the end of the loop, so skip to there
+ __ ba_short(L_main_loop_check);
+
+ __ BIND(L_main_loop);
+
+ // Prologue for inner loop
+ __ ldub(buff, 0, L0);
+ __ dec(O5); // prologue + epilogue together consume one 16-byte block
+
+ for (int i = 1; i < 8; i++) {
+ __ ldub(buff, i, temp[i]);
+ }
+
+ __ inc(buff, 8);
+
+ // Inner loop processes 16 elements at a time, might never execute if only 16 elements
+ // to be processed by the outer loop
+ __ ba_short(L_inner_loop_check);
+
+ __ BIND(L_inner_loop);
+
+ for (int i = 0; i < 8; i++) {
+ __ ldub(buff, (2*i), temp[(8+(2*i)) % ADLER32_NUM_TEMPS]);
+ __ add(s1, temp[i], s1);
+ __ ldub(buff, (2*i)+1, temp[(8+(2*i)+1) % ADLER32_NUM_TEMPS]);
+ __ add(s2, s1, s2);
+ }
+
+ // Original temp 0-7 used and new loads to temp 0-7 issued
+ // temp 8-15 ready to be consumed
+ __ add(s1, I0, s1);
+ __ dec(O5);
+ __ add(s2, s1, s2);
+ __ add(s1, I1, s1);
+ __ inc(buff, 16);
+ __ add(s2, s1, s2);
+
+ for (int i = 0; i < 6; i++) {
+ __ add(s1, temp[10+i], s1);
+ __ add(s2, s1, s2);
+ }
+
+ __ BIND(L_inner_loop_check);
+ __ nop();
+ __ cmp_and_br_short(O5, 0, Assembler::notEqual, Assembler::pt, L_inner_loop);
+
+ // Epilogue: load the last 8 bytes of the block while draining temp 0-7, then drain temp 8-15
+ for (int i = 0; i < 4; i++) {
+ __ ldub(buff, (2*i), temp[8+(2*i)]);
+ __ add(s1, temp[i], s1);
+ __ ldub(buff, (2*i)+1, temp[8+(2*i)+1]);
+ __ add(s2, s1, s2);
+ }
+
+ __ add(s1, temp[4], s1);
+ __ inc(buff, 8);
+
+ for (int i = 0; i < 11; i++) {
+ __ add(s2, s1, s2);
+ __ add(s1, temp[5+i], s1);
+ }
+
+ __ add(s2, s1, s2);
+
+ // Take the mod for s1 and s2 (x mod m computed as x - m * (x / m), m = 0xFFF1)
+ __ set64(0xFFF1, L0, L1);
+ __ udivx(s1, L0, L1);
+ __ udivx(s2, L0, L2);
+ __ mulx(L0, L1, L1);
+ __ mulx(L0, L2, L2);
+ __ sub(s1, L1, s1);
+ __ sub(s2, L2, s2);
+
+ // Make sure there is something left to process
+ __ BIND(L_main_loop_check);
+ __ set64(NMAX, L0, L1);
+ // k = len < NMAX ? (len & ~0xF) : NMAX
+ __ cmp_and_br_short(len, L0, Assembler::greaterEqualUnsigned, Assembler::pt, L_nmax_check_done);
+ __ andn(len, 0x0F, L0); // L0 = len & ~0xF: only process a multiple of 16 bytes
+ __ BIND(L_nmax_check_done);
+ __ mov(L0, O5);
+ __ sub(len, L0, len); // len -= k
+
+ __ srlx(O5, 4, O5); // O5 = k / 16, the number of 16-byte blocks
+ __ cmp_and_br_short(O5, 0, Assembler::notEqual, Assembler::pt, L_main_loop);
+
+ // Restore anything we used, take the mod one last time, combine and return
+ // Restore any registers we saved
+ for (int i = 0; i < ADLER32_NUM_TEMPS; i++) {
+ __ movdtox(as_FloatRegister(2*i), temp[i]);
+ }
+
+ // There might be nothing left to process
+ __ ba_short(L_cleanup_loop_check);
+
+ __ BIND(L_cleanup_loop);
+ __ ldub(buff, 0, O4); // load single byte from buffer
+ __ inc(buff); // buff++
+ __ add(s1, O4, s1); // s1 += *buff++;
+ __ dec(len); // len--
+ __ add(s1, s2, s2); // s2 += s1;
+ __ BIND(L_cleanup_loop_check);
+ __ nop();
+ __ cmp_and_br_short(len, 0, Assembler::notEqual, Assembler::pt, L_cleanup_loop);
+
+ // Take the mod one last time
+ __ set64(0xFFF1, O1, O2);
+ __ udivx(s1, O1, O2);
+ __ udivx(s2, O1, O5);
+ __ mulx(O1, O2, O2);
+ __ mulx(O1, O5, O5);
+ __ sub(s1, O2, s1);
+ __ sub(s2, O5, s2);
+
+ // Combine lower bits and higher bits
+ __ sllx(s2, 16, s2); // s2 = s2 << 16
+ __ or3(s1, s2, s1); // adler = s2 | s1
+ // Final return value is in O0
+ __ retl();
+ __ delayed()->nop();
+
+ return start;
+ }
+
void generate_initial() {
// Generates all stubs and initializes the entry points
@@ -5206,6 +5388,11 @@
if (UseCRC32CIntrinsics) {
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
}
+
+ // generate Adler32 intrinsics code
+ if (UseAdler32Intrinsics) {
+ StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
+ }
}
--- a/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp Thu Sep 03 15:03:12 2015 -0700
@@ -41,7 +41,7 @@
enum /* platform_dependent_constants */ {
// %%%%%%%% May be able to shrink this a lot
code_size1 = 20000, // simply increase if too small (assembler will crash if too small)
- code_size2 = 24000 // simply increase if too small (assembler will crash if too small)
+ code_size2 = 27000 // simply increase if too small (assembler will crash if too small)
};
class Sparc {
--- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp Thu Sep 03 15:03:12 2015 -0700
@@ -377,6 +377,15 @@
FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
}
+ if (UseVIS > 2) {
+ if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
+ FLAG_SET_DEFAULT(UseAdler32Intrinsics, true);
+ }
+ } else if (UseAdler32Intrinsics) {
+ warning("SPARC Adler32 intrinsics require VIS3 instruction support. Intrinsics will be disabled.");
+ FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
+ }
+
if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
(cache_line_size > ContendedPaddingWidth))
ContendedPaddingWidth = cache_line_size;
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp Thu Sep 03 15:03:12 2015 -0700
@@ -714,6 +714,11 @@
FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
}
+ if (UseAdler32Intrinsics) {
+ warning("Adler32Intrinsics not available on this CPU.");
+ FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
+ }
+
// Adjust RTM (Restricted Transactional Memory) flags
if (!supports_rtm() && UseRTMLocking) {
// Can't continue because UseRTMLocking affects UseBiasedLocking flag
--- a/hotspot/src/share/vm/classfile/vmSymbols.cpp Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/share/vm/classfile/vmSymbols.cpp Thu Sep 03 15:03:12 2015 -0700
@@ -625,6 +625,10 @@
case vmIntrinsics::_updateDirectByteBufferCRC32C:
if (!UseCRC32CIntrinsics) return true;
break;
+ case vmIntrinsics::_updateBytesAdler32:
+ case vmIntrinsics::_updateByteBufferAdler32:
+ if (!UseAdler32Intrinsics) return true;
+ break;
case vmIntrinsics::_copyMemory:
if (!InlineArrayCopy || !InlineUnsafeOps) return true;
break;
--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp Thu Sep 03 15:03:12 2015 -0700
@@ -927,6 +927,12 @@
do_intrinsic(_updateDirectByteBufferCRC32C, java_util_zip_CRC32C, updateDirectByteBuffer_C_name, updateByteBuffer_signature, F_S) \
do_name( updateDirectByteBuffer_C_name, "updateDirectByteBuffer") \
\
+ /* support for java.util.zip.Adler32 */ \
+ do_class(java_util_zip_Adler32, "java/util/zip/Adler32") \
+ do_intrinsic(_updateBytesAdler32, java_util_zip_Adler32, updateBytes_C_name, updateBytes_signature, F_SN) \
+ do_intrinsic(_updateByteBufferAdler32, java_util_zip_Adler32, updateByteBuffer_A_name, updateByteBuffer_signature, F_SN) \
+ do_name( updateByteBuffer_A_name, "updateByteBuffer") \
+ \
/* support for sun.misc.Unsafe */ \
do_class(sun_misc_Unsafe, "sun/misc/Unsafe") \
\
--- a/hotspot/src/share/vm/opto/c2compiler.cpp Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/share/vm/opto/c2compiler.cpp Thu Sep 03 15:03:12 2015 -0700
@@ -419,6 +419,8 @@
case vmIntrinsics::_updateByteBufferCRC32:
case vmIntrinsics::_updateBytesCRC32C:
case vmIntrinsics::_updateDirectByteBufferCRC32C:
+ case vmIntrinsics::_updateBytesAdler32:
+ case vmIntrinsics::_updateByteBufferAdler32:
case vmIntrinsics::_profileBoolean:
case vmIntrinsics::_isCompileConstant:
break;
--- a/hotspot/src/share/vm/opto/escape.cpp Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/share/vm/opto/escape.cpp Thu Sep 03 15:03:12 2015 -0700
@@ -963,6 +963,7 @@
strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 ||
strcmp(call->as_CallLeaf()->_name, "updateBytesCRC32") == 0 ||
strcmp(call->as_CallLeaf()->_name, "updateBytesCRC32C") == 0 ||
+ strcmp(call->as_CallLeaf()->_name, "updateBytesAdler32") == 0 ||
strcmp(call->as_CallLeaf()->_name, "aescrypt_encryptBlock") == 0 ||
strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 ||
strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 ||
--- a/hotspot/src/share/vm/opto/library_call.cpp Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/share/vm/opto/library_call.cpp Thu Sep 03 15:03:12 2015 -0700
@@ -296,6 +296,8 @@
Node* get_table_from_crc32c_class(ciInstanceKlass *crc32c_class);
bool inline_updateBytesCRC32C();
bool inline_updateDirectByteBufferCRC32C();
+ bool inline_updateBytesAdler32();
+ bool inline_updateByteBufferAdler32();
bool inline_multiplyToLen();
bool inline_squareToLen();
bool inline_mulAdd();
@@ -699,6 +701,11 @@
case vmIntrinsics::_updateDirectByteBufferCRC32C:
return inline_updateDirectByteBufferCRC32C();
+ case vmIntrinsics::_updateBytesAdler32:
+ return inline_updateBytesAdler32();
+ case vmIntrinsics::_updateByteBufferAdler32:
+ return inline_updateByteBufferAdler32();
+
case vmIntrinsics::_profileBoolean:
return inline_profileBoolean();
case vmIntrinsics::_isCompileConstant:
@@ -5547,6 +5554,87 @@
return true;
}
+//------------------------------inline_updateBytesAdler32----------------------
+//
+// Calculate Adler32 checksum for byte[] array.
+// int java.util.zip.Adler32.updateBytes(int crc, byte[] buf, int off, int len)
+//
+bool LibraryCallKit::inline_updateBytesAdler32() {
+ assert(UseAdler32Intrinsics, "Adler32 Instrinsic support need"); // NOTE(review): confirm this is the right flag to assert on
+ assert(callee()->signature()->size() == 4, "updateBytes has 4 parameters");
+ assert(callee()->holder()->is_loaded(), "Adler32 class must be loaded");
+ // no receiver since it is static method
+ Node* crc = argument(0); // type: int
+ Node* src = argument(1); // type: oop
+ Node* offset = argument(2); // type: int
+ Node* length = argument(3); // type: int
+
+ const Type* src_type = src->Value(&_gvn);
+ const TypeAryPtr* top_src = src_type->isa_aryptr();
+ if (top_src == NULL || top_src->klass() == NULL) {
+ // failed array check
+ return false;
+ }
+
+ // Only byte[] sources are handled; return false for any other element type.
+ BasicType src_elem = src_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+ if (src_elem != T_BYTE) {
+ return false;
+ }
+
+ // 'src_start' points to src array + scaled offset
+ Node* src_start = array_element_address(src, offset, src_elem);
+
+ // We assume that range check is done by caller.
+ // TODO: generate range check (offset+length < src.length) in debug VM.
+
+ // Call the stub; it takes (crc, address of first byte, length) and returns the new checksum.
+ address stubAddr = StubRoutines::updateBytesAdler32();
+ const char *stubName = "updateBytesAdler32";
+
+ Node* call = make_runtime_call(RC_LEAF, OptoRuntime::updateBytesAdler32_Type(),
+ stubAddr, stubName, TypePtr::BOTTOM,
+ crc, src_start, length);
+ Node* result = _gvn.transform(new ProjNode(call, TypeFunc::Parms));
+ set_result(result);
+ return true;
+}
+
+//------------------------------inline_updateByteBufferAdler32---------------
+//
+// Calculate Adler32 checksum for DirectByteBuffer.
+// int java.util.zip.Adler32.updateByteBuffer(int crc, long buf, int off, int len)
+//
+bool LibraryCallKit::inline_updateByteBufferAdler32() {
+ assert(UseAdler32Intrinsics, "Adler32 Instrinsic support need"); // NOTE(review): confirm this is the right flag to assert on
+ assert(callee()->signature()->size() == 5, "updateByteBuffer has 4 parameters and one is long");
+ assert(callee()->holder()->is_loaded(), "Adler32 class must be loaded");
+ // no receiver since it is static method
+ Node* crc = argument(0); // type: int
+ Node* src = argument(1); // type: long
+ Node* offset = argument(3); // type: int (the long 'src' takes argument slots 1 and 2)
+ Node* length = argument(4); // type: int
+
+ src = ConvL2X(src); // adjust Java long to machine word
+ Node* base = _gvn.transform(new CastX2PNode(src));
+ offset = ConvI2X(offset);
+
+ // 'src_start' is the raw address base + offset (byte offset, no scaling applied here)
+ Node* src_start = basic_plus_adr(top(), base, offset);
+
+ // Call the stub; the same updateBytesAdler32 stub is used as for the byte[] variant.
+ address stubAddr = StubRoutines::updateBytesAdler32();
+ const char *stubName = "updateBytesAdler32";
+
+ Node* call = make_runtime_call(RC_LEAF, OptoRuntime::updateBytesAdler32_Type(),
+ stubAddr, stubName, TypePtr::BOTTOM,
+ crc, src_start, length);
+
+ Node* result = _gvn.transform(new ProjNode(call, TypeFunc::Parms));
+ set_result(result);
+ return true;
+}
+
//----------------------------inline_reference_get----------------------------
// public T java.lang.ref.Reference.get();
bool LibraryCallKit::inline_reference_get() {
--- a/hotspot/src/share/vm/opto/runtime.cpp Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/share/vm/opto/runtime.cpp Thu Sep 03 15:03:12 2015 -0700
@@ -921,6 +921,28 @@
return TypeFunc::make(domain, range);
}
+/**
+* int updateBytesAdler32(int adler, bytes* b, int len) -- 'b' already points at src + offset
+*/
+const TypeFunc* OptoRuntime::updateBytesAdler32_Type() {
+ // create input type (domain)
+ int num_args = 3;
+ int argcnt = num_args;
+ const Type** fields = TypeTuple::fields(argcnt);
+ int argp = TypeFunc::Parms;
+ fields[argp++] = TypeInt::INT; // crc
+ fields[argp++] = TypePtr::NOTNULL; // src + offset
+ fields[argp++] = TypeInt::INT; // len
+ assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
+ const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
+
+ // result type needed
+ fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = TypeInt::INT; // crc result
+ const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields);
+ return TypeFunc::make(domain, range);
+}
+
// for cipherBlockChaining calls of aescrypt encrypt/decrypt, four pointers and a length, returning int
const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() {
// create input type (domain)
--- a/hotspot/src/share/vm/opto/runtime.hpp Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/share/vm/opto/runtime.hpp Thu Sep 03 15:03:12 2015 -0700
@@ -331,6 +331,8 @@
static const TypeFunc* updateBytesCRC32_Type();
static const TypeFunc* updateBytesCRC32C_Type();
+ static const TypeFunc* updateBytesAdler32_Type();
+
// leaf on stack replacement interpreter accessor types
static const TypeFunc* osr_end_Type();
--- a/hotspot/src/share/vm/runtime/globals.hpp Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/share/vm/runtime/globals.hpp Thu Sep 03 15:03:12 2015 -0700
@@ -836,6 +836,9 @@
product(bool, UseCRC32CIntrinsics, false, \
"use intrinsics for java.util.zip.CRC32C") \
\
+ product(bool, UseAdler32Intrinsics, false, \
+ "use intrinsics for java.util.zip.Adler32") \
+ \
diagnostic(ccstrlist, DisableIntrinsic, "", \
"do not expand intrinsics whose (internal) names appear here") \
\
--- a/hotspot/src/share/vm/runtime/stubRoutines.cpp Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/share/vm/runtime/stubRoutines.cpp Thu Sep 03 15:03:12 2015 -0700
@@ -139,6 +139,7 @@
address StubRoutines::_crc_table_adr = NULL;
address StubRoutines::_updateBytesCRC32C = NULL;
+address StubRoutines::_updateBytesAdler32 = NULL;
address StubRoutines::_multiplyToLen = NULL;
address StubRoutines::_squareToLen = NULL;
--- a/hotspot/src/share/vm/runtime/stubRoutines.hpp Thu Sep 03 14:29:08 2015 -0700
+++ b/hotspot/src/share/vm/runtime/stubRoutines.hpp Thu Sep 03 15:03:12 2015 -0700
@@ -198,6 +198,7 @@
static address _crc_table_adr;
static address _updateBytesCRC32C;
+ static address _updateBytesAdler32;
static address _multiplyToLen;
static address _squareToLen;
@@ -364,6 +365,7 @@
static address crc_table_addr() { return _crc_table_adr; }
static address updateBytesCRC32C() { return _updateBytesCRC32C; }
+ static address updateBytesAdler32() { return _updateBytesAdler32; }
static address multiplyToLen() {return _multiplyToLen; }
static address squareToLen() {return _squareToLen; }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/compiler/intrinsics/adler32/TestAdler32.java Thu Sep 03 15:03:12 2015 -0700
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8132081
+ * @summary C2 support for Adler32 on SPARC
+ *
+ * @run main/othervm/timeout=600 -Xbatch TestAdler32 -m
+ */
+
+import java.nio.ByteBuffer;
+import java.util.zip.Checksum;
+import java.util.zip.Adler32;
+
+public class TestAdler32 {
+ public static void main(String[] args) {
+ int offset = Integer.getInteger("offset", 0);
+ int msgSize = Integer.getInteger("msgSize", 512);
+ boolean multi = false;
+ int iters = 20000;
+ int warmupIters = 20000;
+
+ if (args.length > 0) {
+ if (args[0].equals("-m")) {
+ multi = true;
+ } else {
+ iters = Integer.valueOf(args[0]);
+ }
+ if (args.length > 1) {
+ warmupIters = Integer.valueOf(args[1]);
+ }
+ }
+
+ if (multi) {
+ test_multi(warmupIters);
+ return;
+ }
+
+ System.out.println(" offset = " + offset);
+ System.out.println("msgSize = " + msgSize + " bytes");
+ System.out.println(" iters = " + iters);
+
+ byte[] b = initializedBytes(msgSize, offset);
+
+ Adler32 adler0 = new Adler32();
+ Adler32 adler1 = new Adler32();
+ Adler32 adler2 = new Adler32();
+
+ adler0.update(b, offset, msgSize);
+
+ System.out.println("-------------------------------------------------------");
+
+ /* warm up */
+ for (int i = 0; i < warmupIters; i++) {
+ adler1.reset();
+ adler1.update(b, offset, msgSize);
+ }
+
+ /* measure performance */
+ long start = System.nanoTime();
+ for (int i = 0; i < iters; i++) {
+ adler1.reset();
+ adler1.update(b, offset, msgSize);
+ }
+ long end = System.nanoTime();
+ double total = (double)(end - start)/1e9; // in seconds
+ double thruput = (double)msgSize*iters/1e6/total; // in MB/s
+ System.out.println("Adler32.update(byte[]) runtime = " + total + " seconds");
+ System.out.println("Adler32.update(byte[]) throughput = " + thruput + " MB/s");
+
+ /* check correctness */
+ for (int i = 0; i < iters; i++) {
+ adler1.reset();
+ adler1.update(b, offset, msgSize);
+ if (!check(adler0, adler1)) break;
+ }
+ report("Adlers", adler0, adler1);
+
+ System.out.println("-------------------------------------------------------");
+
+ ByteBuffer buf = ByteBuffer.allocateDirect(msgSize);
+ buf.put(b, offset, msgSize);
+ buf.flip();
+
+ /* warm up */
+ for (int i = 0; i < warmupIters; i++) {
+ adler2.reset();
+ adler2.update(buf);
+ buf.rewind();
+ }
+
+ /* measure performance */
+ start = System.nanoTime();
+ for (int i = 0; i < iters; i++) {
+ adler2.reset();
+ adler2.update(buf);
+ buf.rewind();
+ }
+ end = System.nanoTime();
+ total = (double)(end - start)/1e9; // in seconds
+ thruput = (double)msgSize*iters/1e6/total; // in MB/s
+ System.out.println("Adler32.update(ByteBuffer) runtime = " + total + " seconds");
+ System.out.println("Adler32.update(ByteBuffer) throughput = " + thruput + " MB/s");
+
+ /* check correctness */
+ for (int i = 0; i < iters; i++) {
+ adler2.reset();
+ adler2.update(buf);
+ buf.rewind();
+ if (!check(adler0, adler2)) break;
+ }
+ report("Adlers", adler0, adler1);
+
+ System.out.println("-------------------------------------------------------");
+ }
+
+ private static void report(String s, Checksum adler0, Checksum adler1) {
+ System.out.printf("%s: adler0 = %08x, adler1 = %08x\n",
+ s, adler0.getValue(), adler1.getValue());
+ }
+
+ private static boolean check(Checksum adler0, Checksum adler1) {
+ if (adler0.getValue() != adler1.getValue()) {
+ System.err.printf("ERROR: adler0 = %08x, adler1 = %08x\n",
+ adler0.getValue(), adler1.getValue());
+ return false;
+ }
+ return true;
+ }
+
+ private static byte[] initializedBytes(int M, int offset) {
+ byte[] bytes = new byte[M + offset];
+ for (int i = 0; i < offset; i++) {
+ bytes[i] = (byte) i;
+ }
+ for (int i = offset; i < bytes.length; i++) {
+ bytes[i] = (byte) (i - offset);
+ }
+ return bytes;
+ }
+
+ private static void test_multi(int iters) {
+ int len1 = 8; // the 8B/iteration loop
+ int len2 = 32; // the 32B/iteration loop
+ int len3 = 4096; // the 4KB/iteration loop
+
+ byte[] b = initializedBytes(len3*16, 0);
+ int[] offsets = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 16, 32, 64, 128, 256, 512 };
+ int[] sizes = { 0, 1, 2, 3, 4, 5, 6, 7,
+ len1, len1+1, len1+2, len1+3, len1+4, len1+5, len1+6, len1+7,
+ len1*2, len1*2+1, len1*2+3, len1*2+5, len1*2+7,
+ len2, len2+1, len2+3, len2+5, len2+7,
+ len2*2, len2*4, len2*8, len2*16, len2*32, len2*64,
+ len3, len3+1, len3+3, len3+5, len3+7,
+ len3*2, len3*4, len3*8,
+ len1+len2, len1+len2+1, len1+len2+3, len1+len2+5, len1+len2+7,
+ len1+len3, len1+len3+1, len1+len3+3, len1+len3+5, len1+len3+7,
+ len2+len3, len2+len3+1, len2+len3+3, len2+len3+5, len2+len3+7,
+ len1+len2+len3, len1+len2+len3+1, len1+len2+len3+3,
+ len1+len2+len3+5, len1+len2+len3+7,
+ (len1+len2+len3)*2, (len1+len2+len3)*2+1, (len1+len2+len3)*2+3,
+ (len1+len2+len3)*2+5, (len1+len2+len3)*2+7,
+ (len1+len2+len3)*3, (len1+len2+len3)*3-1, (len1+len2+len3)*3-3,
+ (len1+len2+len3)*3-5, (len1+len2+len3)*3-7 };
+ Adler32[] adler0 = new Adler32[offsets.length*sizes.length];
+ Adler32[] adler1 = new Adler32[offsets.length*sizes.length];
+ int i, j, k;
+
+ System.out.printf("testing %d cases ...\n", offsets.length*sizes.length);
+
+ /* set the result from interpreter as reference */
+ for (i = 0; i < offsets.length; i++) {
+ for (j = 0; j < sizes.length; j++) {
+ adler0[i*sizes.length + j] = new Adler32();
+ adler1[i*sizes.length + j] = new Adler32();
+ adler0[i*sizes.length + j].update(b, offsets[i], sizes[j]);
+ }
+ }
+
+ /* warm up the JIT compiler and get result */
+ for (k = 0; k < iters; k++) {
+ for (i = 0; i < offsets.length; i++) {
+ for (j = 0; j < sizes.length; j++) {
+ adler1[i*sizes.length + j].reset();
+ adler1[i*sizes.length + j].update(b, offsets[i], sizes[j]);
+ }
+ }
+ }
+
+ /* check correctness */
+ for (i = 0; i < offsets.length; i++) {
+ for (j = 0; j < sizes.length; j++) {
+ if (!check(adler0[i*sizes.length + j], adler1[i*sizes.length + j])) {
+ System.out.printf("offsets[%d] = %d", i, offsets[i]);
+ System.out.printf("\tsizes[%d] = %d\n", j, sizes[j]);
+ }
+ }
+ }
+ }
+}