--- a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp Mon Aug 30 10:58:13 2010 -0700
+++ b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp Wed Sep 01 00:40:05 2010 -0700
@@ -1588,6 +1588,245 @@
}
//
+  // Generate stub for disjoint fill of a byte, short or int array.  If
+  // "aligned" is true, the "to" address is assumed to be heapword aligned.
+  //
+  // The fill value is replicated across a 64-bit register so that the bulk
+  // of the array is written with 8-byte stores.  Arrays shorter than 8 bytes
+  // are filled with element-sized stores only: "to" has not been aligned on
+  // that path and a misaligned stw/sth traps on SPARC.
+  //
+  // Arguments for generated stub:
+  //      to:    O0
+  //      value: O1
+  //      count: O2 treated as signed
+  //
+  // O3 is used as a temp register; the stub returns 0 in O0.
+  //
+  address generate_fill(BasicType t, bool aligned, const char* name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+
+    const Register to        = O0;   // destination array address
+    const Register value     = O1;   // fill value
+    const Register count     = O2;   // elements count
+    // O3 is used as a temp register
+
+    assert_clean_int(count, O3);     // Make sure 'count' is clean int.
+
+    Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
+    Label L_fill_2_bytes, L_fill_4_bytes, L_fill_32_bytes, L_fill_elements;
+
+    // shift = log2(number of elements that fit in 4 bytes)
+    int shift = -1;
+    switch (t) {
+      case T_BYTE:
+        shift = 2;
+        break;
+      case T_SHORT:
+        shift = 1;
+        break;
+      case T_INT:
+        shift = 0;
+        break;
+      default: ShouldNotReachHere();
+    }
+
+    BLOCK_COMMENT("Entry:");
+
+    if (t == T_BYTE) {
+      // Zero extend value and replicate the byte into the low 16 bits
+      __ and3(value, 0xff, value);
+      __ sllx(value, 8, O3);
+      __ or3(value, O3, value);
+    }
+    if (t == T_SHORT) {
+      // Zero extend value.  All 48 upper bits have to be cleared, not only
+      // bits 16..31: the pattern is later shifted into the upper word and
+      // or'ed together for the 8-byte stores.
+      __ sllx(value, 48, value);
+      __ srlx(value, 48, value);
+    }
+    if (t == T_BYTE || t == T_SHORT) {
+      // Replicate the low 16 bits into bits 16..31
+      __ sllx(value, 16, O3);
+      __ or3(value, O3, value);
+    }
+
+    __ cmp(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
+    __ brx(Assembler::lessUnsigned, false, Assembler::pn, L_fill_elements); // use unsigned cmp
+    __ delayed()->andcc(count, 1, G0);
+
+    if (!aligned && (t == T_BYTE || t == T_SHORT)) {
+      // align destination address at 4 bytes address boundary
+      if (t == T_BYTE) {
+        // One byte misalignment happens only for byte arrays
+        __ andcc(to, 1, G0);
+        __ br(Assembler::zero, false, Assembler::pt, L_skip_align1);
+        __ delayed()->nop();
+        __ stb(value, to, 0);
+        __ inc(to, 1);
+        __ dec(count, 1);
+        __ BIND(L_skip_align1);
+      }
+      // Two bytes misalignment happens only for byte and short (char) arrays
+      __ andcc(to, 2, G0);
+      __ br(Assembler::zero, false, Assembler::pt, L_skip_align2);
+      __ delayed()->nop();
+      __ sth(value, to, 0);
+      __ inc(to, 2);
+      __ dec(count, 1 << (shift - 1));
+      __ BIND(L_skip_align2);
+    }
+#ifdef _LP64
+    if (!aligned) {
+#endif
+    // align to 8 bytes, we know we are 4 byte aligned to start
+    __ andcc(to, 7, G0);
+    __ br(Assembler::zero, false, Assembler::pt, L_fill_32_bytes);
+    __ delayed()->nop();
+    __ stw(value, to, 0);
+    __ inc(to, 4);
+    __ dec(count, 1 << shift);
+    __ BIND(L_fill_32_bytes);
+#ifdef _LP64
+    }
+#endif
+
+    // Replicate the 32-bit pattern into all 64 bits.  This must be done
+    // before the branch below, because the 8-byte loop is also entered from
+    // that branch when fewer than 32 bytes are left.
+    if (t == T_INT) {
+      // Zero extend value
+      __ srl(value, 0, value);
+    }
+    if (t == T_BYTE || t == T_SHORT || t == T_INT) {
+      __ sllx(value, 32, O3);
+      __ or3(value, O3, value);
+    }
+
+    Label L_check_fill_8_bytes;
+    // Fill 32-byte chunks
+    __ subcc(count, 8 << shift, count);
+    __ brx(Assembler::less, false, Assembler::pt, L_check_fill_8_bytes);
+    __ delayed()->nop();
+
+    Label L_fill_32_bytes_loop;
+    __ align(16);
+    __ BIND(L_fill_32_bytes_loop);
+
+    __ stx(value, to, 0);
+    __ stx(value, to, 8);
+    __ stx(value, to, 16);
+    __ stx(value, to, 24);
+
+    __ subcc(count, 8 << shift, count);
+    __ brx(Assembler::greaterEqual, false, Assembler::pt, L_fill_32_bytes_loop);
+    __ delayed()->add(to, 32, to);
+
+    __ BIND(L_check_fill_8_bytes);
+    __ addcc(count, 8 << shift, count);
+    __ brx(Assembler::zero, false, Assembler::pn, L_exit);
+    __ delayed()->subcc(count, 1 << (shift + 1), count);
+    __ brx(Assembler::less, false, Assembler::pn, L_fill_4_bytes);
+    __ delayed()->andcc(count, 1<<shift, G0);
+
+    //
+    // length is too short, just fill 8 bytes at a time
+    //
+    Label L_fill_8_bytes_loop;
+    __ BIND(L_fill_8_bytes_loop);
+    __ stx(value, to, 0);
+    __ subcc(count, 1 << (shift + 1), count);
+    __ brx(Assembler::greaterEqual, false, Assembler::pn, L_fill_8_bytes_loop);
+    __ delayed()->add(to, 8, to);
+
+    // fill trailing 4 bytes
+    __ andcc(count, 1<<shift, G0);  // in delay slot of branches
+    if (t == T_INT) {
+      // Ints are always 4-byte aligned, so short int arrays can share this
+      // tail; for T_INT the entry branch's (count & 1) is the same test.
+      __ BIND(L_fill_elements);
+    }
+    __ BIND(L_fill_4_bytes);
+    __ brx(Assembler::zero, false, Assembler::pt, L_fill_2_bytes);
+    if (t == T_BYTE || t == T_SHORT) {
+      __ delayed()->andcc(count, 1<<(shift-1), G0);
+    } else {
+      __ delayed()->nop();
+    }
+    __ stw(value, to, 0);
+    if (t == T_BYTE || t == T_SHORT) {
+      __ inc(to, 4);
+      // fill trailing 2 bytes
+      __ andcc(count, 1<<(shift-1), G0); // in delay slot of branches
+      __ BIND(L_fill_2_bytes);
+      __ brx(Assembler::zero, false, Assembler::pt, L_fill_byte);
+      __ delayed()->andcc(count, 1, count);
+      __ sth(value, to, 0);
+      if (t == T_BYTE) {
+        __ inc(to, 2);
+        // fill trailing byte
+        __ andcc(count, 1, count);  // in delay slot of branches
+        __ BIND(L_fill_byte);
+        __ brx(Assembler::zero, false, Assembler::pt, L_exit);
+        __ delayed()->nop();
+        __ stb(value, to, 0);
+      } else {
+        __ BIND(L_fill_byte);
+      }
+    } else {
+      __ BIND(L_fill_2_bytes);
+    }
+    __ BIND(L_exit);
+    __ retl();
+    __ delayed()->mov(G0, O0); // return 0
+
+    // Arrays shorter than 8 bytes.  "to" may be misaligned for byte and
+    // short arrays, so only element-sized stores are used.  On entry the
+    // condition codes hold (count & 1), set in the delay slot of the entry
+    // branch.  Int arrays use the tail code above instead.
+    if (t == T_BYTE) {
+      Label L_fill_2, L_fill_4;
+      __ BIND(L_fill_elements);
+      __ brx(Assembler::zero, false, Assembler::pt, L_fill_2);
+      __ delayed()->andcc(count, 2, G0);
+      __ stb(value, to, 0);
+      __ inc(to, 1);
+      __ BIND(L_fill_2);
+      __ brx(Assembler::zero, false, Assembler::pt, L_fill_4);
+      __ delayed()->andcc(count, 4, G0);
+      __ stb(value, to, 0);
+      __ stb(value, to, 1);
+      __ inc(to, 2);
+      __ BIND(L_fill_4);
+      __ brx(Assembler::zero, false, Assembler::pt, L_exit);
+      __ delayed()->nop();
+      __ stb(value, to, 0);
+      __ stb(value, to, 1);
+      __ stb(value, to, 2);
+      __ br(Assembler::always, false, Assembler::pt, L_exit);
+      __ delayed()->stb(value, to, 3);
+    }
+    if (t == T_SHORT) {
+      Label L_fill_2;
+      __ BIND(L_fill_elements);
+      __ brx(Assembler::zero, false, Assembler::pt, L_fill_2);
+      __ delayed()->andcc(count, 2, G0);
+      __ sth(value, to, 0);
+      __ inc(to, 2);
+      __ BIND(L_fill_2);
+      __ brx(Assembler::zero, false, Assembler::pt, L_exit);
+      __ delayed()->nop();
+      __ sth(value, to, 0);
+      __ br(Assembler::always, false, Assembler::pt, L_exit);
+      __ delayed()->sth(value, to, 2);
+    }
+    return start;
+  }
+
+ //
// Generate stub for conjoint short copy. If "aligned" is true, the
// "from" and "to" addresses are assumed to be heapword aligned.
//
@@ -2855,6 +3034,13 @@
StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy");
StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy");
StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy");
+
+ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
+ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
+ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
+ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
+ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
+ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
}
void generate_initial() {
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp Mon Aug 30 10:58:13 2010 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp Wed Sep 01 00:40:05 2010 -0700
@@ -8767,6 +8767,186 @@
bind(DONE);
}
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) /* nothing */
+#else
+#define BLOCK_COMMENT(str) block_comment(str)
+#endif
+
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+void MacroAssembler::generate_fill(BasicType t, bool aligned, // Emit code that stores 'value' into 'count' consecutive elements of type t, starting at address 'to'.
+ Register to, Register value, Register count,
+ Register rtmp, XMMRegister xtmp) { // to, value, count and rtmp may be modified by the emitted code; xtmp is used only when UseSSE >= 2.
+ assert_different_registers(to, value, count, rtmp);
+ Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte;
+ Label L_fill_2_bytes, L_fill_4_bytes;
+
+ int shift = -1; // log2(number of elements that fit in 4 bytes)
+ switch (t) {
+ case T_BYTE:
+ shift = 2;
+ break;
+ case T_SHORT:
+ shift = 1;
+ break;
+ case T_INT:
+ shift = 0;
+ break;
+ default: ShouldNotReachHere();
+ }
+
+ if (t == T_BYTE) { // zero extend the byte and replicate it into the low 16 bits
+ andl(value, 0xff);
+ movl(rtmp, value);
+ shll(rtmp, 8);
+ orl(value, rtmp);
+ }
+ if (t == T_SHORT) { // zero extend the short
+ andl(value, 0xffff);
+ }
+ if (t == T_BYTE || t == T_SHORT) { // replicate the low 16 bits into the upper 16 bits
+ movl(rtmp, value);
+ shll(rtmp, 16);
+ orl(value, rtmp);
+ }
+
+ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) fill by element
+ jcc(Assembler::below, L_fill_4_bytes); // use unsigned cmp
+ if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
+ // align destination address at 4 bytes address boundary
+ if (t == T_BYTE) {
+ // One byte misalignment happens only for byte arrays
+ testptr(to, 1);
+ jccb(Assembler::zero, L_skip_align1);
+ movb(Address(to, 0), value);
+ increment(to);
+ decrement(count);
+ BIND(L_skip_align1);
+ }
+ // Two bytes misalignment happens only for byte and short (char) arrays
+ testptr(to, 2);
+ jccb(Assembler::zero, L_skip_align2);
+ movw(Address(to, 0), value);
+ addptr(to, 2);
+ subl(count, 1<<(shift-1));
+ BIND(L_skip_align2);
+ }
+ if (UseSSE < 2) { // this branch stores through the 32-bit 'value' register only
+ Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
+ // Fill 32-byte chunks
+ subl(count, 8 << shift);
+ jcc(Assembler::less, L_check_fill_8_bytes);
+ align(16);
+
+ BIND(L_fill_32_bytes_loop);
+
+ for (int i = 0; i < 32; i += 4) { // emits eight 4-byte stores
+ movl(Address(to, i), value);
+ }
+
+ addptr(to, 32);
+ subl(count, 8 << shift);
+ jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
+ BIND(L_check_fill_8_bytes);
+ addl(count, 8 << shift); // undo the last subtraction: count is the number of elements left
+ jccb(Assembler::zero, L_exit);
+ jmpb(L_fill_8_bytes);
+
+ //
+ // length is too short, just fill qwords
+ //
+ BIND(L_fill_8_bytes_loop);
+ movl(Address(to, 0), value);
+ movl(Address(to, 4), value);
+ addptr(to, 8);
+ BIND(L_fill_8_bytes);
+ subl(count, 1 << (shift + 1)); // 8 bytes worth of elements
+ jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
+ // fall through to fill 4 bytes
+ } else {
+ Label L_fill_32_bytes;
+ if (!UseUnalignedLoadStores) {
+ // align to 8 bytes, we know we are 4 byte aligned to start
+ testptr(to, 4);
+ jccb(Assembler::zero, L_fill_32_bytes);
+ movl(Address(to, 0), value);
+ addptr(to, 4);
+ subl(count, 1<<shift);
+ }
+ BIND(L_fill_32_bytes);
+ {
+ assert( UseSSE >= 2, "supported cpu only" );
+ Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
+ // Fill 32-byte chunks
+ movdl(xtmp, value);
+ pshufd(xtmp, xtmp, 0); // broadcast the low 32 bits of xtmp to all four dwords
+
+ subl(count, 8 << shift);
+ jcc(Assembler::less, L_check_fill_8_bytes);
+ align(16);
+
+ BIND(L_fill_32_bytes_loop);
+
+ if (UseUnalignedLoadStores) {
+ movdqu(Address(to, 0), xtmp);
+ movdqu(Address(to, 16), xtmp);
+ } else {
+ movq(Address(to, 0), xtmp);
+ movq(Address(to, 8), xtmp);
+ movq(Address(to, 16), xtmp);
+ movq(Address(to, 24), xtmp);
+ }
+
+ addptr(to, 32);
+ subl(count, 8 << shift);
+ jcc(Assembler::greaterEqual, L_fill_32_bytes_loop);
+ BIND(L_check_fill_8_bytes);
+ addl(count, 8 << shift); // undo the last subtraction: count is the number of elements left
+ jccb(Assembler::zero, L_exit);
+ jmpb(L_fill_8_bytes);
+
+ //
+ // length is too short, just fill qwords
+ //
+ BIND(L_fill_8_bytes_loop);
+ movq(Address(to, 0), xtmp);
+ addptr(to, 8);
+ BIND(L_fill_8_bytes);
+ subl(count, 1 << (shift + 1)); // 8 bytes worth of elements
+ jcc(Assembler::greaterEqual, L_fill_8_bytes_loop);
+ }
+ }
+ // fill trailing 4 bytes
+ BIND(L_fill_4_bytes);
+ testl(count, 1<<shift); // only this bit matters; count may be negative here after the 8-byte loop
+ jccb(Assembler::zero, L_fill_2_bytes);
+ movl(Address(to, 0), value);
+ if (t == T_BYTE || t == T_SHORT) {
+ addptr(to, 4);
+ BIND(L_fill_2_bytes);
+ // fill trailing 2 bytes
+ testl(count, 1<<(shift-1));
+ jccb(Assembler::zero, L_fill_byte);
+ movw(Address(to, 0), value);
+ if (t == T_BYTE) {
+ addptr(to, 2);
+ BIND(L_fill_byte);
+ // fill trailing byte
+ testl(count, 1);
+ jccb(Assembler::zero, L_exit);
+ movb(Address(to, 0), value);
+ } else {
+ BIND(L_fill_byte); // no single-byte tail for shorts; bind the label at the same place as L_exit
+ }
+ } else {
+ BIND(L_fill_2_bytes); // no sub-word tail for ints; bind the label at the same place as L_exit
+ }
+ BIND(L_exit);
+}
+#undef BIND
+#undef BLOCK_COMMENT
+
+
Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
switch (cond) {
// Note some conditions are synonyms for others
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp Mon Aug 30 10:58:13 2010 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp Wed Sep 01 00:40:05 2010 -0700
@@ -2242,6 +2242,11 @@
Register limit, Register result, Register chr,
XMMRegister vec1, XMMRegister vec2);
+ // Fill primitive arrays
+ void generate_fill(BasicType t, bool aligned,
+ Register to, Register value, Register count,
+ Register rtmp, XMMRegister xtmp);
+
#undef VIRTUAL
};
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp Mon Aug 30 10:58:13 2010 -0700
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp Wed Sep 01 00:40:05 2010 -0700
@@ -1039,6 +1039,33 @@
}
+  address generate_fill(BasicType t, bool aligned, const char *name) {
+    const Register dest_reg  = rdi;  // address of the first element to fill
+    const Register fill_reg  = rdx;  // value to store
+    const Register count_reg = rsi;  // number of elements
+    // Argument offsets are taken from rsp after enter() and the two pushes;
+    // 12 presumably spans those three saved words (review: confirm).
+    const int saved_bytes = 12;
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    const address entry_pc = __ pc();
+    BLOCK_COMMENT("Entry:");
+
+    __ enter();          // required for proper stackwalking of RuntimeStub frame
+    __ push(rsi);        // rsi and rdi receive arguments below, so save them
+    __ push(rdi);
+    __ movptr(dest_reg, Address(rsp, saved_bytes +  4));
+    __ movl(fill_reg,   Address(rsp, saved_bytes +  8));
+    __ movl(count_reg,  Address(rsp, saved_bytes + 12));
+    __ generate_fill(t, aligned, dest_reg, fill_reg, count_reg, rax, xmm0);
+    __ pop(rdi);         // restore in reverse push order
+    __ pop(rsi);
+    __ leave();          // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+    return entry_pc;
+  }
+
address generate_conjoint_copy(BasicType t, bool aligned,
Address::ScaleFactor sf,
address nooverlap_target,
@@ -2001,6 +2028,13 @@
generate_conjoint_long_copy(entry, &entry_jlong_arraycopy,
"jlong_arraycopy");
+ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
+ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
+ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
+ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
+ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
+ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
+
StubRoutines::_arrayof_jint_disjoint_arraycopy =
StubRoutines::_jint_disjoint_arraycopy;
StubRoutines::_arrayof_oop_disjoint_arraycopy =
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp Mon Aug 30 10:58:13 2010 -0700
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp Wed Sep 01 00:40:05 2010 -0700
@@ -1625,6 +1625,26 @@
return start;
}
+  address generate_fill(BasicType t, bool aligned, const char *name) {
+    // Fill stub: the three arguments are taken from c_rarg0..c_rarg2.
+    const Register dest_reg  = c_rarg0;  // address of the first element to fill
+    const Register fill_reg  = c_rarg1;  // value to store
+    const Register count_reg = c_rarg2;  // number of elements
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    const address entry_pc = __ pc();
+    BLOCK_COMMENT("Entry:");
+
+    // enter()/leave() bracket the fill code; they are required for proper
+    // stackwalking of the RuntimeStub frame.
+    __ enter();
+    __ generate_fill(t, aligned, dest_reg, fill_reg, count_reg, rax, xmm0);
+    __ leave();
+    __ ret(0);
+    return entry_pc;
+  }
+
// Arguments:
// aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
// ignored
@@ -2712,6 +2732,13 @@
StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy");
StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy");
+ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
+ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
+ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
+ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
+ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
+ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
+
// We don't generate specialized code for HeapWord-aligned source
// arrays, so just use the code we've already generated
StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy;
--- a/hotspot/src/share/vm/asm/codeBuffer.cpp Mon Aug 30 10:58:13 2010 -0700
+++ b/hotspot/src/share/vm/asm/codeBuffer.cpp Wed Sep 01 00:40:05 2010 -0700
@@ -143,13 +143,6 @@
void CodeBuffer::initialize_section_size(CodeSection* cs, csize_t size) {
assert(cs != &_insts, "insts is the memory provider, not the consumer");
-#ifdef ASSERT
- for (int n = (int)SECT_INSTS+1; n < (int)SECT_LIMIT; n++) {
- CodeSection* prevCS = code_section(n);
- if (prevCS == cs) break;
- assert(!prevCS->is_allocated(), "section allocation must be in reverse order");
- }
-#endif
csize_t slop = CodeSection::end_slop(); // margin between sections
int align = cs->alignment();
assert(is_power_of_2(align), "sanity");
@@ -199,13 +192,13 @@
_total_start = start;
_total_size = end - start;
} else {
- #ifdef ASSERT
+#ifdef ASSERT
// Clean out dangling pointers.
_total_start = badAddress;
+ _consts._start = _consts._end = badAddress;
_insts._start = _insts._end = badAddress;
_stubs._start = _stubs._end = badAddress;
- _consts._start = _consts._end = badAddress;
- #endif //ASSERT
+#endif //ASSERT
}
}
@@ -221,9 +214,9 @@
return NULL;
#else //PRODUCT
switch (n) {
+ case SECT_CONSTS: return "consts";
case SECT_INSTS: return "insts";
case SECT_STUBS: return "stubs";
- case SECT_CONSTS: return "consts";
default: return NULL;
}
#endif //PRODUCT
@@ -445,12 +438,11 @@
const CodeSection* prev_cs = NULL;
CodeSection* prev_dest_cs = NULL;
- for (int n = 0; n < (int)SECT_LIMIT; n++) {
+
+ for (int n = (int) SECT_FIRST; n < (int) SECT_LIMIT; n++) {
// figure compact layout of each section
const CodeSection* cs = code_section(n);
- address cstart = cs->start();
- address cend = cs->end();
- csize_t csize = cend - cstart;
+ csize_t csize = cs->size();
CodeSection* dest_cs = dest->code_section(n);
if (!cs->is_empty()) {
@@ -463,7 +455,7 @@
prev_dest_cs->_limit += padding;
}
#ifdef ASSERT
- if (prev_cs != NULL && prev_cs->is_frozen() && n < SECT_CONSTS) {
+ if (prev_cs != NULL && prev_cs->is_frozen() && n < (SECT_LIMIT - 1)) {
// Make sure the ends still match up.
// This is important because a branch in a frozen section
// might target code in a following section, via a Label,
@@ -492,22 +484,18 @@
assert(dest->verify_section_allocation(), "final configuration works");
}
-csize_t CodeBuffer::total_offset_of(address addr) const {
- csize_t code_size_so_far = 0;
- for (int n = 0; n < (int)SECT_LIMIT; n++) {
- const CodeSection* cs = code_section(n);
- if (!cs->is_empty()) {
- code_size_so_far = cs->align_at_start(code_size_so_far);
+csize_t CodeBuffer::total_offset_of(CodeSection* cs) const { // Returns the offset of section cs from the start of the first section, accumulating aligned section sizes in index order.
+ csize_t size_so_far = 0; // running offset of the section currently visited
+ for (int n = (int) SECT_FIRST; n < (int) SECT_LIMIT; n++) {
+ const CodeSection* cur_cs = code_section(n);
+ if (!cur_cs->is_empty()) {
+ size_so_far = cur_cs->align_at_start(size_so_far); // only non-empty sections contribute alignment padding
}
- if (cs->contains2(addr)) {
- return code_size_so_far + (addr - cs->start());
+ if (cur_cs->index() == cs->index()) { // the requested section is matched by index
+ return size_so_far;
}
- code_size_so_far += cs->size();
+ size_so_far += cur_cs->size(); // step over this section
}
-#ifndef PRODUCT
- tty->print_cr("Dangling address " PTR_FORMAT " in:", addr);
- ((CodeBuffer*)this)->print();
-#endif
ShouldNotReachHere();
return -1;
}
@@ -533,7 +521,7 @@
csize_t code_end_so_far = 0;
csize_t code_point_so_far = 0;
- for (int n = 0; n < (int)SECT_LIMIT; n++) {
+ for (int n = (int) SECT_FIRST; n < (int)SECT_LIMIT; n++) {
// pull relocs out of each section
const CodeSection* cs = code_section(n);
assert(!(cs->is_empty() && cs->locs_count() > 0), "sanity");
@@ -635,11 +623,14 @@
ICache::invalidate_range(dest_blob->code_begin(), dest_blob->code_size());
}
-// Move all my code into another code buffer.
-// Consult applicable relocs to repair embedded addresses.
+// Move all my code into another code buffer. Consult applicable
+// relocs to repair embedded addresses. The layout of the destination
+// CodeBuffer differs from that of the source CodeBuffer: the destination
+// CodeBuffer gets the final layout (consts, insts, stubs in order of
+// ascending address).
void CodeBuffer::relocate_code_to(CodeBuffer* dest) const {
DEBUG_ONLY(address dest_end = dest->_total_start + dest->_total_size);
- for (int n = 0; n < (int)SECT_LIMIT; n++) {
+ for (int n = (int) SECT_FIRST; n < (int) SECT_LIMIT; n++) {
// pull code out of each section
const CodeSection* cs = code_section(n);
if (cs->is_empty()) continue; // skip trivial section
@@ -681,20 +672,19 @@
csize_t* new_capacity) {
csize_t new_total_cap = 0;
- int prev_n = -1;
- for (int n = 0; n < (int)SECT_LIMIT; n++) {
+ for (int n = (int) SECT_FIRST; n < (int) SECT_LIMIT; n++) {
const CodeSection* sect = code_section(n);
if (!sect->is_empty()) {
- // Compute initial padding; assign it to the previous non-empty guy.
- // Cf. compute_final_layout.
+ // Compute initial padding; assign it to the previous section,
+ // even if it's empty (e.g. consts section can be empty).
+ // Cf. compute_final_layout
csize_t padding = sect->align_at_start(new_total_cap) - new_total_cap;
if (padding != 0) {
new_total_cap += padding;
- assert(prev_n >= 0, "sanity");
- new_capacity[prev_n] += padding;
+ assert(n - 1 >= SECT_FIRST, "sanity");
+ new_capacity[n - 1] += padding;
}
- prev_n = n;
}
csize_t exp = sect->size(); // 100% increase
@@ -774,11 +764,11 @@
this->_before_expand = bxp;
// Give each section its required (expanded) capacity.
- for (int n = (int)SECT_LIMIT-1; n >= SECT_INSTS; n--) {
+ for (int n = (int)SECT_LIMIT-1; n >= SECT_FIRST; n--) {
CodeSection* cb_sect = cb.code_section(n);
CodeSection* this_sect = code_section(n);
if (new_capacity[n] == 0) continue; // already nulled out
- if (n > SECT_INSTS) {
+ if (n != SECT_INSTS) {
cb.initialize_section_size(cb_sect, new_capacity[n]);
}
assert(cb_sect->capacity() >= new_capacity[n], "big enough");
@@ -844,17 +834,22 @@
assert(tstart >= _blob->content_begin(), "sanity");
assert(tend <= _blob->content_end(), "sanity");
}
- address tcheck = tstart; // advancing pointer to verify disjointness
- for (int n = 0; n < (int)SECT_LIMIT; n++) {
+ // Verify disjointness.
+ for (int n = (int) SECT_FIRST; n < (int) SECT_LIMIT; n++) {
CodeSection* sect = code_section(n);
- if (!sect->is_allocated()) continue;
- assert(sect->start() >= tcheck, "sanity");
- tcheck = sect->start();
- assert((intptr_t)tcheck % sect->alignment() == 0
+ if (!sect->is_allocated() || sect->is_empty()) continue;
+ assert((intptr_t)sect->start() % sect->alignment() == 0
|| sect->is_empty() || _blob == NULL,
"start is aligned");
- assert(sect->end() >= tcheck, "sanity");
- assert(sect->end() <= tend, "sanity");
+ for (int m = (int) SECT_FIRST; m < (int) SECT_LIMIT; m++) {
+ CodeSection* other = code_section(m);
+ if (!other->is_allocated() || other == sect) continue;
+ assert(!other->contains(sect->start() ), "sanity");
+ // limit is an exclusive address and can be the start of another
+ // section.
+ assert(!other->contains(sect->limit() - 1), "sanity");
+ }
+ assert(sect->end() <= tend, "sanity");
}
return true;
}
--- a/hotspot/src/share/vm/asm/codeBuffer.hpp Mon Aug 30 10:58:13 2010 -0700
+++ b/hotspot/src/share/vm/asm/codeBuffer.hpp Wed Sep 01 00:40:05 2010 -0700
@@ -289,10 +289,12 @@
public:
typedef int csize_t; // code size type; would be size_t except for history
enum {
- // Here is the list of all possible sections, in order of ascending address.
+ // Here is the list of all possible sections. The order reflects
+ // the final layout.
+ SECT_FIRST = 0,
+ SECT_CONSTS = SECT_FIRST, // Non-instruction data: Floats, jump tables, etc.
SECT_INSTS, // Executable instructions.
SECT_STUBS, // Outbound trampolines for supporting call sites.
- SECT_CONSTS, // Non-instruction data: Floats, jump tables, etc.
SECT_LIMIT, SECT_NONE = -1
};
@@ -304,9 +306,9 @@
const char* _name;
+ CodeSection _consts; // constants, jump tables
CodeSection _insts; // instructions (the main section)
CodeSection _stubs; // stubs (call site support), deopt, exception handling
- CodeSection _consts; // constants, jump tables
CodeBuffer* _before_expand; // dead buffer, from before the last expansion
@@ -334,9 +336,9 @@
}
void initialize(address code_start, csize_t code_size) {
+ _consts.initialize_outer(this, SECT_CONSTS);
_insts.initialize_outer(this, SECT_INSTS);
_stubs.initialize_outer(this, SECT_STUBS);
- _consts.initialize_outer(this, SECT_CONSTS);
_total_start = code_start;
_total_size = code_size;
// Initialize the main section:
@@ -414,16 +416,16 @@
// construction.
void initialize(csize_t code_size, csize_t locs_size);
+ CodeSection* consts() { return &_consts; }
CodeSection* insts() { return &_insts; }
CodeSection* stubs() { return &_stubs; }
- CodeSection* consts() { return &_consts; }
- // present sections in order; return NULL at end; insts is #0, etc.
+ // present sections in order; return NULL at end; consts is #0, etc.
CodeSection* code_section(int n) {
- // This makes the slightly questionable but portable assumption that
- // the various members (_insts, _stubs, etc.) are adjacent in the
- // layout of CodeBuffer.
- CodeSection* cs = &_insts + n;
+ // This makes the slightly questionable but portable assumption
+ // that the various members (_consts, _insts, _stubs, etc.) are
+ // adjacent in the layout of CodeBuffer.
+ CodeSection* cs = &_consts + n;
assert(cs->index() == n || !cs->is_allocated(), "sanity");
return cs;
}
@@ -484,9 +486,9 @@
// CodeBlob).
csize_t total_content_size() const;
- // combined offset (relative to start of insts) of given address,
- // as eventually found in the final CodeBlob
- csize_t total_offset_of(address addr) const;
+ // Combined offset (relative to start of first section) of given
+ // section, as eventually found in the final CodeBlob.
+ csize_t total_offset_of(CodeSection* cs) const;
// allocated size of all relocation data, including index, rounded up
csize_t total_relocation_size() const;
--- a/hotspot/src/share/vm/code/codeBlob.cpp Mon Aug 30 10:58:13 2010 -0700
+++ b/hotspot/src/share/vm/code/codeBlob.cpp Wed Sep 01 00:40:05 2010 -0700
@@ -92,7 +92,7 @@
_header_size = header_size;
_relocation_size = round_to(cb->total_relocation_size(), oopSize);
_content_offset = align_code_offset(header_size + _relocation_size);
- _code_offset = _content_offset + cb->total_offset_of(cb->insts()->start());
+ _code_offset = _content_offset + cb->total_offset_of(cb->insts());
_data_offset = _content_offset + round_to(cb->total_content_size(), oopSize);
assert(_data_offset <= size, "codeBlob is too small");
--- a/hotspot/src/share/vm/code/nmethod.cpp Mon Aug 30 10:58:13 2010 -0700
+++ b/hotspot/src/share/vm/code/nmethod.cpp Wed Sep 01 00:40:05 2010 -0700
@@ -87,9 +87,9 @@
int nmethod_count;
int total_size;
int relocation_size;
+ int consts_size;
int insts_size;
int stub_size;
- int consts_size;
int scopes_data_size;
int scopes_pcs_size;
int dependencies_size;
@@ -101,9 +101,9 @@
nmethod_count += 1;
total_size += nm->size();
relocation_size += nm->relocation_size();
+ consts_size += nm->consts_size();
insts_size += nm->insts_size();
stub_size += nm->stub_size();
- consts_size += nm->consts_size();
oops_size += nm->oops_size();
scopes_data_size += nm->scopes_data_size();
scopes_pcs_size += nm->scopes_pcs_size();
@@ -116,9 +116,9 @@
tty->print_cr("Statistics for %d bytecoded nmethods:", nmethod_count);
if (total_size != 0) tty->print_cr(" total in heap = %d", total_size);
if (relocation_size != 0) tty->print_cr(" relocation = %d", relocation_size);
+ if (consts_size != 0) tty->print_cr(" constants = %d", consts_size);
if (insts_size != 0) tty->print_cr(" main code = %d", insts_size);
if (stub_size != 0) tty->print_cr(" stub code = %d", stub_size);
- if (consts_size != 0) tty->print_cr(" constants = %d", consts_size);
if (oops_size != 0) tty->print_cr(" oops = %d", oops_size);
if (scopes_data_size != 0) tty->print_cr(" scopes data = %d", scopes_data_size);
if (scopes_pcs_size != 0) tty->print_cr(" scopes pcs = %d", scopes_pcs_size);
@@ -404,9 +404,9 @@
int nmethod::total_size() const {
return
+ consts_size() +
insts_size() +
stub_size() +
- consts_size() +
scopes_data_size() +
scopes_pcs_size() +
handler_table_size() +
@@ -789,13 +789,17 @@
_orig_pc_offset = orig_pc_offset;
// Section offsets
- _consts_offset = content_offset() + code_buffer->total_offset_of(code_buffer->consts()->start());
- _stub_offset = content_offset() + code_buffer->total_offset_of(code_buffer->stubs()->start());
+ _consts_offset = content_offset() + code_buffer->total_offset_of(code_buffer->consts());
+ _stub_offset = content_offset() + code_buffer->total_offset_of(code_buffer->stubs());
// Exception handler and deopt handler are in the stub section
_exception_offset = _stub_offset + offsets->value(CodeOffsets::Exceptions);
_deoptimize_offset = _stub_offset + offsets->value(CodeOffsets::Deopt);
- _deoptimize_mh_offset = _stub_offset + offsets->value(CodeOffsets::DeoptMH);
+ if (has_method_handle_invokes()) {
+ _deoptimize_mh_offset = _stub_offset + offsets->value(CodeOffsets::DeoptMH);
+ } else {
+ _deoptimize_mh_offset = -1;
+ }
if (offsets->value(CodeOffsets::UnwindHandler) != -1) {
_unwind_handler_offset = code_offset() + offsets->value(CodeOffsets::UnwindHandler);
} else {
@@ -885,9 +889,9 @@
xtty->print(" address='" INTPTR_FORMAT "'", (intptr_t) this);
LOG_OFFSET(xtty, relocation);
+ LOG_OFFSET(xtty, consts);
LOG_OFFSET(xtty, insts);
LOG_OFFSET(xtty, stub);
- LOG_OFFSET(xtty, consts);
LOG_OFFSET(xtty, scopes_data);
LOG_OFFSET(xtty, scopes_pcs);
LOG_OFFSET(xtty, dependencies);
@@ -2336,6 +2340,10 @@
relocation_begin(),
relocation_end(),
relocation_size());
+ if (consts_size () > 0) tty->print_cr(" constants [" INTPTR_FORMAT "," INTPTR_FORMAT "] = %d",
+ consts_begin(),
+ consts_end(),
+ consts_size());
if (insts_size () > 0) tty->print_cr(" main code [" INTPTR_FORMAT "," INTPTR_FORMAT "] = %d",
insts_begin(),
insts_end(),
@@ -2344,10 +2352,6 @@
stub_begin(),
stub_end(),
stub_size());
- if (consts_size () > 0) tty->print_cr(" constants [" INTPTR_FORMAT "," INTPTR_FORMAT "] = %d",
- consts_begin(),
- consts_end(),
- consts_size());
if (oops_size () > 0) tty->print_cr(" oops [" INTPTR_FORMAT "," INTPTR_FORMAT "] = %d",
oops_begin(),
oops_end(),
@@ -2372,10 +2376,6 @@
nul_chk_table_begin(),
nul_chk_table_end(),
nul_chk_table_size());
- if (oops_size () > 0) tty->print_cr(" oops [" INTPTR_FORMAT "," INTPTR_FORMAT "] = %d",
- oops_begin(),
- oops_end(),
- oops_size());
}
void nmethod::print_code() {
--- a/hotspot/src/share/vm/code/nmethod.hpp Mon Aug 30 10:58:13 2010 -0700
+++ b/hotspot/src/share/vm/code/nmethod.hpp Wed Sep 01 00:40:05 2010 -0700
@@ -143,8 +143,8 @@
#ifdef HAVE_DTRACE_H
int _trap_offset;
#endif // def HAVE_DTRACE_H
+ int _consts_offset;
int _stub_offset;
- int _consts_offset;
int _oops_offset; // offset to where embedded oop table begins (inside data)
int _scopes_data_offset;
int _scopes_pcs_offset;
@@ -336,16 +336,16 @@
bool is_compiled_by_shark() const;
// boundaries for different parts
- address insts_begin () const { return code_begin(); }
+ address consts_begin () const { return header_begin() + _consts_offset ; }
+ address consts_end () const { return header_begin() + code_offset() ; }
+ address insts_begin () const { return header_begin() + code_offset() ; }
address insts_end () const { return header_begin() + _stub_offset ; }
+ address stub_begin () const { return header_begin() + _stub_offset ; }
+ address stub_end () const { return header_begin() + _oops_offset ; }
address exception_begin () const { return header_begin() + _exception_offset ; }
address deopt_handler_begin () const { return header_begin() + _deoptimize_offset ; }
address deopt_mh_handler_begin() const { return header_begin() + _deoptimize_mh_offset ; }
address unwind_handler_begin () const { return _unwind_handler_offset != -1 ? (header_begin() + _unwind_handler_offset) : NULL; }
- address stub_begin () const { return header_begin() + _stub_offset ; }
- address stub_end () const { return header_begin() + _consts_offset ; }
- address consts_begin () const { return header_begin() + _consts_offset ; }
- address consts_end () const { return header_begin() + _oops_offset ; }
oop* oops_begin () const { return (oop*) (header_begin() + _oops_offset) ; }
oop* oops_end () const { return (oop*) (header_begin() + _scopes_data_offset) ; }
@@ -361,9 +361,9 @@
address nul_chk_table_end () const { return header_begin() + _nmethod_end_offset ; }
// Sizes
+ int consts_size () const { return consts_end () - consts_begin (); }
int insts_size () const { return insts_end () - insts_begin (); }
int stub_size () const { return stub_end () - stub_begin (); }
- int consts_size () const { return consts_end () - consts_begin (); }
int oops_size () const { return (address) oops_end () - (address) oops_begin (); }
int scopes_data_size () const { return scopes_data_end () - scopes_data_begin (); }
int scopes_pcs_size () const { return (intptr_t) scopes_pcs_end () - (intptr_t) scopes_pcs_begin (); }
@@ -374,9 +374,9 @@
int total_size () const;
// Containment
+ bool consts_contains (address addr) const { return consts_begin () <= addr && addr < consts_end (); }
bool insts_contains (address addr) const { return insts_begin () <= addr && addr < insts_end (); }
bool stub_contains (address addr) const { return stub_begin () <= addr && addr < stub_end (); }
- bool consts_contains (address addr) const { return consts_begin () <= addr && addr < consts_end (); }
bool oops_contains (oop* addr) const { return oops_begin () <= addr && addr < oops_end (); }
bool scopes_data_contains (address addr) const { return scopes_data_begin () <= addr && addr < scopes_data_end (); }
bool scopes_pcs_contains (PcDesc* addr) const { return scopes_pcs_begin () <= addr && addr < scopes_pcs_end (); }
--- a/hotspot/src/share/vm/code/relocInfo.cpp Mon Aug 30 10:58:13 2010 -0700
+++ b/hotspot/src/share/vm/code/relocInfo.cpp Wed Sep 01 00:40:05 2010 -0700
@@ -128,7 +128,16 @@
_code = nm;
_current = nm->relocation_begin() - 1;
_end = nm->relocation_end();
- _addr = (address) nm->code_begin();
+ _addr = nm->content_begin();
+
+ // Initialize code sections.
+ _section_start[CodeBuffer::SECT_CONSTS] = nm->consts_begin();
+ _section_start[CodeBuffer::SECT_INSTS ] = nm->insts_begin() ;
+ _section_start[CodeBuffer::SECT_STUBS ] = nm->stub_begin() ;
+
+ _section_end [CodeBuffer::SECT_CONSTS] = nm->consts_end() ;
+ _section_end [CodeBuffer::SECT_INSTS ] = nm->insts_end() ;
+ _section_end [CodeBuffer::SECT_STUBS ] = nm->stub_end() ;
assert(!has_current(), "just checking");
assert(begin == NULL || begin >= nm->code_begin(), "in bounds");
@@ -146,9 +155,11 @@
_code = NULL; // Not cb->blob();
CodeBuffer* cb = cs->outer();
- assert((int)SECT_LIMIT == CodeBuffer::SECT_LIMIT, "my copy must be equal");
- for (int n = 0; n < (int)SECT_LIMIT; n++) {
- _section_start[n] = cb->code_section(n)->start();
+ assert((int) SECT_LIMIT == CodeBuffer::SECT_LIMIT, "my copy must be equal");
+ for (int n = (int) CodeBuffer::SECT_FIRST; n < (int) CodeBuffer::SECT_LIMIT; n++) {
+ CodeSection* cs = cb->code_section(n);
+ _section_start[n] = cs->start();
+ _section_end [n] = cs->end();
}
assert(!has_current(), "just checking");
@@ -166,6 +177,12 @@
};
+bool RelocIterator::addr_in_const() const {
+ const int n = CodeBuffer::SECT_CONSTS;
+ return section_start(n) <= addr() && addr() < section_end(n);
+}
+
+
static inline int num_cards(int code_size) {
return (code_size-1) / indexCardSize;
}
@@ -360,31 +377,12 @@
}
-address RelocIterator::compute_section_start(int n) const {
-// This routine not only computes a section start, but also
-// memoizes it for later.
-#define CACHE ((RelocIterator*)this)->_section_start[n]
- CodeBlob* cb = code();
- guarantee(cb != NULL, "must have a code blob");
- if (n == CodeBuffer::SECT_INSTS)
- return CACHE = cb->code_begin();
- assert(cb->is_nmethod(), "only nmethods have these sections");
- nmethod* nm = (nmethod*) cb;
- address res = NULL;
- switch (n) {
- case CodeBuffer::SECT_STUBS:
- res = nm->stub_begin();
- break;
- case CodeBuffer::SECT_CONSTS:
- res = nm->consts_begin();
- break;
- default:
- ShouldNotReachHere();
+void RelocIterator::initialize_misc() {
+ set_has_current(false);
+ for (int i = (int) CodeBuffer::SECT_FIRST; i < (int) CodeBuffer::SECT_LIMIT; i++) {
+ _section_start[i] = NULL; // reset here; must be set explicitly before use (section_start() asserts non-NULL)
+ _section_end [i] = NULL;
}
- assert(nm->contains(res) || res == nm->code_end(), "tame pointer");
- CACHE = res;
- return res;
-#undef CACHE
}
--- a/hotspot/src/share/vm/code/relocInfo.hpp Mon Aug 30 10:58:13 2010 -0700
+++ b/hotspot/src/share/vm/code/relocInfo.hpp Wed Sep 01 00:40:05 2010 -0700
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2008, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -502,8 +502,7 @@
// }
class RelocIterator : public StackObj {
- enum { SECT_CONSTS = 2,
- SECT_LIMIT = 3 }; // must be equal to CodeBuffer::SECT_LIMIT
+ enum { SECT_LIMIT = 3 }; // must be equal to CodeBuffer::SECT_LIMIT, checked in ctor
friend class Relocation;
friend class relocInfo; // for change_reloc_info_for_address only
typedef relocInfo::relocType relocType;
@@ -521,6 +520,7 @@
// Base addresses needed to compute targets of section_word_type relocs.
address _section_start[SECT_LIMIT];
+ address _section_end [SECT_LIMIT];
void set_has_current(bool b) {
_datalen = !b ? -1 : 0;
@@ -540,14 +540,7 @@
void advance_over_prefix(); // helper method
- void initialize_misc() {
- set_has_current(false);
- for (int i = 0; i < SECT_LIMIT; i++) {
- _section_start[i] = NULL; // these will be lazily computed, if needed
- }
- }
-
- address compute_section_start(int n) const; // out-of-line helper
+ void initialize_misc();
void initialize(nmethod* nm, address begin, address limit);
@@ -598,11 +591,15 @@
bool has_current() const { return _datalen >= 0; }
void set_addr(address addr) { _addr = addr; }
- bool addr_in_const() const { return addr() >= section_start(SECT_CONSTS); }
+ bool addr_in_const() const;
address section_start(int n) const {
- address res = _section_start[n];
- return (res != NULL) ? res : compute_section_start(n);
+ assert(_section_start[n], "must be initialized");
+ return _section_start[n];
+ }
+ address section_end(int n) const {
+ assert(_section_end[n], "must be initialized");
+ return _section_end[n];
}
// The address points to the affected displacement part of the instruction.
--- a/hotspot/src/share/vm/includeDB_compiler2 Mon Aug 30 10:58:13 2010 -0700
+++ b/hotspot/src/share/vm/includeDB_compiler2 Wed Sep 01 00:40:05 2010 -0700
@@ -625,6 +625,7 @@
loopTransform.cpp loopnode.hpp
loopTransform.cpp mulnode.hpp
loopTransform.cpp rootnode.hpp
+loopTransform.cpp runtime.hpp
loopTransform.cpp subnode.hpp
loopUnswitch.cpp allocation.inline.hpp
--- a/hotspot/src/share/vm/opto/addnode.cpp Mon Aug 30 10:58:13 2010 -0700
+++ b/hotspot/src/share/vm/opto/addnode.cpp Wed Sep 01 00:40:05 2010 -0700
@@ -705,6 +705,9 @@
}
addr = addr->in(AddPNode::Address);
}
+ if (addr != base) {
+ return -1;
+ }
return count;
}
--- a/hotspot/src/share/vm/opto/c2_globals.hpp Mon Aug 30 10:58:13 2010 -0700
+++ b/hotspot/src/share/vm/opto/c2_globals.hpp Wed Sep 01 00:40:05 2010 -0700
@@ -157,6 +157,12 @@
develop(bool, TraceLoopPredicate, false, \
"Trace generation of loop predicates") \
\
+ product(bool, OptimizeFill, false, \
+ "convert fill/copy loops into intrinsic") \
+ \
+ develop(bool, TraceOptimizeFill, false, \
+ "print detailed information about fill conversion") \
+ \
develop(bool, OptoCoalesce, true, \
"Use Conservative Copy Coalescing in the Register Allocator") \
\
--- a/hotspot/src/share/vm/opto/loopTransform.cpp Mon Aug 30 10:58:13 2010 -0700
+++ b/hotspot/src/share/vm/opto/loopTransform.cpp Wed Sep 01 00:40:05 2010 -0700
@@ -2049,11 +2049,18 @@
if (cmp->Opcode() != Op_CmpU ) {
return false;
}
- if (cmp->in(2)->Opcode() != Op_LoadRange) {
- return false;
+ Node* range = cmp->in(2);
+ if (range->Opcode() != Op_LoadRange) {
+ const TypeInt* tint = phase->_igvn.type(range)->isa_int();
+ if (!OptimizeFill || tint == NULL || tint->empty() || tint->_lo < 0) {
+ // Allow predication on positive values that aren't LoadRanges.
+ // This allows optimization of loops where the length of the
+ // array is a known value and doesn't need to be loaded back
+ // from the array.
+ return false;
+ }
}
- LoadRangeNode* lr = (LoadRangeNode*)cmp->in(2);
- if (!invar.is_invariant(lr)) { // loadRange must be invariant
+ if (!invar.is_invariant(range)) {
return false;
}
Node *iv = _head->as_CountedLoop()->phi();
@@ -2248,9 +2255,9 @@
const Node* cmp = bol->in(1)->as_Cmp();
Node* idx = cmp->in(1);
assert(!invar.is_invariant(idx), "index is variant");
- assert(cmp->in(2)->Opcode() == Op_LoadRange, "must be");
- Node* ld_rng = cmp->in(2); // LoadRangeNode
- assert(invar.is_invariant(ld_rng), "load range must be invariant");
+ assert(cmp->in(2)->Opcode() == Op_LoadRange || OptimizeFill, "must be");
+ Node* rng = cmp->in(2);
+ assert(invar.is_invariant(rng), "range must be invariant");
int scale = 1;
Node* offset = zero;
bool ok = is_scaled_iv_plus_offset(idx, cl->phi(), &scale, &offset);
@@ -2271,21 +2278,21 @@
// Perform cloning to keep Invariance state correct since the
// late schedule will place invariant things in the loop.
- ld_rng = invar.clone(ld_rng, ctrl);
+ rng = invar.clone(rng, ctrl);
if (offset && offset != zero) {
assert(invar.is_invariant(offset), "offset must be loop invariant");
offset = invar.clone(offset, ctrl);
}
// Test the lower bound
- Node* lower_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, ld_rng, false);
+ Node* lower_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, rng, false);
IfNode* lower_bound_iff = lower_bound_proj->in(0)->as_If();
_igvn.hash_delete(lower_bound_iff);
lower_bound_iff->set_req(1, lower_bound_bol);
if (TraceLoopPredicate) tty->print_cr("lower bound check if: %d", lower_bound_iff->_idx);
// Test the upper bound
- Node* upper_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, ld_rng, true);
+ Node* upper_bound_bol = rc_predicate(ctrl, scale, offset, init, limit, stride, rng, true);
IfNode* upper_bound_iff = upper_bound_proj->in(0)->as_If();
_igvn.hash_delete(upper_bound_iff);
upper_bound_iff->set_req(1, upper_bound_bol);
@@ -2366,3 +2373,348 @@
return hoisted;
}
+
+
+// Process all the loops in the loop tree and replace any fill
+// patterns with an intrinsic version.
+bool PhaseIdealLoop::do_intrinsify_fill() {
+ bool changed = false;
+ for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
+ IdealLoopTree* lpt = iter.current();
+ changed |= intrinsify_fill(lpt);
+ }
+ return changed;
+}
+
+
+// Examine an inner loop looking for a single store of an invariant
+// value in a unit stride loop.
+bool PhaseIdealLoop::match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
+ Node*& shift, Node*& con) {
+ const char* msg = NULL;
+ Node* msg_node = NULL;
+
+ store_value = NULL;
+ con = NULL;
+ shift = NULL;
+
+ // Process the loop looking for stores. If there are multiple
+ // stores or extra control flow give up at this point.
+ CountedLoopNode* head = lpt->_head->as_CountedLoop();
+ for (uint i = 0; msg == NULL && i < lpt->_body.size(); i++) {
+ Node* n = lpt->_body.at(i);
+ if (n->outcnt() == 0) continue; // Ignore dead
+ if (n->is_Store()) {
+ if (store != NULL) {
+ msg = "multiple stores";
+ break;
+ }
+ int opc = n->Opcode();
+ if (opc == Op_StoreP || opc == Op_StoreN || opc == Op_StoreCM) {
+ msg = "oop fills not handled";
+ break;
+ }
+ Node* value = n->in(MemNode::ValueIn);
+ if (!lpt->is_invariant(value)) {
+ msg = "variant store value";
+ }
+ store = n;
+ store_value = value;
+ } else if (n->is_If() && n != head->loopexit()) {
+ msg = "extra control flow";
+ msg_node = n;
+ }
+ }
+
+ if (store == NULL) {
+ // No store in loop
+ return false;
+ }
+
+ if (msg == NULL && head->stride_con() != 1) {
+ // could handle negative strides too
+ if (head->stride_con() < 0) {
+ msg = "negative stride";
+ } else {
+ msg = "non-unit stride";
+ }
+ }
+
+ if (msg == NULL && !store->in(MemNode::Address)->is_AddP()) {
+ msg = "can't handle store address";
+ msg_node = store->in(MemNode::Address);
+ }
+
+ // Make sure there is an appropriate fill routine
+ BasicType t = store->as_Mem()->memory_type();
+ const char* fill_name;
+ if (msg == NULL &&
+ StubRoutines::select_fill_function(t, false, fill_name) == NULL) {
+ msg = "unsupported store";
+ msg_node = store;
+ }
+
+ if (msg != NULL) {
+#ifndef PRODUCT
+ if (TraceOptimizeFill) {
+ tty->print_cr("not fill intrinsic candidate: %s", msg);
+ if (msg_node != NULL) msg_node->dump();
+ }
+#endif
+ return false;
+ }
+
+ // Make sure the address expression can be handled. It should be
+ // head->phi * elsize + con. head->phi might have a ConvI2L.
+ Node* elements[4];
+ Node* conv = NULL;
+ int count = store->in(MemNode::Address)->as_AddP()->unpack_offsets(elements, ARRAY_SIZE(elements));
+ for (int e = 0; e < count; e++) {
+ Node* n = elements[e];
+ if (n->is_Con() && con == NULL) {
+ con = n;
+ } else if (n->Opcode() == Op_LShiftX && shift == NULL) {
+ Node* value = n->in(1);
+#ifdef _LP64
+ if (value->Opcode() == Op_ConvI2L) {
+ conv = value;
+ value = value->in(1);
+ }
+#endif
+ if (value != head->phi()) {
+ msg = "unhandled shift in address";
+ } else {
+ shift = n;
+ assert(type2aelembytes(store->as_Mem()->memory_type(), true) == 1 << shift->in(2)->get_int(), "scale should match");
+ }
+ } else if (n->Opcode() == Op_ConvI2L && conv == NULL) {
+ if (n->in(1) == head->phi()) {
+ conv = n;
+ } else {
+ msg = "unhandled input to ConvI2L";
+ }
+ } else if (n == head->phi()) {
+ // no shift, check below for allowed cases
+ } else {
+ msg = "unhandled node in address";
+ msg_node = n;
+ }
+ }
+
+ if (count == -1) {
+ msg = "malformed address expression";
+ msg_node = store;
+ }
+
+ // byte sized items won't have a shift
+ if (msg == NULL && shift == NULL && t != T_BYTE && t != T_BOOLEAN) {
+ msg = "can't find shift";
+ msg_node = store;
+ }
+
+ if (msg != NULL) {
+#ifndef PRODUCT
+ if (TraceOptimizeFill) {
+ tty->print_cr("not fill intrinsic: %s", msg);
+ if (msg_node != NULL) msg_node->dump();
+ }
+#endif
+ return false;
+ }
+
+ // Now make sure all the other nodes in the loop can be handled
+ VectorSet ok(Thread::current()->resource_area());
+
+ // store related values are ok
+ ok.set(store->_idx);
+ ok.set(store->in(MemNode::Memory)->_idx);
+
+ // Loop structure is ok
+ ok.set(head->_idx);
+ ok.set(head->loopexit()->_idx);
+ ok.set(head->phi()->_idx);
+ ok.set(head->incr()->_idx);
+ ok.set(head->loopexit()->cmp_node()->_idx);
+ ok.set(head->loopexit()->in(1)->_idx);
+
+ // Address elements are ok
+ if (con) ok.set(con->_idx);
+ if (shift) ok.set(shift->_idx);
+ if (conv) ok.set(conv->_idx);
+
+ for (uint i = 0; msg == NULL && i < lpt->_body.size(); i++) {
+ Node* n = lpt->_body.at(i);
+ if (n->outcnt() == 0) continue; // Ignore dead
+ if (ok.test(n->_idx)) continue;
+ // Backedge projection is ok
+ if (n->is_IfTrue() && n->in(0) == head->loopexit()) continue;
+ if (!n->is_AddP()) {
+ msg = "unhandled node";
+ msg_node = n;
+ break;
+ }
+ }
+
+ // Make sure no unexpected values are used outside the loop
+ for (uint i = 0; msg == NULL && i < lpt->_body.size(); i++) {
+ Node* n = lpt->_body.at(i);
+ // These values can be replaced with other nodes if they are used
+ // outside the loop.
+ if (n == store || n == head->loopexit() || n == head->incr()) continue;
+ for (SimpleDUIterator iter(n); iter.has_next(); iter.next()) {
+ Node* use = iter.get();
+ if (!lpt->_body.contains(use)) {
+ msg = "node is used outside loop";
+ // lpt->_body.dump();
+ msg_node = n;
+ break;
+ }
+ }
+ }
+
+#ifdef ASSERT
+ if (TraceOptimizeFill) {
+ if (msg != NULL) {
+ tty->print_cr("no fill intrinsic: %s", msg);
+ if (msg_node != NULL) msg_node->dump();
+ } else {
+ tty->print_cr("fill intrinsic for:");
+ }
+ store->dump();
+ if (Verbose) {
+ lpt->_body.dump();
+ }
+ }
+#endif
+
+ return msg == NULL;
+}
+
+
+
+bool PhaseIdealLoop::intrinsify_fill(IdealLoopTree* lpt) {
+ // Only for counted inner loops
+ if (!lpt->is_counted() || !lpt->is_inner()) {
+ return false;
+ }
+
+ // Must have constant stride
+ CountedLoopNode* head = lpt->_head->as_CountedLoop();
+ if (!head->stride_is_con() || !head->is_normal_loop()) {
+ return false;
+ }
+
+ // Check that the body only contains a store of a loop invariant
+ // value that is indexed by the loop phi.
+ Node* store = NULL;
+ Node* store_value = NULL;
+ Node* shift = NULL;
+ Node* offset = NULL;
+ if (!match_fill_loop(lpt, store, store_value, shift, offset)) {
+ return false;
+ }
+
+ // Now replace the whole loop body by a call to a fill routine that
+ // covers the same region as the loop.
+ Node* base = store->in(MemNode::Address)->as_AddP()->in(AddPNode::Base);
+
+ // Build an expression for the beginning of the copy region
+ Node* index = head->init_trip();
+#ifdef _LP64
+ index = new (C, 2) ConvI2LNode(index);
+ _igvn.register_new_node_with_optimizer(index);
+#endif
+ if (shift != NULL) {
+ // byte arrays don't require a shift but others do.
+ index = new (C, 3) LShiftXNode(index, shift->in(2));
+ _igvn.register_new_node_with_optimizer(index);
+ }
+ index = new (C, 4) AddPNode(base, base, index);
+ _igvn.register_new_node_with_optimizer(index);
+ Node* from = new (C, 4) AddPNode(base, index, offset);
+ _igvn.register_new_node_with_optimizer(from);
+ // Compute the number of elements to copy
+ Node* len = new (C, 3) SubINode(head->limit(), head->init_trip());
+ _igvn.register_new_node_with_optimizer(len);
+
+ BasicType t = store->as_Mem()->memory_type();
+ bool aligned = false;
+ if (offset != NULL && head->init_trip()->is_Con()) {
+ int element_size = type2aelembytes(t);
+ aligned = (offset->find_intptr_t_type()->get_con() + head->init_trip()->get_int() * element_size) % HeapWordSize == 0;
+ }
+
+ // Build a call to the fill routine
+ const char* fill_name;
+ address fill = StubRoutines::select_fill_function(t, aligned, fill_name);
+ assert(fill != NULL, "what?");
+
+ // Convert float/double to int/long for fill routines
+ if (t == T_FLOAT) {
+ store_value = new (C, 2) MoveF2INode(store_value);
+ _igvn.register_new_node_with_optimizer(store_value);
+ } else if (t == T_DOUBLE) {
+ store_value = new (C, 2) MoveD2LNode(store_value);
+ _igvn.register_new_node_with_optimizer(store_value);
+ }
+
+ Node* mem_phi = store->in(MemNode::Memory);
+ Node* result_ctrl;
+ Node* result_mem;
+ const TypeFunc* call_type = OptoRuntime::array_fill_Type();
+ int size = call_type->domain()->cnt();
+ CallLeafNode *call = new (C, size) CallLeafNoFPNode(call_type, fill,
+ fill_name, TypeAryPtr::get_array_body_type(t));
+ call->init_req(TypeFunc::Parms+0, from);
+ call->init_req(TypeFunc::Parms+1, store_value);
+ call->init_req(TypeFunc::Parms+2, len);
+ call->init_req( TypeFunc::Control, head->init_control());
+ call->init_req( TypeFunc::I_O , C->top() ) ; // does no i/o
+ call->init_req( TypeFunc::Memory , mem_phi->in(LoopNode::EntryControl) );
+ call->init_req( TypeFunc::ReturnAdr, C->start()->proj_out(TypeFunc::ReturnAdr) );
+ call->init_req( TypeFunc::FramePtr, C->start()->proj_out(TypeFunc::FramePtr) );
+ _igvn.register_new_node_with_optimizer(call);
+ result_ctrl = new (C, 1) ProjNode(call,TypeFunc::Control);
+ _igvn.register_new_node_with_optimizer(result_ctrl);
+ result_mem = new (C, 1) ProjNode(call,TypeFunc::Memory);
+ _igvn.register_new_node_with_optimizer(result_mem);
+
+ // If this fill is tightly coupled to an allocation and overwrites
+ // the whole body, allow it to take over the zeroing.
+ AllocateNode* alloc = AllocateNode::Ideal_allocation(base, this);
+ if (alloc != NULL && alloc->is_AllocateArray()) {
+ Node* length = alloc->as_AllocateArray()->Ideal_length();
+ if (head->limit() == length &&
+ head->init_trip() == _igvn.intcon(0)) {
+ if (TraceOptimizeFill) {
+ tty->print_cr("Eliminated zeroing in allocation");
+ }
+ alloc->maybe_set_complete(&_igvn);
+ } else {
+#ifdef ASSERT
+ if (TraceOptimizeFill) {
+ tty->print_cr("filling array but bounds don't match");
+ alloc->dump();
+ head->init_trip()->dump();
+ head->limit()->dump();
+ length->dump();
+ }
+#endif
+ }
+ }
+
+ // Redirect the old control and memory edges that are outside the loop.
+ Node* exit = head->loopexit()->proj_out(0);
+ _igvn.replace_node(exit, result_ctrl);
+ _igvn.replace_node(store, result_mem);
+ // Any uses of the increment outside of the loop become the loop limit.
+ _igvn.replace_node(head->incr(), head->limit());
+
+ // Disconnect the head from the loop.
+ for (uint i = 0; i < lpt->_body.size(); i++) {
+ Node* n = lpt->_body.at(i);
+ _igvn.replace_node(n, C->top());
+ }
+
+ return true;
+}
--- a/hotspot/src/share/vm/opto/loopnode.cpp Mon Aug 30 10:58:13 2010 -0700
+++ b/hotspot/src/share/vm/opto/loopnode.cpp Wed Sep 01 00:40:05 2010 -0700
@@ -1673,6 +1673,12 @@
_ltree_root->_child->loop_predication(this);
}
+ if (OptimizeFill && UseLoopPredicate && C->has_loops() && !C->major_progress()) {
+ if (do_intrinsify_fill()) {
+ C->set_major_progress();
+ }
+ }
+
// Perform iteration-splitting on inner loops. Split iterations to avoid
// range checks or one-shot null checks.
--- a/hotspot/src/share/vm/opto/loopnode.hpp Mon Aug 30 10:58:13 2010 -0700
+++ b/hotspot/src/share/vm/opto/loopnode.hpp Wed Sep 01 00:40:05 2010 -0700
@@ -937,6 +937,12 @@
// same block. Split thru the Region.
void do_split_if( Node *iff );
+ // Conversion of fill/copy patterns into intrinsic versions
+ bool do_intrinsify_fill();
+ bool intrinsify_fill(IdealLoopTree* lpt);
+ bool match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
+ Node*& shift, Node*& offset);
+
private:
// Return a type based on condition control flow
const TypeInt* filtered_type( Node *n, Node* n_ctrl);
--- a/hotspot/src/share/vm/opto/memnode.cpp Mon Aug 30 10:58:13 2010 -0700
+++ b/hotspot/src/share/vm/opto/memnode.cpp Wed Sep 01 00:40:05 2010 -0700
@@ -1547,8 +1547,8 @@
adr->is_AddP() && off != Type::OffsetBot) {
// For constant Strings treat the fields as compile time constants.
Node* base = adr->in(AddPNode::Base);
- if (base->Opcode() == Op_ConP) {
- const TypeOopPtr* t = phase->type(base)->isa_oopptr();
+ const TypeOopPtr* t = phase->type(base)->isa_oopptr();
+ if (t != NULL && t->singleton()) {
ciObject* string = t->const_oop();
ciConstant constant = string->as_instance()->field_value_by_offset(off);
if (constant.basic_type() == T_INT) {
--- a/hotspot/src/share/vm/opto/runtime.cpp Mon Aug 30 10:58:13 2010 -0700
+++ b/hotspot/src/share/vm/opto/runtime.cpp Wed Sep 01 00:40:05 2010 -0700
@@ -645,6 +645,22 @@
}
+const TypeFunc* OptoRuntime::array_fill_Type() {
+ // create input type (domain)
+ const Type** fields = TypeTuple::fields(3);
+ fields[TypeFunc::Parms+0] = TypePtr::NOTNULL;
+ fields[TypeFunc::Parms+1] = TypeInt::INT;
+ fields[TypeFunc::Parms+2] = TypeInt::INT;
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms + 3, fields);
+
+ // create result type
+ fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = NULL; // void
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields);
+
+ return TypeFunc::make(domain, range);
+}
+
//------------- Interpreter state access for on stack replacement
const TypeFunc* OptoRuntime::osr_end_Type() {
// create input type (domain)
--- a/hotspot/src/share/vm/opto/runtime.hpp Mon Aug 30 10:58:13 2010 -0700
+++ b/hotspot/src/share/vm/opto/runtime.hpp Wed Sep 01 00:40:05 2010 -0700
@@ -260,6 +260,8 @@
static const TypeFunc* generic_arraycopy_Type();
static const TypeFunc* slow_arraycopy_Type(); // the full routine
+ static const TypeFunc* array_fill_Type();
+
// leaf on stack replacement interpreter accessor types
static const TypeFunc* osr_end_Type();
--- a/hotspot/src/share/vm/opto/type.cpp Mon Aug 30 10:58:13 2010 -0700
+++ b/hotspot/src/share/vm/opto/type.cpp Wed Sep 01 00:40:05 2010 -0700
@@ -314,7 +314,7 @@
mreg2type[Op_RegL] = TypeLong::LONG;
mreg2type[Op_RegFlags] = TypeInt::CC;
- TypeAryPtr::RANGE = TypeAryPtr::make( TypePtr::BotPTR, TypeAry::make(Type::BOTTOM,TypeInt::POS), current->env()->Object_klass(), false, arrayOopDesc::length_offset_in_bytes());
+ TypeAryPtr::RANGE = TypeAryPtr::make( TypePtr::BotPTR, TypeAry::make(Type::BOTTOM,TypeInt::POS), NULL /* current->env()->Object_klass() */, false, arrayOopDesc::length_offset_in_bytes());
TypeAryPtr::NARROWOOPS = TypeAryPtr::make(TypePtr::BotPTR, TypeAry::make(TypeNarrowOop::BOTTOM, TypeInt::POS), NULL /*ciArrayKlass::make(o)*/, false, Type::OffsetBot);
@@ -3369,7 +3369,7 @@
tary = TypeAry::make(Type::BOTTOM, tary->_size);
}
}
- bool xk;
+ bool xk = false;
switch (tap->ptr()) {
case AnyNull:
case TopPTR:
@@ -3391,9 +3391,10 @@
o = tap->const_oop();
xk = true;
} else {
- xk = this->_klass_is_exact;
+ // Only precise for identical arrays
+ xk = this->_klass_is_exact && (klass() == tap->klass());
}
- return TypeAryPtr::make( ptr, o, tary, tap->_klass, xk, off, instance_id );
+ return TypeAryPtr::make( ptr, o, tary, lazy_klass, xk, off, instance_id );
}
case NotNull:
case BotPTR:
@@ -3683,12 +3684,10 @@
}
-//------------------------------klass------------------------------------------
-// Return the defining klass for this class
-ciKlass* TypeAryPtr::klass() const {
- if( _klass ) return _klass; // Return cached value, if possible
-
- // Oops, need to compute _klass and cache it
+//----------------------compute_klass------------------------------------------
+// Compute the defining klass for this class
+ciKlass* TypeAryPtr::compute_klass(DEBUG_ONLY(bool verify)) const {
+ // Compute _klass based on element type.
ciKlass* k_ary = NULL;
const TypeInstPtr *tinst;
const TypeAryPtr *tary;
@@ -3715,11 +3714,39 @@
} else {
// Cannot compute array klass directly from basic type,
// since subtypes of TypeInt all have basic type T_INT.
+#ifdef ASSERT
+ if (verify && el->isa_int()) {
+ // Check simple cases when verifying klass.
+ BasicType bt = T_ILLEGAL;
+ if (el == TypeInt::BYTE) {
+ bt = T_BYTE;
+ } else if (el == TypeInt::SHORT) {
+ bt = T_SHORT;
+ } else if (el == TypeInt::CHAR) {
+ bt = T_CHAR;
+ } else if (el == TypeInt::INT) {
+ bt = T_INT;
+ } else {
+ return _klass; // just return specified klass
+ }
+ return ciTypeArrayKlass::make(bt);
+ }
+#endif
assert(!el->isa_int(),
"integral arrays must be pre-equipped with a class");
// Compute array klass directly from basic type
k_ary = ciTypeArrayKlass::make(el->basic_type());
}
+ return k_ary;
+}
+
+//------------------------------klass------------------------------------------
+// Return the defining klass for this class
+ciKlass* TypeAryPtr::klass() const {
+ if( _klass ) return _klass; // Return cached value, if possible
+
+ // Oops, need to compute _klass and cache it
+ ciKlass* k_ary = compute_klass();
if( this != TypeAryPtr::OOPS ) {
// The _klass field acts as a cache of the underlying
--- a/hotspot/src/share/vm/opto/type.hpp Mon Aug 30 10:58:13 2010 -0700
+++ b/hotspot/src/share/vm/opto/type.hpp Wed Sep 01 00:40:05 2010 -0700
@@ -831,11 +831,30 @@
//------------------------------TypeAryPtr-------------------------------------
// Class of Java array pointers
class TypeAryPtr : public TypeOopPtr {
- TypeAryPtr( PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id ) : TypeOopPtr(AryPtr,ptr,k,xk,o,offset, instance_id), _ary(ary) {};
+ TypeAryPtr( PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id ) : TypeOopPtr(AryPtr,ptr,k,xk,o,offset, instance_id), _ary(ary) {
+#ifdef ASSERT
+ if (k != NULL) {
+ // Verify that specified klass and TypeAryPtr::klass() follow the same rules.
+ ciKlass* ck = compute_klass(true);
+ if (UseNewCode || k != ck) {
+ this->dump(); tty->cr();
+ tty->print(" k: ");
+ k->print(); tty->cr();
+ tty->print("ck: ");
+ if (ck != NULL) ck->print();
+ else tty->print("<NULL>");
+ tty->cr();
+ assert(false, "unexpected TypeAryPtr::_klass");
+ }
+ }
+#endif
+ }
virtual bool eq( const Type *t ) const;
virtual int hash() const; // Type specific hashing
const TypeAry *_ary; // Array we point into
+ ciKlass* compute_klass(DEBUG_ONLY(bool verify = false)) const;
+
public:
// Accessors
ciKlass* klass() const;
--- a/hotspot/src/share/vm/runtime/arguments.cpp Mon Aug 30 10:58:13 2010 -0700
+++ b/hotspot/src/share/vm/runtime/arguments.cpp Wed Sep 01 00:40:05 2010 -0700
@@ -1513,6 +1513,9 @@
if (AggressiveOpts && FLAG_IS_DEFAULT(OptimizeStringConcat)) {
FLAG_SET_DEFAULT(OptimizeStringConcat, true);
}
+ if (AggressiveOpts && FLAG_IS_DEFAULT(OptimizeFill)) {
+ FLAG_SET_DEFAULT(OptimizeFill, true);
+ }
#endif
if (AggressiveOpts) {
--- a/hotspot/src/share/vm/runtime/stubRoutines.cpp Mon Aug 30 10:58:13 2010 -0700
+++ b/hotspot/src/share/vm/runtime/stubRoutines.cpp Wed Sep 01 00:40:05 2010 -0700
@@ -97,6 +97,15 @@
address StubRoutines::_unsafe_arraycopy = NULL;
address StubRoutines::_generic_arraycopy = NULL;
+
+address StubRoutines::_jbyte_fill;
+address StubRoutines::_jshort_fill;
+address StubRoutines::_jint_fill;
+address StubRoutines::_arrayof_jbyte_fill;
+address StubRoutines::_arrayof_jshort_fill;
+address StubRoutines::_arrayof_jint_fill;
+
+
double (* StubRoutines::_intrinsic_log )(double) = NULL;
double (* StubRoutines::_intrinsic_log10 )(double) = NULL;
double (* StubRoutines::_intrinsic_exp )(double) = NULL;
@@ -193,6 +202,46 @@
#undef TEST_ARRAYCOPY
+#define TEST_FILL(type) \
+ if (_##type##_fill != NULL) { \
+ union { \
+ double d; \
+ type body[96]; \
+ } s; \
+ \
+ int v = 32; \
+ for (int offset = -2; offset <= 2; offset++) { \
+ for (int i = 0; i < 96; i++) { \
+ s.body[i] = 1; \
+ } \
+ type* start = s.body + 8 + offset; \
+ for (int aligned = 0; aligned < 2; aligned++) { \
+ if (aligned) { \
+ if (((intptr_t)start) % HeapWordSize == 0) { \
+ ((void (*)(type*, int, int))StubRoutines::_arrayof_##type##_fill)(start, v, 80); \
+ } else { \
+ continue; \
+ } \
+ } else { \
+ ((void (*)(type*, int, int))StubRoutines::_##type##_fill)(start, v, 80); \
+ } \
+ for (int i = 0; i < 96; i++) { \
+ if (i < (8 + offset) || i >= (88 + offset)) { \
+ assert(s.body[i] == 1, "what?"); \
+ } else { \
+ assert(s.body[i] == 32, "what?"); \
+ } \
+ } \
+ } \
+ } \
+ } \
+
+ TEST_FILL(jbyte);
+ TEST_FILL(jshort);
+ TEST_FILL(jint);
+
+#undef TEST_FILL
+
#define TEST_COPYRTN(type) \
test_arraycopy_func(CAST_FROM_FN_PTR(address, Copy::conjoint_##type##s_atomic), sizeof(type)); \
test_arraycopy_func(CAST_FROM_FN_PTR(address, Copy::arrayof_conjoint_##type##s), (int)MAX2(sizeof(HeapWord), sizeof(type)))
@@ -313,3 +362,39 @@
Copy::arrayof_conjoint_oops(src, dest, count);
gen_arraycopy_barrier((oop *) dest, count);
JRT_END
+
+
+address StubRoutines::select_fill_function(BasicType t, bool aligned, const char* &name) {
+#define RETURN_STUB(xxx_fill) { \
+ name = #xxx_fill; \
+ return StubRoutines::xxx_fill(); }
+
+ switch (t) {
+ case T_BYTE:
+ case T_BOOLEAN:
+ if (!aligned) RETURN_STUB(jbyte_fill);
+ RETURN_STUB(arrayof_jbyte_fill);
+ case T_CHAR:
+ case T_SHORT:
+ if (!aligned) RETURN_STUB(jshort_fill);
+ RETURN_STUB(arrayof_jshort_fill);
+ case T_INT:
+ case T_FLOAT:
+ if (!aligned) RETURN_STUB(jint_fill);
+ RETURN_STUB(arrayof_jint_fill);
+ case T_DOUBLE:
+ case T_LONG:
+ case T_ARRAY:
+ case T_OBJECT:
+ case T_NARROWOOP:
+ case T_ADDRESS:
+ // Currently unsupported
+ return NULL;
+
+ default:
+ ShouldNotReachHere();
+ return NULL;
+ }
+
+#undef RETURN_STUB
+}
--- a/hotspot/src/share/vm/runtime/stubRoutines.hpp Mon Aug 30 10:58:13 2010 -0700
+++ b/hotspot/src/share/vm/runtime/stubRoutines.hpp Wed Sep 01 00:40:05 2010 -0700
@@ -148,6 +148,13 @@
static address _unsafe_arraycopy;
static address _generic_arraycopy;
+ static address _jbyte_fill;
+ static address _jshort_fill;
+ static address _jint_fill;
+ static address _arrayof_jbyte_fill;
+ static address _arrayof_jshort_fill;
+ static address _arrayof_jint_fill;
+
// These are versions of the java.lang.Math methods which perform
// the same operations as the intrinsic version. They are used for
// constant folding in the compiler to ensure equivalence. If the
@@ -259,6 +266,16 @@
static address unsafe_arraycopy() { return _unsafe_arraycopy; }
static address generic_arraycopy() { return _generic_arraycopy; }
+ static address jbyte_fill() { return _jbyte_fill; }
+ static address jshort_fill() { return _jshort_fill; }
+ static address jint_fill() { return _jint_fill; }
+ static address arrayof_jbyte_fill() { return _arrayof_jbyte_fill; }
+ static address arrayof_jshort_fill() { return _arrayof_jshort_fill; }
+ static address arrayof_jint_fill() { return _arrayof_jint_fill; }
+
+ static address select_fill_function(BasicType t, bool aligned, const char* &name);
+
+
static double intrinsic_log(double d) {
assert(_intrinsic_log != NULL, "must be defined");
return _intrinsic_log(d);
--- a/hotspot/src/share/vm/utilities/globalDefinitions.hpp Mon Aug 30 10:58:13 2010 -0700
+++ b/hotspot/src/share/vm/utilities/globalDefinitions.hpp Wed Sep 01 00:40:05 2010 -0700
@@ -529,7 +529,7 @@
#ifdef ASSERT
extern int type2aelembytes(BasicType t, bool allow_address = false); // asserts
#else
-inline int type2aelembytes(BasicType t) { return _type2aelembytes[t]; }
+inline int type2aelembytes(BasicType t, bool allow_address = false) { return _type2aelembytes[t]; }
#endif