jdk-sandbox: changeset 55490:3f3dc00a69a5

--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp	Thu Jun 20 14:09:22 2019 +0100
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp	Mon Jun 24 11:37:56 2019 -0700
@@ -1383,7 +1383,12 @@
       // save regs before copy_memory
       __ push(RegSet::of(d, count), sp);
     }
-    copy_memory(aligned, s, d, count, rscratch1, size);
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size);
+      UnsafeCopyMemoryMark ucmm(this, add_entry, true);
+      copy_memory(aligned, s, d, count, rscratch1, size);
+    }
 
     if (is_oop) {
       __ pop(RegSet::of(d, count), sp);
@@ -1455,7 +1460,12 @@
       // save regs before copy_memory
       __ push(RegSet::of(d, count), sp);
     }
-    copy_memory(aligned, s, d, count, rscratch1, -size);
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      bool add_entry = !is_oop && (!aligned || sizeof(jlong) == size);
+      UnsafeCopyMemoryMark ucmm(this, add_entry, true);
+      copy_memory(aligned, s, d, count, rscratch1, -size);
+    }
     if (is_oop) {
       __ pop(RegSet::of(d, count), sp);
       if (VerifyOops)
@@ -5816,6 +5826,10 @@
   }
 }; // end class declaration
 
+#define UCM_TABLE_MAX_ENTRIES 8
 void StubGenerator_generate(CodeBuffer* code, bool all) {
+  if (UnsafeCopyMemory::_table == NULL) {
+    UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES);
+  }
   StubGenerator g(code, all);
 }

--- a/src/hotspot/cpu/arm/stubGenerator_arm.cpp	Thu Jun 20 14:09:22 2019 +0100
+++ b/src/hotspot/cpu/arm/stubGenerator_arm.cpp	Mon Jun 24 11:37:56 2019 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -928,7 +928,7 @@
   // Scratches 'count', R3.
   // R4-R10 are preserved (saved/restored).
   //
-  int generate_forward_aligned_copy_loop(Register from, Register to, Register count, int bytes_per_count) {
+  int generate_forward_aligned_copy_loop(Register from, Register to, Register count, int bytes_per_count, bool unsafe_copy = false) {
     assert (from == R0 && to == R1 && count == R2, "adjust the implementation below");
 
     const int bytes_per_loop = 8*wordSize; // 8 registers are read and written on every loop iteration
@@ -954,107 +954,111 @@
 
     Label L_skip_pld;
 
-    // predecrease to exit when there is less than count_per_loop
-    __ sub_32(count, count, count_per_loop);
-
-    if (pld_offset != 0) {
-      pld_offset = (pld_offset < 0) ? -pld_offset : pld_offset;
-
-      prefetch(from, to, 0);
-
-      if (prefetch_before) {
-        // If prefetch is done ahead, final PLDs that overflow the
-        // copied area can be easily avoided. 'count' is predecreased
-        // by the prefetch distance to optimize the inner loop and the
-        // outer loop skips the PLD.
-        __ subs_32(count, count, (bytes_per_loop+pld_offset)/bytes_per_count);
-
-        // skip prefetch for small copies
-        __ b(L_skip_pld, lt);
-      }
-
-      int offset = ArmCopyCacheLineSize;
-      while (offset <= pld_offset) {
-        prefetch(from, to, offset);
-        offset += ArmCopyCacheLineSize;
-      };
-    }
-
     {
-      // 32-bit ARM note: we have tried implementing loop unrolling to skip one
-      // PLD with 64 bytes cache line but the gain was not significant.
-
-      Label L_copy_loop;
-      __ align(OptoLoopAlignment);
-      __ BIND(L_copy_loop);
-
-      if (prefetch_before) {
-        prefetch(from, to, bytes_per_loop + pld_offset);
-        __ BIND(L_skip_pld);
-      }
-
-      if (split_read) {
-        // Split the register set in two sets so that there is less
-        // latency between LDM and STM (R3-R6 available while R7-R10
-        // still loading) and less register locking issue when iterating
-        // on the first LDM.
-        __ ldmia(from, RegisterSet(R3, R6), writeback);
-        __ ldmia(from, RegisterSet(R7, R10), writeback);
-      } else {
-        __ ldmia(from, RegisterSet(R3, R10), writeback);
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, unsafe_copy, true);
+      // predecrease to exit when there is less than count_per_loop
+      __ sub_32(count, count, count_per_loop);
+
+      if (pld_offset != 0) {
+        pld_offset = (pld_offset < 0) ? -pld_offset : pld_offset;
+
+        prefetch(from, to, 0);
+
+        if (prefetch_before) {
+          // If prefetch is done ahead, final PLDs that overflow the
+          // copied area can be easily avoided. 'count' is predecreased
+          // by the prefetch distance to optimize the inner loop and the
+          // outer loop skips the PLD.
+          __ subs_32(count, count, (bytes_per_loop+pld_offset)/bytes_per_count);
+
+          // skip prefetch for small copies
+          __ b(L_skip_pld, lt);
+        }
+
+        int offset = ArmCopyCacheLineSize;
+        while (offset <= pld_offset) {
+          prefetch(from, to, offset);
+          offset += ArmCopyCacheLineSize;
+        };
       }
 
-      __ subs_32(count, count, count_per_loop);
-
-      if (prefetch_after) {
-        prefetch(from, to, pld_offset, bytes_per_loop);
-      }
-
-      if (split_write) {
-        __ stmia(to, RegisterSet(R3, R6), writeback);
-        __ stmia(to, RegisterSet(R7, R10), writeback);
-      } else {
-        __ stmia(to, RegisterSet(R3, R10), writeback);
-      }
-
-      __ b(L_copy_loop, ge);
-
-      if (prefetch_before) {
-        // the inner loop may end earlier, allowing to skip PLD for the last iterations
-        __ cmn_32(count, (bytes_per_loop + pld_offset)/bytes_per_count);
-        __ b(L_skip_pld, ge);
+      {
+        // 32-bit ARM note: we have tried implementing loop unrolling to skip one
+        // PLD with 64 bytes cache line but the gain was not significant.
+
+        Label L_copy_loop;
+        __ align(OptoLoopAlignment);
+        __ BIND(L_copy_loop);
+
+        if (prefetch_before) {
+          prefetch(from, to, bytes_per_loop + pld_offset);
+          __ BIND(L_skip_pld);
+        }
+
+        if (split_read) {
+          // Split the register set in two sets so that there is less
+          // latency between LDM and STM (R3-R6 available while R7-R10
+          // still loading) and less register locking issue when iterating
+          // on the first LDM.
+          __ ldmia(from, RegisterSet(R3, R6), writeback);
+          __ ldmia(from, RegisterSet(R7, R10), writeback);
+        } else {
+          __ ldmia(from, RegisterSet(R3, R10), writeback);
+        }
+
+        __ subs_32(count, count, count_per_loop);
+
+        if (prefetch_after) {
+          prefetch(from, to, pld_offset, bytes_per_loop);
+        }
+
+        if (split_write) {
+          __ stmia(to, RegisterSet(R3, R6), writeback);
+          __ stmia(to, RegisterSet(R7, R10), writeback);
+        } else {
+          __ stmia(to, RegisterSet(R3, R10), writeback);
+        }
+
+        __ b(L_copy_loop, ge);
+
+        if (prefetch_before) {
+          // the inner loop may end earlier, allowing to skip PLD for the last iterations
+          __ cmn_32(count, (bytes_per_loop + pld_offset)/bytes_per_count);
+          __ b(L_skip_pld, ge);
+        }
       }
-    }
-    BLOCK_COMMENT("Remaining bytes:");
-    // still 0..bytes_per_loop-1 aligned bytes to copy, count already decreased by (at least) bytes_per_loop bytes
-
-    // __ add(count, count, ...); // addition useless for the bit tests
-    assert (pld_offset % bytes_per_loop == 0, "decreasing count by pld_offset before loop must not change tested bits");
-
-    __ tst(count, 16 / bytes_per_count);
-    __ ldmia(from, RegisterSet(R3, R6), writeback, ne); // copy 16 bytes
-    __ stmia(to, RegisterSet(R3, R6), writeback, ne);
-
-    __ tst(count, 8 / bytes_per_count);
-    __ ldmia(from, RegisterSet(R3, R4), writeback, ne); // copy 8 bytes
-    __ stmia(to, RegisterSet(R3, R4), writeback, ne);
-
-    if (bytes_per_count <= 4) {
-      __ tst(count, 4 / bytes_per_count);
-      __ ldr(R3, Address(from, 4, post_indexed), ne); // copy 4 bytes
-      __ str(R3, Address(to, 4, post_indexed), ne);
-    }
-
-    if (bytes_per_count <= 2) {
-      __ tst(count, 2 / bytes_per_count);
-      __ ldrh(R3, Address(from, 2, post_indexed), ne); // copy 2 bytes
-      __ strh(R3, Address(to, 2, post_indexed), ne);
-    }
-
-    if (bytes_per_count == 1) {
-      __ tst(count, 1);
-      __ ldrb(R3, Address(from, 1, post_indexed), ne);
-      __ strb(R3, Address(to, 1, post_indexed), ne);
+      BLOCK_COMMENT("Remaining bytes:");
+      // still 0..bytes_per_loop-1 aligned bytes to copy, count already decreased by (at least) bytes_per_loop bytes
+
+      // __ add(count, count, ...); // addition useless for the bit tests
+      assert (pld_offset % bytes_per_loop == 0, "decreasing count by pld_offset before loop must not change tested bits");
+
+      __ tst(count, 16 / bytes_per_count);
+      __ ldmia(from, RegisterSet(R3, R6), writeback, ne); // copy 16 bytes
+      __ stmia(to, RegisterSet(R3, R6), writeback, ne);
+
+      __ tst(count, 8 / bytes_per_count);
+      __ ldmia(from, RegisterSet(R3, R4), writeback, ne); // copy 8 bytes
+      __ stmia(to, RegisterSet(R3, R4), writeback, ne);
+
+      if (bytes_per_count <= 4) {
+        __ tst(count, 4 / bytes_per_count);
+        __ ldr(R3, Address(from, 4, post_indexed), ne); // copy 4 bytes
+        __ str(R3, Address(to, 4, post_indexed), ne);
+      }
+
+      if (bytes_per_count <= 2) {
+        __ tst(count, 2 / bytes_per_count);
+        __ ldrh(R3, Address(from, 2, post_indexed), ne); // copy 2 bytes
+        __ strh(R3, Address(to, 2, post_indexed), ne);
+      }
+
+      if (bytes_per_count == 1) {
+        __ tst(count, 1);
+        __ ldrb(R3, Address(from, 1, post_indexed), ne);
+        __ strb(R3, Address(to, 1, post_indexed), ne);
+      }
     }
 
     __ pop(RegisterSet(R4,R10));
@@ -1083,7 +1087,7 @@
   // Scratches 'count', R3.
   // ARM R4-R10 are preserved (saved/restored).
   //
-  int generate_backward_aligned_copy_loop(Register end_from, Register end_to, Register count, int bytes_per_count) {
+  int generate_backward_aligned_copy_loop(Register end_from, Register end_to, Register count, int bytes_per_count, bool unsafe_copy = false) {
     assert (end_from == R0 && end_to == R1 && count == R2, "adjust the implementation below");
 
     const int bytes_per_loop = 8*wordSize; // 8 registers are read and written on every loop iteration
@@ -1099,102 +1103,105 @@
 
     __ push(RegisterSet(R4,R10));
 
-    __ sub_32(count, count, count_per_loop);
-
-    const bool prefetch_before = pld_offset < 0;
-    const bool prefetch_after = pld_offset > 0;
-
-    Label L_skip_pld;
-
-    if (pld_offset != 0) {
-      pld_offset = (pld_offset < 0) ? -pld_offset : pld_offset;
-
-      prefetch(end_from, end_to, -wordSize);
-
-      if (prefetch_before) {
-        __ subs_32(count, count, (bytes_per_loop + pld_offset) / bytes_per_count);
-        __ b(L_skip_pld, lt);
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, unsafe_copy, true);
+      __ sub_32(count, count, count_per_loop);
+
+      const bool prefetch_before = pld_offset < 0;
+      const bool prefetch_after = pld_offset > 0;
+
+      Label L_skip_pld;
+
+      if (pld_offset != 0) {
+        pld_offset = (pld_offset < 0) ? -pld_offset : pld_offset;
+
+        prefetch(end_from, end_to, -wordSize);
+
+        if (prefetch_before) {
+          __ subs_32(count, count, (bytes_per_loop + pld_offset) / bytes_per_count);
+          __ b(L_skip_pld, lt);
+        }
+
+        int offset = ArmCopyCacheLineSize;
+        while (offset <= pld_offset) {
+          prefetch(end_from, end_to, -(wordSize + offset));
+          offset += ArmCopyCacheLineSize;
+        };
       }
 
-      int offset = ArmCopyCacheLineSize;
-      while (offset <= pld_offset) {
-        prefetch(end_from, end_to, -(wordSize + offset));
-        offset += ArmCopyCacheLineSize;
-      };
-    }
-
-    {
-      // 32-bit ARM note: we have tried implementing loop unrolling to skip one
-      // PLD with 64 bytes cache line but the gain was not significant.
-
-      Label L_copy_loop;
-      __ align(OptoLoopAlignment);
-      __ BIND(L_copy_loop);
-
-      if (prefetch_before) {
-        prefetch(end_from, end_to, -(wordSize + bytes_per_loop + pld_offset));
-        __ BIND(L_skip_pld);
+      {
+        // 32-bit ARM note: we have tried implementing loop unrolling to skip one
+        // PLD with 64 bytes cache line but the gain was not significant.
+
+        Label L_copy_loop;
+        __ align(OptoLoopAlignment);
+        __ BIND(L_copy_loop);
+
+        if (prefetch_before) {
+          prefetch(end_from, end_to, -(wordSize + bytes_per_loop + pld_offset));
+          __ BIND(L_skip_pld);
+        }
+
+        if (split_read) {
+          __ ldmdb(end_from, RegisterSet(R7, R10), writeback);
+          __ ldmdb(end_from, RegisterSet(R3, R6), writeback);
+        } else {
+          __ ldmdb(end_from, RegisterSet(R3, R10), writeback);
+        }
+
+        __ subs_32(count, count, count_per_loop);
+
+        if (prefetch_after) {
+          prefetch(end_from, end_to, -(wordSize + pld_offset), -bytes_per_loop);
+        }
+
+        if (split_write) {
+          __ stmdb(end_to, RegisterSet(R7, R10), writeback);
+          __ stmdb(end_to, RegisterSet(R3, R6), writeback);
+        } else {
+          __ stmdb(end_to, RegisterSet(R3, R10), writeback);
+        }
+
+        __ b(L_copy_loop, ge);
+
+        if (prefetch_before) {
+          __ cmn_32(count, (bytes_per_loop + pld_offset)/bytes_per_count);
+          __ b(L_skip_pld, ge);
+        }
       }
-
-      if (split_read) {
-        __ ldmdb(end_from, RegisterSet(R7, R10), writeback);
-        __ ldmdb(end_from, RegisterSet(R3, R6), writeback);
-      } else {
-        __ ldmdb(end_from, RegisterSet(R3, R10), writeback);
-      }
-
-      __ subs_32(count, count, count_per_loop);
-
-      if (prefetch_after) {
-        prefetch(end_from, end_to, -(wordSize + pld_offset), -bytes_per_loop);
+      BLOCK_COMMENT("Remaining bytes:");
+      // still 0..bytes_per_loop-1 aligned bytes to copy, count already decreased by (at least) bytes_per_loop bytes
+
+      // __ add(count, count, ...); // addition useless for the bit tests
+      assert (pld_offset % bytes_per_loop == 0, "decreasing count by pld_offset before loop must not change tested bits");
+
+      __ tst(count, 16 / bytes_per_count);
+      __ ldmdb(end_from, RegisterSet(R3, R6), writeback, ne); // copy 16 bytes
+      __ stmdb(end_to, RegisterSet(R3, R6), writeback, ne);
+
+      __ tst(count, 8 / bytes_per_count);
+      __ ldmdb(end_from, RegisterSet(R3, R4), writeback, ne); // copy 8 bytes
+      __ stmdb(end_to, RegisterSet(R3, R4), writeback, ne);
+
+      if (bytes_per_count <= 4) {
+        __ tst(count, 4 / bytes_per_count);
+        __ ldr(R3, Address(end_from, -4, pre_indexed), ne); // copy 4 bytes
+        __ str(R3, Address(end_to, -4, pre_indexed), ne);
       }
 
-      if (split_write) {
-        __ stmdb(end_to, RegisterSet(R7, R10), writeback);
-        __ stmdb(end_to, RegisterSet(R3, R6), writeback);
-      } else {
-        __ stmdb(end_to, RegisterSet(R3, R10), writeback);
+      if (bytes_per_count <= 2) {
+        __ tst(count, 2 / bytes_per_count);
+        __ ldrh(R3, Address(end_from, -2, pre_indexed), ne); // copy 2 bytes
+        __ strh(R3, Address(end_to, -2, pre_indexed), ne);
       }
 
-      __ b(L_copy_loop, ge);
-
-      if (prefetch_before) {
-        __ cmn_32(count, (bytes_per_loop + pld_offset)/bytes_per_count);
-        __ b(L_skip_pld, ge);
+      if (bytes_per_count == 1) {
+        __ tst(count, 1);
+        __ ldrb(R3, Address(end_from, -1, pre_indexed), ne);
+        __ strb(R3, Address(end_to, -1, pre_indexed), ne);
       }
     }
-    BLOCK_COMMENT("Remaining bytes:");
-    // still 0..bytes_per_loop-1 aligned bytes to copy, count already decreased by (at least) bytes_per_loop bytes
-
-    // __ add(count, count, ...); // addition useless for the bit tests
-    assert (pld_offset % bytes_per_loop == 0, "decreasing count by pld_offset before loop must not change tested bits");
-
-    __ tst(count, 16 / bytes_per_count);
-    __ ldmdb(end_from, RegisterSet(R3, R6), writeback, ne); // copy 16 bytes
-    __ stmdb(end_to, RegisterSet(R3, R6), writeback, ne);
-
-    __ tst(count, 8 / bytes_per_count);
-    __ ldmdb(end_from, RegisterSet(R3, R4), writeback, ne); // copy 8 bytes
-    __ stmdb(end_to, RegisterSet(R3, R4), writeback, ne);
-
-    if (bytes_per_count <= 4) {
-      __ tst(count, 4 / bytes_per_count);
-      __ ldr(R3, Address(end_from, -4, pre_indexed), ne); // copy 4 bytes
-      __ str(R3, Address(end_to, -4, pre_indexed), ne);
-    }
-
-    if (bytes_per_count <= 2) {
-      __ tst(count, 2 / bytes_per_count);
-      __ ldrh(R3, Address(end_from, -2, pre_indexed), ne); // copy 2 bytes
-      __ strh(R3, Address(end_to, -2, pre_indexed), ne);
-    }
-
-    if (bytes_per_count == 1) {
-      __ tst(count, 1);
-      __ ldrb(R3, Address(end_from, -1, pre_indexed), ne);
-      __ strb(R3, Address(end_to, -1, pre_indexed), ne);
-    }
-
     __ pop(RegisterSet(R4,R10));
 
     return count_per_loop;
@@ -1749,17 +1756,21 @@
   //
   // Notes:
   //     shifts 'from' and 'to'
-  void copy_small_array(Register from, Register to, Register count, Register tmp, Register tmp2, int bytes_per_count, bool forward, Label & entry) {
+  void copy_small_array(Register from, Register to, Register count, Register tmp, Register tmp2, int bytes_per_count, bool forward, Label & entry, bool unsafe_copy = false) {
     assert_different_registers(from, to, count, tmp);
 
-    __ align(OptoLoopAlignment);
-    Label L_small_loop;
-    __ BIND(L_small_loop);
-    store_one(tmp, to, bytes_per_count, forward, al, tmp2);
-    __ BIND(entry); // entry point
-    __ subs(count, count, 1);
-    load_one(tmp, from, bytes_per_count, forward, ge, tmp2);
-    __ b(L_small_loop, ge);
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, unsafe_copy, true);
+      __ align(OptoLoopAlignment);
+      Label L_small_loop;
+      __ BIND(L_small_loop);
+      store_one(tmp, to, bytes_per_count, forward, al, tmp2);
+      __ BIND(entry); // entry point
+      __ subs(count, count, 1);
+      load_one(tmp, from, bytes_per_count, forward, ge, tmp2);
+      __ b(L_small_loop, ge);
+    }
   }
 
   // Aligns 'to' by reading one word from 'from' and writting its part to 'to'.
@@ -1876,7 +1887,7 @@
   //
   // Scratches 'from', 'count', R3 and R12.
   // R4-R10 saved for use.
-  int align_dst_and_generate_shifted_copy_loop(Register from, Register to, Register count, int bytes_per_count, bool forward) {
+  int align_dst_and_generate_shifted_copy_loop(Register from, Register to, Register count, int bytes_per_count, bool forward, bool unsafe_copy = false) {
 
     const Register Rval = forward ? R12 : R3; // as generate_{forward,backward}_shifted_copy_loop expect
 
@@ -1886,60 +1897,64 @@
     // then the remainder of 'to' divided by wordSize is one of elements of {seq}.
 
     __ push(RegisterSet(R4,R10));
-    load_one(Rval, from, wordSize, forward);
-
-    switch (bytes_per_count) {
-      case 2:
-        min_copy = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
-        break;
-      case 1:
-      {
-        Label L1, L2, L3;
-        int min_copy1, min_copy2, min_copy3;
-
-        Label L_loop_finished;
-
-        if (forward) {
-            __ tbz(to, 0, L2);
-            __ tbz(to, 1, L1);
-
-            __ BIND(L3);
-            min_copy3 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 3, bytes_per_count, forward);
-            __ b(L_loop_finished);
-
-            __ BIND(L1);
-            min_copy1 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 1, bytes_per_count, forward);
-            __ b(L_loop_finished);
-
-            __ BIND(L2);
-            min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
-        } else {
-            __ tbz(to, 0, L2);
-            __ tbnz(to, 1, L3);
-
-            __ BIND(L1);
-            min_copy1 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 1, bytes_per_count, forward);
-            __ b(L_loop_finished);
-
-             __ BIND(L3);
-            min_copy3 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 3, bytes_per_count, forward);
-            __ b(L_loop_finished);
-
-           __ BIND(L2);
-            min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
+
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, unsafe_copy, true);
+      load_one(Rval, from, wordSize, forward);
+
+      switch (bytes_per_count) {
+        case 2:
+          min_copy = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
+          break;
+        case 1:
+        {
+          Label L1, L2, L3;
+          int min_copy1, min_copy2, min_copy3;
+
+          Label L_loop_finished;
+
+          if (forward) {
+              __ tbz(to, 0, L2);
+              __ tbz(to, 1, L1);
+
+              __ BIND(L3);
+              min_copy3 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 3, bytes_per_count, forward);
+              __ b(L_loop_finished);
+
+              __ BIND(L1);
+              min_copy1 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 1, bytes_per_count, forward);
+              __ b(L_loop_finished);
+
+              __ BIND(L2);
+              min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
+          } else {
+              __ tbz(to, 0, L2);
+              __ tbnz(to, 1, L3);
+
+              __ BIND(L1);
+              min_copy1 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 1, bytes_per_count, forward);
+              __ b(L_loop_finished);
+
+               __ BIND(L3);
+              min_copy3 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 3, bytes_per_count, forward);
+              __ b(L_loop_finished);
+
+             __ BIND(L2);
+              min_copy2 = align_dst_and_generate_shifted_copy_loop(from, to, count, Rval, 2, bytes_per_count, forward);
+          }
+
+          min_copy = MAX2(MAX2(min_copy1, min_copy2), min_copy3);
+
+          __ BIND(L_loop_finished);
+
+          break;
         }
-
-        min_copy = MAX2(MAX2(min_copy1, min_copy2), min_copy3);
-
-        __ BIND(L_loop_finished);
-
-        break;
+        default:
+          ShouldNotReachHere();
+          break;
       }
-      default:
-        ShouldNotReachHere();
-        break;
     }
-
     __ pop(RegisterSet(R4,R10));
 
     return min_copy;
@@ -1963,6 +1978,13 @@
   }
 #endif // !PRODUCT
 
+  address generate_unsafecopy_common_error_exit() {
+    address start_pc = __ pc();
+      __ mov(R0, 0);
+      __ ret();
+    return start_pc;
+  }
+
   //
   //  Generate stub for primitive array copy.  If "aligned" is true, the
   //  "from" and "to" addresses are assumed to be heapword aligned.
@@ -2033,8 +2055,13 @@
         from_is_aligned = true;
     }
 
-    int count_required_to_align = from_is_aligned ? 0 : align_src(from, to, count, tmp1, bytes_per_count, forward);
-    assert (small_copy_limit >= count_required_to_align, "alignment could exhaust count");
+    int count_required_to_align = 0;
+    {
+      // UnsafeCopyMemoryMark page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+      count_required_to_align = from_is_aligned ? 0 : align_src(from, to, count, tmp1, bytes_per_count, forward);
+      assert (small_copy_limit >= count_required_to_align, "alignment could exhaust count");
+    }
 
     // now 'from' is aligned
 
@@ -2064,9 +2091,9 @@
 
     int min_copy;
     if (forward) {
-      min_copy = generate_forward_aligned_copy_loop (from, to, count, bytes_per_count);
+      min_copy = generate_forward_aligned_copy_loop(from, to, count, bytes_per_count, !aligned /*add UnsafeCopyMemory entry*/);
     } else {
-      min_copy = generate_backward_aligned_copy_loop(from, to, count, bytes_per_count);
+      min_copy = generate_backward_aligned_copy_loop(from, to, count, bytes_per_count, !aligned /*add UnsafeCopyMemory entry*/);
     }
     assert(small_copy_limit >= count_required_to_align + min_copy, "first loop might exhaust count");
 
@@ -2077,7 +2104,7 @@
     __ ret();
 
     {
-      copy_small_array(from, to, count, tmp1, tmp2, bytes_per_count, forward, L_small_array /* entry */);
+      copy_small_array(from, to, count, tmp1, tmp2, bytes_per_count, forward, L_small_array /* entry */, !aligned /*add UnsafeCopyMemory entry*/);
 
       if (status) {
         __ mov(R0, 0); // OK
@@ -2088,7 +2115,7 @@
 
     if (! to_is_aligned) {
       __ BIND(L_unaligned_dst);
-      int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward);
+      int min_copy_shifted = align_dst_and_generate_shifted_copy_loop(from, to, count, bytes_per_count, forward, !aligned /*add UnsafeCopyMemory entry*/);
       assert (small_copy_limit >= count_required_to_align + min_copy_shifted, "first loop might exhaust count");
 
       if (status) {
@@ -2873,6 +2900,9 @@
     status = true; // generate a status compatible with C1 calls
 #endif
 
+    address ucm_common_error_exit       =  generate_unsafecopy_common_error_exit();
+    UnsafeCopyMemory::set_common_exit_stub_pc(ucm_common_error_exit);
+
     // these need always status in case they are called from generic_arraycopy
     StubRoutines::_jbyte_disjoint_arraycopy  = generate_primitive_copy(false, "jbyte_disjoint_arraycopy",  true, 1, true);
     StubRoutines::_jshort_disjoint_arraycopy = generate_primitive_copy(false, "jshort_disjoint_arraycopy", true, 2, true);
@@ -3055,6 +3085,10 @@
   }
 }; // end class declaration
 
+#define UCM_TABLE_MAX_ENTRIES 32
 void StubGenerator_generate(CodeBuffer* code, bool all) {
+  if (UnsafeCopyMemory::_table == NULL) {
+    UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES);
+  }
   StubGenerator g(code, all);
 }

--- a/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp	Thu Jun 20 14:09:22 2019 +0100
+++ b/src/hotspot/cpu/ppc/stubGenerator_ppc.cpp	Mon Jun 24 11:37:56 2019 -0700
@@ -952,6 +952,20 @@
     // need to copy backwards
   }
 
+  // This is common errorexit stub for UnsafeCopyMemory.
+  address generate_unsafecopy_common_error_exit() {
+    address start_pc = __ pc();
+    Register tmp1 = R6_ARG4;
+    // probably copy stub would have changed value reset it.
+    if (VM_Version::has_mfdscr()) {
+      __ load_const_optimized(tmp1, VM_Version::_dscr_val);
+      __ mtdscr(tmp1);
+    }
+    __ li(R3_RET, 0); // return 0
+    __ blr();
+    return start_pc;
+  }
+
   // The guideline in the implementations of generate_disjoint_xxx_copy
   // (xxx=byte,short,int,long,oop) is to copy as many elements as possible with
   // single instructions, but to avoid alignment interrupts (see subsequent
@@ -989,150 +1003,154 @@
     VectorSRegister tmp_vsr2  = VSR2;
 
     Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10;
-
-    // Don't try anything fancy if arrays don't have many elements.
-    __ li(tmp3, 0);
-    __ cmpwi(CCR0, R5_ARG3, 17);
-    __ ble(CCR0, l_6); // copy 4 at a time
-
-    if (!aligned) {
-      __ xorr(tmp1, R3_ARG1, R4_ARG2);
-      __ andi_(tmp1, tmp1, 3);
-      __ bne(CCR0, l_6); // If arrays don't have the same alignment mod 4, do 4 element copy.
-
-      // Copy elements if necessary to align to 4 bytes.
-      __ neg(tmp1, R3_ARG1); // Compute distance to alignment boundary.
-      __ andi_(tmp1, tmp1, 3);
-      __ beq(CCR0, l_2);
-
-      __ subf(R5_ARG3, tmp1, R5_ARG3);
-      __ bind(l_9);
-      __ lbz(tmp2, 0, R3_ARG1);
-      __ addic_(tmp1, tmp1, -1);
-      __ stb(tmp2, 0, R4_ARG2);
-      __ addi(R3_ARG1, R3_ARG1, 1);
-      __ addi(R4_ARG2, R4_ARG2, 1);
-      __ bne(CCR0, l_9);
-
-      __ bind(l_2);
-    }
-
-    // copy 8 elements at a time
-    __ xorr(tmp2, R3_ARG1, R4_ARG2); // skip if src & dest have differing alignment mod 8
-    __ andi_(tmp1, tmp2, 7);
-    __ bne(CCR0, l_7); // not same alignment -> to or from is aligned -> copy 8
-
-    // copy a 2-element word if necessary to align to 8 bytes
-    __ andi_(R0, R3_ARG1, 7);
-    __ beq(CCR0, l_7);
-
-    __ lwzx(tmp2, R3_ARG1, tmp3);
-    __ addi(R5_ARG3, R5_ARG3, -4);
-    __ stwx(tmp2, R4_ARG2, tmp3);
-    { // FasterArrayCopy
-      __ addi(R3_ARG1, R3_ARG1, 4);
-      __ addi(R4_ARG2, R4_ARG2, 4);
-    }
-    __ bind(l_7);
-
-    { // FasterArrayCopy
-      __ cmpwi(CCR0, R5_ARG3, 31);
-      __ ble(CCR0, l_6); // copy 2 at a time if less than 32 elements remain
-
-      __ srdi(tmp1, R5_ARG3, 5);
-      __ andi_(R5_ARG3, R5_ARG3, 31);
-      __ mtctr(tmp1);
-
-     if (!VM_Version::has_vsx()) {
-
-      __ bind(l_8);
-      // Use unrolled version for mass copying (copy 32 elements a time)
-      // Load feeding store gets zero latency on Power6, however not on Power5.
-      // Therefore, the following sequence is made for the good of both.
-      __ ld(tmp1, 0, R3_ARG1);
-      __ ld(tmp2, 8, R3_ARG1);
-      __ ld(tmp3, 16, R3_ARG1);
-      __ ld(tmp4, 24, R3_ARG1);
-      __ std(tmp1, 0, R4_ARG2);
-      __ std(tmp2, 8, R4_ARG2);
-      __ std(tmp3, 16, R4_ARG2);
-      __ std(tmp4, 24, R4_ARG2);
-      __ addi(R3_ARG1, R3_ARG1, 32);
-      __ addi(R4_ARG2, R4_ARG2, 32);
-      __ bdnz(l_8);
-
-    } else { // Processor supports VSX, so use it to mass copy.
-
-      // Prefetch the data into the L2 cache.
-      __ dcbt(R3_ARG1, 0);
-
-      // If supported set DSCR pre-fetch to deepest.
-      if (VM_Version::has_mfdscr()) {
-        __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7);
-        __ mtdscr(tmp2);
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+
+      // Don't try anything fancy if arrays don't have many elements.
+      __ li(tmp3, 0);
+      __ cmpwi(CCR0, R5_ARG3, 17);
+      __ ble(CCR0, l_6); // copy 4 at a time
+
+      if (!aligned) {
+        __ xorr(tmp1, R3_ARG1, R4_ARG2);
+        __ andi_(tmp1, tmp1, 3);
+        __ bne(CCR0, l_6); // If arrays don't have the same alignment mod 4, do 4 element copy.
+
+        // Copy elements if necessary to align to 4 bytes.
+        __ neg(tmp1, R3_ARG1); // Compute distance to alignment boundary.
+        __ andi_(tmp1, tmp1, 3);
+        __ beq(CCR0, l_2);
+
+        __ subf(R5_ARG3, tmp1, R5_ARG3);
+        __ bind(l_9);
+        __ lbz(tmp2, 0, R3_ARG1);
+        __ addic_(tmp1, tmp1, -1);
+        __ stb(tmp2, 0, R4_ARG2);
+        __ addi(R3_ARG1, R3_ARG1, 1);
+        __ addi(R4_ARG2, R4_ARG2, 1);
+        __ bne(CCR0, l_9);
+
+        __ bind(l_2);
+      }
+
+      // copy 8 elements at a time
+      __ xorr(tmp2, R3_ARG1, R4_ARG2); // skip if src & dest have differing alignment mod 8
+      __ andi_(tmp1, tmp2, 7);
+      __ bne(CCR0, l_7); // not same alignment -> to or from is aligned -> copy 8
+
+      // copy a 2-element word if necessary to align to 8 bytes
+      __ andi_(R0, R3_ARG1, 7);
+      __ beq(CCR0, l_7);
+
+      __ lwzx(tmp2, R3_ARG1, tmp3);
+      __ addi(R5_ARG3, R5_ARG3, -4);
+      __ stwx(tmp2, R4_ARG2, tmp3);
+      { // FasterArrayCopy
+        __ addi(R3_ARG1, R3_ARG1, 4);
+        __ addi(R4_ARG2, R4_ARG2, 4);
       }
-
-      __ li(tmp1, 16);
-
-      // Backbranch target aligned to 32-byte. Not 16-byte align as
-      // loop contains < 8 instructions that fit inside a single
-      // i-cache sector.
-      __ align(32);
-
-      __ bind(l_10);
-      // Use loop with VSX load/store instructions to
-      // copy 32 elements a time.
-      __ lxvd2x(tmp_vsr1, R3_ARG1);        // Load src
-      __ stxvd2x(tmp_vsr1, R4_ARG2);       // Store to dst
-      __ lxvd2x(tmp_vsr2, tmp1, R3_ARG1);  // Load src + 16
-      __ stxvd2x(tmp_vsr2, tmp1, R4_ARG2); // Store to dst + 16
-      __ addi(R3_ARG1, R3_ARG1, 32);       // Update src+=32
-      __ addi(R4_ARG2, R4_ARG2, 32);       // Update dsc+=32
-      __ bdnz(l_10);                       // Dec CTR and loop if not zero.
-
-      // Restore DSCR pre-fetch value.
-      if (VM_Version::has_mfdscr()) {
-        __ load_const_optimized(tmp2, VM_Version::_dscr_val);
-        __ mtdscr(tmp2);
+      __ bind(l_7);
+
+      { // FasterArrayCopy
+        __ cmpwi(CCR0, R5_ARG3, 31);
+        __ ble(CCR0, l_6); // copy 2 at a time if less than 32 elements remain
+
+        __ srdi(tmp1, R5_ARG3, 5);
+        __ andi_(R5_ARG3, R5_ARG3, 31);
+        __ mtctr(tmp1);
+
+       if (!VM_Version::has_vsx()) {
+
+        __ bind(l_8);
+        // Use unrolled version for mass copying (copy 32 elements a time)
+        // Load feeding store gets zero latency on Power6, however not on Power5.
+        // Therefore, the following sequence is made for the good of both.
+        __ ld(tmp1, 0, R3_ARG1);
+        __ ld(tmp2, 8, R3_ARG1);
+        __ ld(tmp3, 16, R3_ARG1);
+        __ ld(tmp4, 24, R3_ARG1);
+        __ std(tmp1, 0, R4_ARG2);
+        __ std(tmp2, 8, R4_ARG2);
+        __ std(tmp3, 16, R4_ARG2);
+        __ std(tmp4, 24, R4_ARG2);
+        __ addi(R3_ARG1, R3_ARG1, 32);
+        __ addi(R4_ARG2, R4_ARG2, 32);
+        __ bdnz(l_8);
+
+      } else { // Processor supports VSX, so use it to mass copy.
+
+        // Prefetch the data into the L2 cache.
+        __ dcbt(R3_ARG1, 0);
+
+        // If supported set DSCR pre-fetch to deepest.
+        if (VM_Version::has_mfdscr()) {
+          __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7);
+          __ mtdscr(tmp2);
+        }
+
+        __ li(tmp1, 16);
+
+        // Backbranch target aligned to 32-byte. Not 16-byte align as
+        // loop contains < 8 instructions that fit inside a single
+        // i-cache sector.
+        __ align(32);
+
+        __ bind(l_10);
+        // Use loop with VSX load/store instructions to
+        // copy 32 elements a time.
+        __ lxvd2x(tmp_vsr1, R3_ARG1);        // Load src
+        __ stxvd2x(tmp_vsr1, R4_ARG2);       // Store to dst
+        __ lxvd2x(tmp_vsr2, tmp1, R3_ARG1);  // Load src + 16
+        __ stxvd2x(tmp_vsr2, tmp1, R4_ARG2); // Store to dst + 16
+        __ addi(R3_ARG1, R3_ARG1, 32);       // Update src+=32
+        __ addi(R4_ARG2, R4_ARG2, 32);       // Update dsc+=32
+        __ bdnz(l_10);                       // Dec CTR and loop if not zero.
+
+        // Restore DSCR pre-fetch value.
+        if (VM_Version::has_mfdscr()) {
+          __ load_const_optimized(tmp2, VM_Version::_dscr_val);
+          __ mtdscr(tmp2);
+        }
+
+      } // VSX
+     } // FasterArrayCopy
+
+      __ bind(l_6);
+
+      // copy 4 elements at a time
+      __ cmpwi(CCR0, R5_ARG3, 4);
+      __ blt(CCR0, l_1);
+      __ srdi(tmp1, R5_ARG3, 2);
+      __ mtctr(tmp1); // is > 0
+      __ andi_(R5_ARG3, R5_ARG3, 3);
+
+      { // FasterArrayCopy
+        __ addi(R3_ARG1, R3_ARG1, -4);
+        __ addi(R4_ARG2, R4_ARG2, -4);
+        __ bind(l_3);
+        __ lwzu(tmp2, 4, R3_ARG1);
+        __ stwu(tmp2, 4, R4_ARG2);
+        __ bdnz(l_3);
+        __ addi(R3_ARG1, R3_ARG1, 4);
+        __ addi(R4_ARG2, R4_ARG2, 4);
       }
 
-    } // VSX
-   } // FasterArrayCopy
-
-    __ bind(l_6);
-
-    // copy 4 elements at a time
-    __ cmpwi(CCR0, R5_ARG3, 4);
-    __ blt(CCR0, l_1);
-    __ srdi(tmp1, R5_ARG3, 2);
-    __ mtctr(tmp1); // is > 0
-    __ andi_(R5_ARG3, R5_ARG3, 3);
-
-    { // FasterArrayCopy
-      __ addi(R3_ARG1, R3_ARG1, -4);
-      __ addi(R4_ARG2, R4_ARG2, -4);
-      __ bind(l_3);
-      __ lwzu(tmp2, 4, R3_ARG1);
-      __ stwu(tmp2, 4, R4_ARG2);
-      __ bdnz(l_3);
-      __ addi(R3_ARG1, R3_ARG1, 4);
-      __ addi(R4_ARG2, R4_ARG2, 4);
-    }
-
-    // do single element copy
-    __ bind(l_1);
-    __ cmpwi(CCR0, R5_ARG3, 0);
-    __ beq(CCR0, l_4);
-
-    { // FasterArrayCopy
-      __ mtctr(R5_ARG3);
-      __ addi(R3_ARG1, R3_ARG1, -1);
-      __ addi(R4_ARG2, R4_ARG2, -1);
-
-      __ bind(l_5);
-      __ lbzu(tmp2, 1, R3_ARG1);
-      __ stbu(tmp2, 1, R4_ARG2);
-      __ bdnz(l_5);
+      // do single element copy
+      __ bind(l_1);
+      __ cmpwi(CCR0, R5_ARG3, 0);
+      __ beq(CCR0, l_4);
+
+      { // FasterArrayCopy
+        __ mtctr(R5_ARG3);
+        __ addi(R3_ARG1, R3_ARG1, -1);
+        __ addi(R4_ARG2, R4_ARG2, -1);
+
+        __ bind(l_5);
+        __ lbzu(tmp2, 1, R3_ARG1);
+        __ stbu(tmp2, 1, R4_ARG2);
+        __ bdnz(l_5);
+      }
     }
 
     __ bind(l_4);
@@ -1167,15 +1185,17 @@
     // Do reverse copy. We assume the case of actual overlap is rare enough
     // that we don't have to optimize it.
     Label l_1, l_2;
-
-    __ b(l_2);
-    __ bind(l_1);
-    __ stbx(tmp1, R4_ARG2, R5_ARG3);
-    __ bind(l_2);
-    __ addic_(R5_ARG3, R5_ARG3, -1);
-    __ lbzx(tmp1, R3_ARG1, R5_ARG3);
-    __ bge(CCR0, l_1);
-
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+      __ b(l_2);
+      __ bind(l_1);
+      __ stbx(tmp1, R4_ARG2, R5_ARG3);
+      __ bind(l_2);
+      __ addic_(R5_ARG3, R5_ARG3, -1);
+      __ lbzx(tmp1, R3_ARG1, R5_ARG3);
+      __ bge(CCR0, l_1);
+    }
     __ li(R3_RET, 0); // return 0
     __ blr();
 
@@ -1252,155 +1272,159 @@
     assert_positive_int(R5_ARG3);
 
     Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9;
-
-    // don't try anything fancy if arrays don't have many elements
-    __ li(tmp3, 0);
-    __ cmpwi(CCR0, R5_ARG3, 9);
-    __ ble(CCR0, l_6); // copy 2 at a time
-
-    if (!aligned) {
-      __ xorr(tmp1, R3_ARG1, R4_ARG2);
-      __ andi_(tmp1, tmp1, 3);
-      __ bne(CCR0, l_6); // if arrays don't have the same alignment mod 4, do 2 element copy
-
-      // At this point it is guaranteed that both, from and to have the same alignment mod 4.
-
-      // Copy 1 element if necessary to align to 4 bytes.
-      __ andi_(tmp1, R3_ARG1, 3);
-      __ beq(CCR0, l_2);
-
-      __ lhz(tmp2, 0, R3_ARG1);
-      __ addi(R3_ARG1, R3_ARG1, 2);
-      __ sth(tmp2, 0, R4_ARG2);
-      __ addi(R4_ARG2, R4_ARG2, 2);
-      __ addi(R5_ARG3, R5_ARG3, -1);
-      __ bind(l_2);
-
-      // At this point the positions of both, from and to, are at least 4 byte aligned.
-
-      // Copy 4 elements at a time.
-      // Align to 8 bytes, but only if both, from and to, have same alignment mod 8.
-      __ xorr(tmp2, R3_ARG1, R4_ARG2);
-      __ andi_(tmp1, tmp2, 7);
-      __ bne(CCR0, l_7); // not same alignment mod 8 -> copy 4, either from or to will be unaligned
-
-      // Copy a 2-element word if necessary to align to 8 bytes.
-      __ andi_(R0, R3_ARG1, 7);
-      __ beq(CCR0, l_7);
-
-      __ lwzx(tmp2, R3_ARG1, tmp3);
-      __ addi(R5_ARG3, R5_ARG3, -2);
-      __ stwx(tmp2, R4_ARG2, tmp3);
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+      // don't try anything fancy if arrays don't have many elements
+      __ li(tmp3, 0);
+      __ cmpwi(CCR0, R5_ARG3, 9);
+      __ ble(CCR0, l_6); // copy 2 at a time
+
+      if (!aligned) {
+        __ xorr(tmp1, R3_ARG1, R4_ARG2);
+        __ andi_(tmp1, tmp1, 3);
+        __ bne(CCR0, l_6); // if arrays don't have the same alignment mod 4, do 2 element copy
+
+        // At this point it is guaranteed that both, from and to have the same alignment mod 4.
+
+        // Copy 1 element if necessary to align to 4 bytes.
+        __ andi_(tmp1, R3_ARG1, 3);
+        __ beq(CCR0, l_2);
+
+        __ lhz(tmp2, 0, R3_ARG1);
+        __ addi(R3_ARG1, R3_ARG1, 2);
+        __ sth(tmp2, 0, R4_ARG2);
+        __ addi(R4_ARG2, R4_ARG2, 2);
+        __ addi(R5_ARG3, R5_ARG3, -1);
+        __ bind(l_2);
+
+        // At this point the positions of both, from and to, are at least 4 byte aligned.
+
+        // Copy 4 elements at a time.
+        // Align to 8 bytes, but only if both, from and to, have same alignment mod 8.
+        __ xorr(tmp2, R3_ARG1, R4_ARG2);
+        __ andi_(tmp1, tmp2, 7);
+        __ bne(CCR0, l_7); // not same alignment mod 8 -> copy 4, either from or to will be unaligned
+
+        // Copy a 2-element word if necessary to align to 8 bytes.
+        __ andi_(R0, R3_ARG1, 7);
+        __ beq(CCR0, l_7);
+
+        __ lwzx(tmp2, R3_ARG1, tmp3);
+        __ addi(R5_ARG3, R5_ARG3, -2);
+        __ stwx(tmp2, R4_ARG2, tmp3);
+        { // FasterArrayCopy
+          __ addi(R3_ARG1, R3_ARG1, 4);
+          __ addi(R4_ARG2, R4_ARG2, 4);
+        }
+      }
+
+      __ bind(l_7);
+
+      // Copy 4 elements at a time; either the loads or the stores can
+      // be unaligned if aligned == false.
+
       { // FasterArrayCopy
+        __ cmpwi(CCR0, R5_ARG3, 15);
+        __ ble(CCR0, l_6); // copy 2 at a time if less than 16 elements remain
+
+        __ srdi(tmp1, R5_ARG3, 4);
+        __ andi_(R5_ARG3, R5_ARG3, 15);
+        __ mtctr(tmp1);
+
+        if (!VM_Version::has_vsx()) {
+
+          __ bind(l_8);
+          // Use unrolled version for mass copying (copy 16 elements a time).
+          // Load feeding store gets zero latency on Power6, however not on Power5.
+          // Therefore, the following sequence is made for the good of both.
+          __ ld(tmp1, 0, R3_ARG1);
+          __ ld(tmp2, 8, R3_ARG1);
+          __ ld(tmp3, 16, R3_ARG1);
+          __ ld(tmp4, 24, R3_ARG1);
+          __ std(tmp1, 0, R4_ARG2);
+          __ std(tmp2, 8, R4_ARG2);
+          __ std(tmp3, 16, R4_ARG2);
+          __ std(tmp4, 24, R4_ARG2);
+          __ addi(R3_ARG1, R3_ARG1, 32);
+          __ addi(R4_ARG2, R4_ARG2, 32);
+          __ bdnz(l_8);
+
+        } else { // Processor supports VSX, so use it to mass copy.
+
+          // Prefetch src data into L2 cache.
+          __ dcbt(R3_ARG1, 0);
+
+          // If supported set DSCR pre-fetch to deepest.
+          if (VM_Version::has_mfdscr()) {
+            __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7);
+            __ mtdscr(tmp2);
+          }
+          __ li(tmp1, 16);
+
+          // Backbranch target aligned to 32-byte. It's not aligned 16-byte
+          // as loop contains < 8 instructions that fit inside a single
+          // i-cache sector.
+          __ align(32);
+
+          __ bind(l_9);
+          // Use loop with VSX load/store instructions to
+          // copy 16 elements a time.
+          __ lxvd2x(tmp_vsr1, R3_ARG1);        // Load from src.
+          __ stxvd2x(tmp_vsr1, R4_ARG2);       // Store to dst.
+          __ lxvd2x(tmp_vsr2, R3_ARG1, tmp1);  // Load from src + 16.
+          __ stxvd2x(tmp_vsr2, R4_ARG2, tmp1); // Store to dst + 16.
+          __ addi(R3_ARG1, R3_ARG1, 32);       // Update src+=32.
+          __ addi(R4_ARG2, R4_ARG2, 32);       // Update dsc+=32.
+          __ bdnz(l_9);                        // Dec CTR and loop if not zero.
+
+          // Restore DSCR pre-fetch value.
+          if (VM_Version::has_mfdscr()) {
+            __ load_const_optimized(tmp2, VM_Version::_dscr_val);
+            __ mtdscr(tmp2);
+          }
+
+        }
+      } // FasterArrayCopy
+      __ bind(l_6);
+
+      // copy 2 elements at a time
+      { // FasterArrayCopy
+        __ cmpwi(CCR0, R5_ARG3, 2);
+        __ blt(CCR0, l_1);
+        __ srdi(tmp1, R5_ARG3, 1);
+        __ andi_(R5_ARG3, R5_ARG3, 1);
+
+        __ addi(R3_ARG1, R3_ARG1, -4);
+        __ addi(R4_ARG2, R4_ARG2, -4);
+        __ mtctr(tmp1);
+
+        __ bind(l_3);
+        __ lwzu(tmp2, 4, R3_ARG1);
+        __ stwu(tmp2, 4, R4_ARG2);
+        __ bdnz(l_3);
+
         __ addi(R3_ARG1, R3_ARG1, 4);
         __ addi(R4_ARG2, R4_ARG2, 4);
       }
+
+      // do single element copy
+      __ bind(l_1);
+      __ cmpwi(CCR0, R5_ARG3, 0);
+      __ beq(CCR0, l_4);
+
+      { // FasterArrayCopy
+        __ mtctr(R5_ARG3);
+        __ addi(R3_ARG1, R3_ARG1, -2);
+        __ addi(R4_ARG2, R4_ARG2, -2);
+
+        __ bind(l_5);
+        __ lhzu(tmp2, 2, R3_ARG1);
+        __ sthu(tmp2, 2, R4_ARG2);
+        __ bdnz(l_5);
+      }
     }
 
-    __ bind(l_7);
-
-    // Copy 4 elements at a time; either the loads or the stores can
-    // be unaligned if aligned == false.
-
-    { // FasterArrayCopy
-      __ cmpwi(CCR0, R5_ARG3, 15);
-      __ ble(CCR0, l_6); // copy 2 at a time if less than 16 elements remain
-
-      __ srdi(tmp1, R5_ARG3, 4);
-      __ andi_(R5_ARG3, R5_ARG3, 15);
-      __ mtctr(tmp1);
-
-      if (!VM_Version::has_vsx()) {
-
-        __ bind(l_8);
-        // Use unrolled version for mass copying (copy 16 elements a time).
-        // Load feeding store gets zero latency on Power6, however not on Power5.
-        // Therefore, the following sequence is made for the good of both.
-        __ ld(tmp1, 0, R3_ARG1);
-        __ ld(tmp2, 8, R3_ARG1);
-        __ ld(tmp3, 16, R3_ARG1);
-        __ ld(tmp4, 24, R3_ARG1);
-        __ std(tmp1, 0, R4_ARG2);
-        __ std(tmp2, 8, R4_ARG2);
-        __ std(tmp3, 16, R4_ARG2);
-        __ std(tmp4, 24, R4_ARG2);
-        __ addi(R3_ARG1, R3_ARG1, 32);
-        __ addi(R4_ARG2, R4_ARG2, 32);
-        __ bdnz(l_8);
-
-      } else { // Processor supports VSX, so use it to mass copy.
-
-        // Prefetch src data into L2 cache.
-        __ dcbt(R3_ARG1, 0);
-
-        // If supported set DSCR pre-fetch to deepest.
-        if (VM_Version::has_mfdscr()) {
-          __ load_const_optimized(tmp2, VM_Version::_dscr_val | 7);
-          __ mtdscr(tmp2);
-        }
-        __ li(tmp1, 16);
-
-        // Backbranch target aligned to 32-byte. It's not aligned 16-byte
-        // as loop contains < 8 instructions that fit inside a single
-        // i-cache sector.
-        __ align(32);
-
-        __ bind(l_9);
-        // Use loop with VSX load/store instructions to
-        // copy 16 elements a time.
-        __ lxvd2x(tmp_vsr1, R3_ARG1);        // Load from src.
-        __ stxvd2x(tmp_vsr1, R4_ARG2);       // Store to dst.
-        __ lxvd2x(tmp_vsr2, R3_ARG1, tmp1);  // Load from src + 16.
-        __ stxvd2x(tmp_vsr2, R4_ARG2, tmp1); // Store to dst + 16.
-        __ addi(R3_ARG1, R3_ARG1, 32);       // Update src+=32.
-        __ addi(R4_ARG2, R4_ARG2, 32);       // Update dsc+=32.
-        __ bdnz(l_9);                        // Dec CTR and loop if not zero.
-
-        // Restore DSCR pre-fetch value.
-        if (VM_Version::has_mfdscr()) {
-          __ load_const_optimized(tmp2, VM_Version::_dscr_val);
-          __ mtdscr(tmp2);
-        }
-
-      }
-    } // FasterArrayCopy
-    __ bind(l_6);
-
-    // copy 2 elements at a time
-    { // FasterArrayCopy
-      __ cmpwi(CCR0, R5_ARG3, 2);
-      __ blt(CCR0, l_1);
-      __ srdi(tmp1, R5_ARG3, 1);
-      __ andi_(R5_ARG3, R5_ARG3, 1);
-
-      __ addi(R3_ARG1, R3_ARG1, -4);
-      __ addi(R4_ARG2, R4_ARG2, -4);
-      __ mtctr(tmp1);
-
-      __ bind(l_3);
-      __ lwzu(tmp2, 4, R3_ARG1);
-      __ stwu(tmp2, 4, R4_ARG2);
-      __ bdnz(l_3);
-
-      __ addi(R3_ARG1, R3_ARG1, 4);
-      __ addi(R4_ARG2, R4_ARG2, 4);
-    }
-
-    // do single element copy
-    __ bind(l_1);
-    __ cmpwi(CCR0, R5_ARG3, 0);
-    __ beq(CCR0, l_4);
-
-    { // FasterArrayCopy
-      __ mtctr(R5_ARG3);
-      __ addi(R3_ARG1, R3_ARG1, -2);
-      __ addi(R4_ARG2, R4_ARG2, -2);
-
-      __ bind(l_5);
-      __ lhzu(tmp2, 2, R3_ARG1);
-      __ sthu(tmp2, 2, R4_ARG2);
-      __ bdnz(l_5);
-    }
     __ bind(l_4);
     __ li(R3_RET, 0); // return 0
     __ blr();
@@ -1432,15 +1456,18 @@
     array_overlap_test(nooverlap_target, 1);
 
     Label l_1, l_2;
-    __ sldi(tmp1, R5_ARG3, 1);
-    __ b(l_2);
-    __ bind(l_1);
-    __ sthx(tmp2, R4_ARG2, tmp1);
-    __ bind(l_2);
-    __ addic_(tmp1, tmp1, -2);
-    __ lhzx(tmp2, R3_ARG1, tmp1);
-    __ bge(CCR0, l_1);
-
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+      __ sldi(tmp1, R5_ARG3, 1);
+      __ b(l_2);
+      __ bind(l_1);
+      __ sthx(tmp2, R4_ARG2, tmp1);
+      __ bind(l_2);
+      __ addic_(tmp1, tmp1, -2);
+      __ lhzx(tmp2, R3_ARG1, tmp1);
+      __ bge(CCR0, l_1);
+    }
     __ li(R3_RET, 0); // return 0
     __ blr();
 
@@ -1588,7 +1615,11 @@
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ function_entry();
     assert_positive_int(R5_ARG3);
-    generate_disjoint_int_copy_core(aligned);
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+      generate_disjoint_int_copy_core(aligned);
+    }
     __ li(R3_RET, 0); // return 0
     __ blr();
     return start;
@@ -1736,8 +1767,11 @@
       STUB_ENTRY(jint_disjoint_arraycopy);
 
     array_overlap_test(nooverlap_target, 2);
-
-    generate_conjoint_int_copy_core(aligned);
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+      generate_conjoint_int_copy_core(aligned);
+    }
 
     __ li(R3_RET, 0); // return 0
     __ blr();
@@ -1859,11 +1893,15 @@
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ function_entry();
     assert_positive_int(R5_ARG3);
-    generate_disjoint_long_copy_core(aligned);
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+      generate_disjoint_long_copy_core(aligned);
+    }
     __ li(R3_RET, 0); // return 0
     __ blr();
 
-    return start;
+  return start;
   }
 
   // Generate core code for conjoint long copy (and oop copy on
@@ -1986,8 +2024,11 @@
       STUB_ENTRY(jlong_disjoint_arraycopy);
 
     array_overlap_test(nooverlap_target, 3);
-    generate_conjoint_long_copy_core(aligned);
-
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+      generate_conjoint_long_copy_core(aligned);
+    }
     __ li(R3_RET, 0); // return 0
     __ blr();
 
@@ -3008,6 +3049,9 @@
     // Note: the disjoint stubs must be generated first, some of
     // the conjoint stubs use them.
 
+    address ucm_common_error_exit       =  generate_unsafecopy_common_error_exit();
+    UnsafeCopyMemory::set_common_exit_stub_pc(ucm_common_error_exit);
+
     // non-aligned disjoint versions
     StubRoutines::_jbyte_disjoint_arraycopy       = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy");
     StubRoutines::_jshort_disjoint_arraycopy      = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy");
@@ -3579,6 +3623,10 @@
   }
 };
 
+#define UCM_TABLE_MAX_ENTRIES 8
 void StubGenerator_generate(CodeBuffer* code, bool all) {
+  if (UnsafeCopyMemory::_table == NULL) {
+    UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES);
+  }
   StubGenerator g(code, all);
 }

--- a/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp	Thu Jun 20 14:09:22 2019 +0100
+++ b/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp	Mon Jun 24 11:37:56 2019 -0700
@@ -1076,6 +1076,17 @@
       __ delayed()->add(end_from, left_shift, end_from); // restore address
   }
 
+  address generate_unsafecopy_common_error_exit() {
+    address start_pc = __ pc();
+    if (UseBlockCopy) {
+      __ wrasi(G0, Assembler::ASI_PRIMARY_NOFAULT);
+      __ membar(Assembler::StoreLoad);
+    }
+    __ retl();
+    __ delayed()->mov(G0, O0); // return 0
+    return start_pc;
+  }
+
   //
   //  Generate stub for disjoint byte copy.  If "aligned" is true, the
   //  "from" and "to" addresses are assumed to be heapword aligned.
@@ -1107,61 +1118,66 @@
       BLOCK_COMMENT("Entry:");
     }
 
-    // for short arrays, just do single element copy
-    __ cmp(count, 23); // 16 + 7
-    __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
-    __ delayed()->mov(G0, offset);
-
-    if (aligned) {
-      // 'aligned' == true when it is known statically during compilation
-      // of this arraycopy call site that both 'from' and 'to' addresses
-      // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
-      //
-      // Aligned arrays have 4 bytes alignment in 32-bits VM
-      // and 8 bytes - in 64-bits VM. So we do it only for 32-bits VM
-      //
-    } else {
-      // copy bytes to align 'to' on 8 byte boundary
-      __ andcc(to, 7, G1); // misaligned bytes
-      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
-      __ delayed()->neg(G1);
-      __ inc(G1, 8);       // bytes need to copy to next 8-bytes alignment
-      __ sub(count, G1, count);
-    __ BIND(L_align);
-      __ ldub(from, 0, O3);
-      __ deccc(G1);
-      __ inc(from);
-      __ stb(O3, to, 0);
-      __ br(Assembler::notZero, false, Assembler::pt, L_align);
-      __ delayed()->inc(to);
-    __ BIND(L_skip_alignment);
-    }
-    if (!aligned) {
-      // Copy with shift 16 bytes per iteration if arrays do not have
-      // the same alignment mod 8, otherwise fall through to the next
-      // code for aligned copy.
-      // The compare above (count >= 23) guarantes 'count' >= 16 bytes.
-      // Also jump over aligned copy after the copy with shift completed.
-
-      copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte);
-    }
-
-    // Both array are 8 bytes aligned, copy 16 bytes at a time
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+
+      // for short arrays, just do single element copy
+      __ cmp(count, 23); // 16 + 7
+      __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
+      __ delayed()->mov(G0, offset);
+
+      if (aligned) {
+        // 'aligned' == true when it is known statically during compilation
+        // of this arraycopy call site that both 'from' and 'to' addresses
+        // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
+        //
+        // Aligned arrays have 4 bytes alignment in 32-bits VM
+        // and 8 bytes - in 64-bits VM. So we do it only for 32-bits VM
+        //
+      } else {
+        // copy bytes to align 'to' on 8 byte boundary
+        __ andcc(to, 7, G1); // misaligned bytes
+        __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
+        __ delayed()->neg(G1);
+        __ inc(G1, 8);       // bytes need to copy to next 8-bytes alignment
+        __ sub(count, G1, count);
+      __ BIND(L_align);
+        __ ldub(from, 0, O3);
+        __ deccc(G1);
+        __ inc(from);
+        __ stb(O3, to, 0);
+        __ br(Assembler::notZero, false, Assembler::pt, L_align);
+        __ delayed()->inc(to);
+      __ BIND(L_skip_alignment);
+      }
+      if (!aligned) {
+        // Copy with shift 16 bytes per iteration if arrays do not have
+        // the same alignment mod 8, otherwise fall through to the next
+        // code for aligned copy.
+        // The compare above (count >= 23) guarantes 'count' >= 16 bytes.
+        // Also jump over aligned copy after the copy with shift completed.
+
+        copy_16_bytes_forward_with_shift(from, to, count, 0, L_copy_byte);
+      }
+
+      // Both array are 8 bytes aligned, copy 16 bytes at a time
       __ and3(count, 7, G4); // Save count
       __ srl(count, 3, count);
-     generate_disjoint_long_copy_core(aligned);
+      generate_disjoint_long_copy_core(aligned);
       __ mov(G4, count);     // Restore count
 
-    // copy tailing bytes
-    __ BIND(L_copy_byte);
-      __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
-      __ align(OptoLoopAlignment);
-    __ BIND(L_copy_byte_loop);
-      __ ldub(from, offset, O3);
-      __ deccc(count);
-      __ stb(O3, to, offset);
-      __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop);
-      __ delayed()->inc(offset);
+      // copy tailing bytes
+      __ BIND(L_copy_byte);
+        __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
+        __ align(OptoLoopAlignment);
+      __ BIND(L_copy_byte_loop);
+        __ ldub(from, offset, O3);
+        __ deccc(count);
+        __ stb(O3, to, offset);
+        __ brx(Assembler::notZero, false, Assembler::pt, L_copy_byte_loop);
+        __ delayed()->inc(offset);
+    }
 
     __ BIND(L_exit);
       // O3, O4 are used as temp registers
@@ -1207,70 +1223,75 @@
 
     array_overlap_test(nooverlap_target, 0);
 
-    __ add(to, count, end_to);       // offset after last copied element
-
-    // for short arrays, just do single element copy
-    __ cmp(count, 23); // 16 + 7
-    __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
-    __ delayed()->add(from, count, end_from);
-
     {
-      // Align end of arrays since they could be not aligned even
-      // when arrays itself are aligned.
-
-      // copy bytes to align 'end_to' on 8 byte boundary
-      __ andcc(end_to, 7, G1); // misaligned bytes
-      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
-      __ delayed()->nop();
-      __ sub(count, G1, count);
-    __ BIND(L_align);
-      __ dec(end_from);
-      __ dec(end_to);
-      __ ldub(end_from, 0, O3);
-      __ deccc(G1);
-      __ brx(Assembler::notZero, false, Assembler::pt, L_align);
-      __ delayed()->stb(O3, end_to, 0);
-    __ BIND(L_skip_alignment);
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+
+      __ add(to, count, end_to);       // offset after last copied element
+
+      // for short arrays, just do single element copy
+      __ cmp(count, 23); // 16 + 7
+      __ brx(Assembler::less, false, Assembler::pn, L_copy_byte);
+      __ delayed()->add(from, count, end_from);
+
+      {
+        // Align end of arrays since they could be not aligned even
+        // when arrays itself are aligned.
+
+        // copy bytes to align 'end_to' on 8 byte boundary
+        __ andcc(end_to, 7, G1); // misaligned bytes
+        __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
+        __ delayed()->nop();
+        __ sub(count, G1, count);
+      __ BIND(L_align);
+        __ dec(end_from);
+        __ dec(end_to);
+        __ ldub(end_from, 0, O3);
+        __ deccc(G1);
+        __ brx(Assembler::notZero, false, Assembler::pt, L_align);
+        __ delayed()->stb(O3, end_to, 0);
+      __ BIND(L_skip_alignment);
+      }
+      if (aligned) {
+        // Both arrays are aligned to 8-bytes in 64-bits VM.
+        // The 'count' is decremented in copy_16_bytes_backward_with_shift()
+        // in unaligned case.
+        __ dec(count, 16);
+      } else {
+        // Copy with shift 16 bytes per iteration if arrays do not have
+        // the same alignment mod 8, otherwise jump to the next
+        // code for aligned copy (and substracting 16 from 'count' before jump).
+        // The compare above (count >= 11) guarantes 'count' >= 16 bytes.
+        // Also jump over aligned copy after the copy with shift completed.
+
+       copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
+                                          L_aligned_copy, L_copy_byte);
+      }
+      // copy 4 elements (16 bytes) at a time
+        __ align(OptoLoopAlignment);
+      __ BIND(L_aligned_copy);
+        __ dec(end_from, 16);
+        __ ldx(end_from, 8, O3);
+        __ ldx(end_from, 0, O4);
+        __ dec(end_to, 16);
+        __ deccc(count, 16);
+        __ stx(O3, end_to, 8);
+        __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
+        __ delayed()->stx(O4, end_to, 0);
+        __ inc(count, 16);
+
+      // copy 1 element (2 bytes) at a time
+      __ BIND(L_copy_byte);
+        __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
+        __ align(OptoLoopAlignment);
+      __ BIND(L_copy_byte_loop);
+        __ dec(end_from);
+        __ dec(end_to);
+        __ ldub(end_from, 0, O4);
+        __ deccc(count);
+        __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop);
+        __ delayed()->stb(O4, end_to, 0);
     }
-    if (aligned) {
-      // Both arrays are aligned to 8-bytes in 64-bits VM.
-      // The 'count' is decremented in copy_16_bytes_backward_with_shift()
-      // in unaligned case.
-      __ dec(count, 16);
-    } else {
-      // Copy with shift 16 bytes per iteration if arrays do not have
-      // the same alignment mod 8, otherwise jump to the next
-      // code for aligned copy (and substracting 16 from 'count' before jump).
-      // The compare above (count >= 11) guarantes 'count' >= 16 bytes.
-      // Also jump over aligned copy after the copy with shift completed.
-
-      copy_16_bytes_backward_with_shift(end_from, end_to, count, 16,
-                                        L_aligned_copy, L_copy_byte);
-    }
-    // copy 4 elements (16 bytes) at a time
-      __ align(OptoLoopAlignment);
-    __ BIND(L_aligned_copy);
-      __ dec(end_from, 16);
-      __ ldx(end_from, 8, O3);
-      __ ldx(end_from, 0, O4);
-      __ dec(end_to, 16);
-      __ deccc(count, 16);
-      __ stx(O3, end_to, 8);
-      __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
-      __ delayed()->stx(O4, end_to, 0);
-      __ inc(count, 16);
-
-    // copy 1 element (2 bytes) at a time
-    __ BIND(L_copy_byte);
-      __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
-      __ align(OptoLoopAlignment);
-    __ BIND(L_copy_byte_loop);
-      __ dec(end_from);
-      __ dec(end_to);
-      __ ldub(end_from, 0, O4);
-      __ deccc(count);
-      __ brx(Assembler::greater, false, Assembler::pt, L_copy_byte_loop);
-      __ delayed()->stb(O4, end_to, 0);
 
     __ BIND(L_exit);
     // O3, O4 are used as temp registers
@@ -1311,68 +1332,72 @@
       BLOCK_COMMENT("Entry:");
     }
 
-    // for short arrays, just do single element copy
-    __ cmp(count, 11); // 8 + 3  (22 bytes)
-    __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
-    __ delayed()->mov(G0, offset);
-
-    if (aligned) {
-      // 'aligned' == true when it is known statically during compilation
-      // of this arraycopy call site that both 'from' and 'to' addresses
-      // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
-      //
-      // Aligned arrays have 4 bytes alignment in 32-bits VM
-      // and 8 bytes - in 64-bits VM.
-      //
-    } else {
-      // copy 1 element if necessary to align 'to' on an 4 bytes
-      __ andcc(to, 3, G0);
-      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
-      __ delayed()->lduh(from, 0, O3);
-      __ inc(from, 2);
-      __ inc(to, 2);
-      __ dec(count);
-      __ sth(O3, to, -2);
-    __ BIND(L_skip_alignment);
-
-      // copy 2 elements to align 'to' on an 8 byte boundary
-      __ andcc(to, 7, G0);
-      __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
-      __ delayed()->lduh(from, 0, O3);
-      __ dec(count, 2);
-      __ lduh(from, 2, O4);
-      __ inc(from, 4);
-      __ inc(to, 4);
-      __ sth(O3, to, -4);
-      __ sth(O4, to, -2);
-    __ BIND(L_skip_alignment2);
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+      // for short arrays, just do single element copy
+      __ cmp(count, 11); // 8 + 3  (22 bytes)
+      __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
+      __ delayed()->mov(G0, offset);
+
+      if (aligned) {
+        // 'aligned' == true when it is known statically during compilation
+        // of this arraycopy call site that both 'from' and 'to' addresses
+        // are HeapWordSize aligned (see LibraryCallKit::basictype2arraycopy()).
+        //
+        // Aligned arrays have 4 bytes alignment in 32-bits VM
+        // and 8 bytes - in 64-bits VM.
+        //
+      } else {
+        // copy 1 element if necessary to align 'to' on an 4 bytes
+        __ andcc(to, 3, G0);
+        __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
+        __ delayed()->lduh(from, 0, O3);
+        __ inc(from, 2);
+        __ inc(to, 2);
+        __ dec(count);
+        __ sth(O3, to, -2);
+      __ BIND(L_skip_alignment);
+
+        // copy 2 elements to align 'to' on an 8 byte boundary
+        __ andcc(to, 7, G0);
+        __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
+        __ delayed()->lduh(from, 0, O3);
+        __ dec(count, 2);
+        __ lduh(from, 2, O4);
+        __ inc(from, 4);
+        __ inc(to, 4);
+        __ sth(O3, to, -4);
+        __ sth(O4, to, -2);
+      __ BIND(L_skip_alignment2);
+      }
+      if (!aligned) {
+        // Copy with shift 16 bytes per iteration if arrays do not have
+        // the same alignment mod 8, otherwise fall through to the next
+        // code for aligned copy.
+        // The compare above (count >= 11) guarantes 'count' >= 16 bytes.
+        // Also jump over aligned copy after the copy with shift completed.
+
+        copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes);
+      }
+
+      // Both array are 8 bytes aligned, copy 16 bytes at a time
+        __ and3(count, 3, G4); // Save
+        __ srl(count, 2, count);
+       generate_disjoint_long_copy_core(aligned);
+        __ mov(G4, count); // restore
+
+      // copy 1 element at a time
+      __ BIND(L_copy_2_bytes);
+        __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
+        __ align(OptoLoopAlignment);
+      __ BIND(L_copy_2_bytes_loop);
+        __ lduh(from, offset, O3);
+        __ deccc(count);
+        __ sth(O3, to, offset);
+        __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop);
+        __ delayed()->inc(offset, 2);
     }
-    if (!aligned) {
-      // Copy with shift 16 bytes per iteration if arrays do not have
-      // the same alignment mod 8, otherwise fall through to the next
-      // code for aligned copy.
-      // The compare above (count >= 11) guarantes 'count' >= 16 bytes.
-      // Also jump over aligned copy after the copy with shift completed.
-
-      copy_16_bytes_forward_with_shift(from, to, count, 1, L_copy_2_bytes);
-    }
-
-    // Both array are 8 bytes aligned, copy 16 bytes at a time
-      __ and3(count, 3, G4); // Save
-      __ srl(count, 2, count);
-     generate_disjoint_long_copy_core(aligned);
-      __ mov(G4, count); // restore
-
-    // copy 1 element at a time
-    __ BIND(L_copy_2_bytes);
-      __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
-      __ align(OptoLoopAlignment);
-    __ BIND(L_copy_2_bytes_loop);
-      __ lduh(from, offset, O3);
-      __ deccc(count);
-      __ sth(O3, to, offset);
-      __ brx(Assembler::notZero, false, Assembler::pt, L_copy_2_bytes_loop);
-      __ delayed()->inc(offset, 2);
 
     __ BIND(L_exit);
       // O3, O4 are used as temp registers
@@ -1639,79 +1664,83 @@
 
     array_overlap_test(nooverlap_target, 1);
 
-    __ sllx(count, LogBytesPerShort, byte_count);
-    __ add(to, byte_count, end_to);  // offset after last copied element
-
-    // for short arrays, just do single element copy
-    __ cmp(count, 11); // 8 + 3  (22 bytes)
-    __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
-    __ delayed()->add(from, byte_count, end_from);
-
     {
-      // Align end of arrays since they could be not aligned even
-      // when arrays itself are aligned.
-
-      // copy 1 element if necessary to align 'end_to' on an 4 bytes
-      __ andcc(end_to, 3, G0);
-      __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
-      __ delayed()->lduh(end_from, -2, O3);
-      __ dec(end_from, 2);
-      __ dec(end_to, 2);
-      __ dec(count);
-      __ sth(O3, end_to, 0);
-    __ BIND(L_skip_alignment);
-
-      // copy 2 elements to align 'end_to' on an 8 byte boundary
-      __ andcc(end_to, 7, G0);
-      __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
-      __ delayed()->lduh(end_from, -2, O3);
-      __ dec(count, 2);
-      __ lduh(end_from, -4, O4);
-      __ dec(end_from, 4);
-      __ dec(end_to, 4);
-      __ sth(O3, end_to, 2);
-      __ sth(O4, end_to, 0);
-    __ BIND(L_skip_alignment2);
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+
+      __ sllx(count, LogBytesPerShort, byte_count);
+      __ add(to, byte_count, end_to);  // offset after last copied element
+
+      // for short arrays, just do single element copy
+      __ cmp(count, 11); // 8 + 3  (22 bytes)
+      __ brx(Assembler::less, false, Assembler::pn, L_copy_2_bytes);
+      __ delayed()->add(from, byte_count, end_from);
+
+      {
+        // Align end of arrays since they could be not aligned even
+        // when arrays itself are aligned.
+
+        // copy 1 element if necessary to align 'end_to' on an 4 bytes
+        __ andcc(end_to, 3, G0);
+        __ br(Assembler::zero, false, Assembler::pt, L_skip_alignment);
+        __ delayed()->lduh(end_from, -2, O3);
+        __ dec(end_from, 2);
+        __ dec(end_to, 2);
+        __ dec(count);
+        __ sth(O3, end_to, 0);
+      __ BIND(L_skip_alignment);
+
+        // copy 2 elements to align 'end_to' on an 8 byte boundary
+        __ andcc(end_to, 7, G0);
+        __ br(Assembler::zero, false, Assembler::pn, L_skip_alignment2);
+        __ delayed()->lduh(end_from, -2, O3);
+        __ dec(count, 2);
+        __ lduh(end_from, -4, O4);
+        __ dec(end_from, 4);
+        __ dec(end_to, 4);
+        __ sth(O3, end_to, 2);
+        __ sth(O4, end_to, 0);
+      __ BIND(L_skip_alignment2);
+      }
+      if (aligned) {
+        // Both arrays are aligned to 8-bytes in 64-bits VM.
+        // The 'count' is decremented in copy_16_bytes_backward_with_shift()
+        // in unaligned case.
+        __ dec(count, 8);
+      } else {
+        // Copy with shift 16 bytes per iteration if arrays do not have
+        // the same alignment mod 8, otherwise jump to the next
+        // code for aligned copy (and substracting 8 from 'count' before jump).
+        // The compare above (count >= 11) guarantes 'count' >= 16 bytes.
+        // Also jump over aligned copy after the copy with shift completed.
+
+        copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
+                                        L_aligned_copy, L_copy_2_bytes);
+      }
+      // copy 4 elements (16 bytes) at a time
+        __ align(OptoLoopAlignment);
+      __ BIND(L_aligned_copy);
+        __ dec(end_from, 16);
+        __ ldx(end_from, 8, O3);
+        __ ldx(end_from, 0, O4);
+        __ dec(end_to, 16);
+        __ deccc(count, 8);
+        __ stx(O3, end_to, 8);
+        __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
+        __ delayed()->stx(O4, end_to, 0);
+        __ inc(count, 8);
+
+      // copy 1 element (2 bytes) at a time
+      __ BIND(L_copy_2_bytes);
+        __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
+      __ BIND(L_copy_2_bytes_loop);
+        __ dec(end_from, 2);
+        __ dec(end_to, 2);
+        __ lduh(end_from, 0, O4);
+        __ deccc(count);
+        __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop);
+        __ delayed()->sth(O4, end_to, 0);
     }
-    if (aligned) {
-      // Both arrays are aligned to 8-bytes in 64-bits VM.
-      // The 'count' is decremented in copy_16_bytes_backward_with_shift()
-      // in unaligned case.
-      __ dec(count, 8);
-    } else {
-      // Copy with shift 16 bytes per iteration if arrays do not have
-      // the same alignment mod 8, otherwise jump to the next
-      // code for aligned copy (and substracting 8 from 'count' before jump).
-      // The compare above (count >= 11) guarantes 'count' >= 16 bytes.
-      // Also jump over aligned copy after the copy with shift completed.
-
-      copy_16_bytes_backward_with_shift(end_from, end_to, count, 8,
-                                        L_aligned_copy, L_copy_2_bytes);
-    }
-    // copy 4 elements (16 bytes) at a time
-      __ align(OptoLoopAlignment);
-    __ BIND(L_aligned_copy);
-      __ dec(end_from, 16);
-      __ ldx(end_from, 8, O3);
-      __ ldx(end_from, 0, O4);
-      __ dec(end_to, 16);
-      __ deccc(count, 8);
-      __ stx(O3, end_to, 8);
-      __ brx(Assembler::greaterEqual, false, Assembler::pt, L_aligned_copy);
-      __ delayed()->stx(O4, end_to, 0);
-      __ inc(count, 8);
-
-    // copy 1 element (2 bytes) at a time
-    __ BIND(L_copy_2_bytes);
-      __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_exit);
-    __ BIND(L_copy_2_bytes_loop);
-      __ dec(end_from, 2);
-      __ dec(end_to, 2);
-      __ lduh(end_from, 0, O4);
-      __ deccc(count);
-      __ brx(Assembler::greater, false, Assembler::pt, L_copy_2_bytes_loop);
-      __ delayed()->sth(O4, end_to, 0);
-
     __ BIND(L_exit);
     // O3, O4 are used as temp registers
     inc_counter_np(SharedRuntime::_jshort_array_copy_ctr, O3, O4);
@@ -1870,9 +1899,11 @@
       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
       BLOCK_COMMENT("Entry:");
     }
-
-    generate_disjoint_int_copy_core(aligned);
-
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+      generate_disjoint_int_copy_core(aligned);
+    }
     // O3, O4 are used as temp registers
     inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4);
     __ retl();
@@ -2005,9 +2036,11 @@
     }
 
     array_overlap_test(nooverlap_target, 2);
-
-    generate_conjoint_int_copy_core(aligned);
-
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false);
+      generate_conjoint_int_copy_core(aligned);
+    }
     // O3, O4 are used as temp registers
     inc_counter_np(SharedRuntime::_jint_array_copy_ctr, O3, O4);
     __ retl();
@@ -2156,8 +2189,11 @@
       BLOCK_COMMENT("Entry:");
     }
 
-    generate_disjoint_long_copy_core(aligned);
-
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, true, false);
+      generate_disjoint_long_copy_core(aligned);
+    }
     // O3, O4 are used as temp registers
     inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4);
     __ retl();
@@ -2232,9 +2268,11 @@
     }
 
     array_overlap_test(nooverlap_target, 3);
-
-    generate_conjoint_long_copy_core(aligned);
-
+    {
+      // UnsafeCopyMemory page error: continue at UnsafeCopyMemory common_error_exit
+      UnsafeCopyMemoryMark ucmm(this, true, false);
+      generate_conjoint_long_copy_core(aligned);
+    }
     // O3, O4 are used as temp registers
     inc_counter_np(SharedRuntime::_jlong_array_copy_ctr, O3, O4);
     __ retl();
@@ -2929,6 +2967,9 @@
     address entry_jlong_arraycopy;
     address entry_checkcast_arraycopy;
 
+    address ucm_common_error_exit       =  generate_unsafecopy_common_error_exit();
+    UnsafeCopyMemory::set_common_exit_stub_pc(ucm_common_error_exit);
+
     //*** jbyte
     // Always need aligned and unaligned versions
     StubRoutines::_jbyte_disjoint_arraycopy         = generate_disjoint_byte_copy(false, &entry,
@@ -5821,6 +5862,10 @@
 
 }; // end class declaration
 
+#define UCM_TABLE_MAX_ENTRIES 8
 void StubGenerator_generate(CodeBuffer* code, bool all) {
+  if (UnsafeCopyMemory::_table == NULL) {
+    UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES);
+  }
   StubGenerator g(code, all);
 }

--- a/src/hotspot/cpu/x86/assembler_x86.cpp	Thu Jun 20 14:09:22 2019 +0100
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp	Mon Jun 24 11:37:56 2019 -0700
@@ -789,6 +789,8 @@
     case 0x59: // mulpd
     case 0x6E: // movd
     case 0x7E: // movd
+    case 0x6F: // movdq
+    case 0x7F: // movdq
     case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
     case 0xFE: // paddd
       debug_only(has_disp32 = true);
@@ -4274,6 +4276,7 @@
   emit_operand(dst, src);
   emit_int8(mode & 0xFF);
 }
+
 void Assembler::evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
   assert(VM_Version::supports_evex(), "requires EVEX support");
   assert(vector_len == Assembler::AVX_256bit || vector_len == Assembler::AVX_512bit, "");

--- a/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp	Thu Jun 20 14:09:22 2019 +0100
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp	Mon Jun 24 11:37:56 2019 -0700
@@ -889,91 +889,98 @@
 
     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, t, from, to, count);
-
-    __ subptr(to, from); // to --> to_from
-    __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
-    __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
-    if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
-      // align source address at 4 bytes address boundary
-      if (t == T_BYTE) {
-        // One byte misalignment happens only for byte arrays
-        __ testl(from, 1);
-        __ jccb(Assembler::zero, L_skip_align1);
-        __ movb(rax, Address(from, 0));
-        __ movb(Address(from, to_from, Address::times_1, 0), rax);
-        __ increment(from);
-        __ decrement(count);
-      __ BIND(L_skip_align1);
+    {
+      bool add_entry = (t != T_OBJECT && (!aligned || t == T_INT));
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, add_entry, true);
+      __ subptr(to, from); // to --> to_from
+      __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
+      __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
+      if (!UseUnalignedLoadStores && !aligned && (t == T_BYTE || t == T_SHORT)) {
+        // align source address at 4 bytes address boundary
+        if (t == T_BYTE) {
+          // One byte misalignment happens only for byte arrays
+          __ testl(from, 1);
+          __ jccb(Assembler::zero, L_skip_align1);
+          __ movb(rax, Address(from, 0));
+          __ movb(Address(from, to_from, Address::times_1, 0), rax);
+          __ increment(from);
+          __ decrement(count);
+        __ BIND(L_skip_align1);
+        }
+        // Two bytes misalignment happens only for byte and short (char) arrays
+        __ testl(from, 2);
+        __ jccb(Assembler::zero, L_skip_align2);
+        __ movw(rax, Address(from, 0));
+        __ movw(Address(from, to_from, Address::times_1, 0), rax);
+        __ addptr(from, 2);
+        __ subl(count, 1<<(shift-1));
+      __ BIND(L_skip_align2);
       }
-      // Two bytes misalignment happens only for byte and short (char) arrays
-      __ testl(from, 2);
-      __ jccb(Assembler::zero, L_skip_align2);
-      __ movw(rax, Address(from, 0));
-      __ movw(Address(from, to_from, Address::times_1, 0), rax);
-      __ addptr(from, 2);
-      __ subl(count, 1<<(shift-1));
-    __ BIND(L_skip_align2);
-    }
-    if (!VM_Version::supports_mmx()) {
-      __ mov(rax, count);      // save 'count'
-      __ shrl(count, shift); // bytes count
-      __ addptr(to_from, from);// restore 'to'
-      __ rep_mov();
-      __ subptr(to_from, from);// restore 'to_from'
-      __ mov(count, rax);      // restore 'count'
-      __ jmpb(L_copy_2_bytes); // all dwords were copied
-    } else {
-      if (!UseUnalignedLoadStores) {
-        // align to 8 bytes, we know we are 4 byte aligned to start
-        __ testptr(from, 4);
-        __ jccb(Assembler::zero, L_copy_64_bytes);
-        __ movl(rax, Address(from, 0));
-        __ movl(Address(from, to_from, Address::times_1, 0), rax);
+      if (!VM_Version::supports_mmx()) {
+        __ mov(rax, count);      // save 'count'
+        __ shrl(count, shift); // bytes count
+        __ addptr(to_from, from);// restore 'to'
+        __ rep_mov();
+        __ subptr(to_from, from);// restore 'to_from'
+        __ mov(count, rax);      // restore 'count'
+        __ jmpb(L_copy_2_bytes); // all dwords were copied
+      } else {
+        if (!UseUnalignedLoadStores) {
+          // align to 8 bytes, we know we are 4 byte aligned to start
+          __ testptr(from, 4);
+          __ jccb(Assembler::zero, L_copy_64_bytes);
+          __ movl(rax, Address(from, 0));
+          __ movl(Address(from, to_from, Address::times_1, 0), rax);
+          __ addptr(from, 4);
+          __ subl(count, 1<<shift);
+         }
+      __ BIND(L_copy_64_bytes);
+        __ mov(rax, count);
+        __ shrl(rax, shift+1);  // 8 bytes chunk count
+        //
+        // Copy 8-byte chunks through MMX registers, 8 per iteration of the loop
+        //
+        if (UseXMMForArrayCopy) {
+          xmm_copy_forward(from, to_from, rax);
+        } else {
+          mmx_copy_forward(from, to_from, rax);
+        }
+      }
+      // copy tailing dword
+    __ BIND(L_copy_4_bytes);
+      __ testl(count, 1<<shift);
+      __ jccb(Assembler::zero, L_copy_2_bytes);
+      __ movl(rax, Address(from, 0));
+      __ movl(Address(from, to_from, Address::times_1, 0), rax);
+      if (t == T_BYTE || t == T_SHORT) {
         __ addptr(from, 4);
-        __ subl(count, 1<<shift);
-      }
-    __ BIND(L_copy_64_bytes);
-      __ mov(rax, count);
-      __ shrl(rax, shift+1);  // 8 bytes chunk count
-      //
-      // Copy 8-byte chunks through MMX registers, 8 per iteration of the loop
-      //
-      if (UseXMMForArrayCopy) {
-        xmm_copy_forward(from, to_from, rax);
+      __ BIND(L_copy_2_bytes);
+        // copy tailing word
+        __ testl(count, 1<<(shift-1));
+        __ jccb(Assembler::zero, L_copy_byte);
+        __ movw(rax, Address(from, 0));
+        __ movw(Address(from, to_from, Address::times_1, 0), rax);
+        if (t == T_BYTE) {
+          __ addptr(from, 2);
+        __ BIND(L_copy_byte);
+          // copy tailing byte
+          __ testl(count, 1);
+          __ jccb(Assembler::zero, L_exit);
+          __ movb(rax, Address(from, 0));
+          __ movb(Address(from, to_from, Address::times_1, 0), rax);
+        __ BIND(L_exit);
+        } else {
+        __ BIND(L_copy_byte);
+        }
       } else {
-        mmx_copy_forward(from, to_from, rax);
+      __ BIND(L_copy_2_bytes);
       }
     }
-    // copy tailing dword
-  __ BIND(L_copy_4_bytes);
-    __ testl(count, 1<<shift);
-    __ jccb(Assembler::zero, L_copy_2_bytes);
-    __ movl(rax, Address(from, 0));
-    __ movl(Address(from, to_from, Address::times_1, 0), rax);
-    if (t == T_BYTE || t == T_SHORT) {
-      __ addptr(from, 4);
-    __ BIND(L_copy_2_bytes);
-      // copy tailing word
-      __ testl(count, 1<<(shift-1));
-      __ jccb(Assembler::zero, L_copy_byte);
-      __ movw(rax, Address(from, 0));
-      __ movw(Address(from, to_from, Address::times_1, 0), rax);
-      if (t == T_BYTE) {
-        __ addptr(from, 2);
-      __ BIND(L_copy_byte);
-        // copy tailing byte
-        __ testl(count, 1);
-        __ jccb(Assembler::zero, L_exit);
-        __ movb(rax, Address(from, 0));
-        __ movb(Address(from, to_from, Address::times_1, 0), rax);
-      __ BIND(L_exit);
-      } else {
-      __ BIND(L_copy_byte);
-      }
-    } else {
-    __ BIND(L_copy_2_bytes);
+
+    if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) {
+      __ emms();
     }
-
     __ movl(count, Address(rsp, 12+12)); // reread 'count'
     bs->arraycopy_epilogue(_masm, decorators, t, from, to, count);
 
@@ -1079,104 +1086,112 @@
     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, t, from, to, count);
 
-    // copy from high to low
-    __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
-    __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
-    if (t == T_BYTE || t == T_SHORT) {
-      // Align the end of destination array at 4 bytes address boundary
-      __ lea(end, Address(dst, count, sf, 0));
-      if (t == T_BYTE) {
-        // One byte misalignment happens only for byte arrays
-        __ testl(end, 1);
-        __ jccb(Assembler::zero, L_skip_align1);
-        __ decrement(count);
-        __ movb(rdx, Address(from, count, sf, 0));
-        __ movb(Address(to, count, sf, 0), rdx);
-      __ BIND(L_skip_align1);
-      }
-      // Two bytes misalignment happens only for byte and short (char) arrays
-      __ testl(end, 2);
-      __ jccb(Assembler::zero, L_skip_align2);
-      __ subptr(count, 1<<(shift-1));
-      __ movw(rdx, Address(from, count, sf, 0));
-      __ movw(Address(to, count, sf, 0), rdx);
-    __ BIND(L_skip_align2);
+    {
+      bool add_entry = (t != T_OBJECT && (!aligned || t == T_INT));
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, add_entry, true);
+      // copy from high to low
       __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
-      __ jcc(Assembler::below, L_copy_4_bytes);
-    }
-
-    if (!VM_Version::supports_mmx()) {
-      __ std();
-      __ mov(rax, count); // Save 'count'
-      __ mov(rdx, to);    // Save 'to'
-      __ lea(rsi, Address(from, count, sf, -4));
-      __ lea(rdi, Address(to  , count, sf, -4));
-      __ shrptr(count, shift); // bytes count
-      __ rep_mov();
-      __ cld();
-      __ mov(count, rax); // restore 'count'
-      __ andl(count, (1<<shift)-1);      // mask the number of rest elements
-      __ movptr(from, Address(rsp, 12+4)); // reread 'from'
-      __ mov(to, rdx);   // restore 'to'
-      __ jmpb(L_copy_2_bytes); // all dword were copied
-   } else {
-      // Align to 8 bytes the end of array. It is aligned to 4 bytes already.
-      __ testptr(end, 4);
-      __ jccb(Assembler::zero, L_copy_8_bytes);
-      __ subl(count, 1<<shift);
-      __ movl(rdx, Address(from, count, sf, 0));
-      __ movl(Address(to, count, sf, 0), rdx);
-      __ jmpb(L_copy_8_bytes);
-
-      __ align(OptoLoopAlignment);
-      // Move 8 bytes
-    __ BIND(L_copy_8_bytes_loop);
-      if (UseXMMForArrayCopy) {
-        __ movq(xmm0, Address(from, count, sf, 0));
-        __ movq(Address(to, count, sf, 0), xmm0);
+      __ jcc(Assembler::below, L_copy_4_bytes); // use unsigned cmp
+      if (t == T_BYTE || t == T_SHORT) {
+        // Align the end of destination array at 4 bytes address boundary
+        __ lea(end, Address(dst, count, sf, 0));
+        if (t == T_BYTE) {
+          // One byte misalignment happens only for byte arrays
+          __ testl(end, 1);
+          __ jccb(Assembler::zero, L_skip_align1);
+          __ decrement(count);
+          __ movb(rdx, Address(from, count, sf, 0));
+          __ movb(Address(to, count, sf, 0), rdx);
+        __ BIND(L_skip_align1);
+        }
+        // Two bytes misalignment happens only for byte and short (char) arrays
+        __ testl(end, 2);
+        __ jccb(Assembler::zero, L_skip_align2);
+        __ subptr(count, 1<<(shift-1));
+        __ movw(rdx, Address(from, count, sf, 0));
+        __ movw(Address(to, count, sf, 0), rdx);
+      __ BIND(L_skip_align2);
+        __ cmpl(count, 2<<shift); // Short arrays (< 8 bytes) copy by element
+        __ jcc(Assembler::below, L_copy_4_bytes);
+      }
+
+      if (!VM_Version::supports_mmx()) {
+        __ std();
+        __ mov(rax, count); // Save 'count'
+        __ mov(rdx, to);    // Save 'to'
+        __ lea(rsi, Address(from, count, sf, -4));
+        __ lea(rdi, Address(to  , count, sf, -4));
+        __ shrptr(count, shift); // bytes count
+        __ rep_mov();
+        __ cld();
+        __ mov(count, rax); // restore 'count'
+        __ andl(count, (1<<shift)-1);      // mask the number of rest elements
+        __ movptr(from, Address(rsp, 12+4)); // reread 'from'
+        __ mov(to, rdx);   // restore 'to'
+        __ jmpb(L_copy_2_bytes); // all dword were copied
       } else {
-        __ movq(mmx0, Address(from, count, sf, 0));
-        __ movq(Address(to, count, sf, 0), mmx0);
+        // Align to 8 bytes the end of array. It is aligned to 4 bytes already.
+        __ testptr(end, 4);
+        __ jccb(Assembler::zero, L_copy_8_bytes);
+        __ subl(count, 1<<shift);
+        __ movl(rdx, Address(from, count, sf, 0));
+        __ movl(Address(to, count, sf, 0), rdx);
+        __ jmpb(L_copy_8_bytes);
+
+        __ align(OptoLoopAlignment);
+        // Move 8 bytes
+      __ BIND(L_copy_8_bytes_loop);
+        if (UseXMMForArrayCopy) {
+          __ movq(xmm0, Address(from, count, sf, 0));
+          __ movq(Address(to, count, sf, 0), xmm0);
+        } else {
+          __ movq(mmx0, Address(from, count, sf, 0));
+          __ movq(Address(to, count, sf, 0), mmx0);
+        }
+      __ BIND(L_copy_8_bytes);
+        __ subl(count, 2<<shift);
+        __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
+        __ addl(count, 2<<shift);
+        if (!UseXMMForArrayCopy) {
+          __ emms();
+        }
       }
-    __ BIND(L_copy_8_bytes);
-      __ subl(count, 2<<shift);
-      __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
-      __ addl(count, 2<<shift);
-      if (!UseXMMForArrayCopy) {
-        __ emms();
+    __ BIND(L_copy_4_bytes);
+      // copy prefix qword
+      __ testl(count, 1<<shift);
+      __ jccb(Assembler::zero, L_copy_2_bytes);
+      __ movl(rdx, Address(from, count, sf, -4));
+      __ movl(Address(to, count, sf, -4), rdx);
+
+      if (t == T_BYTE || t == T_SHORT) {
+          __ subl(count, (1<<shift));
+        __ BIND(L_copy_2_bytes);
+          // copy prefix dword
+          __ testl(count, 1<<(shift-1));
+          __ jccb(Assembler::zero, L_copy_byte);
+          __ movw(rdx, Address(from, count, sf, -2));
+          __ movw(Address(to, count, sf, -2), rdx);
+          if (t == T_BYTE) {
+            __ subl(count, 1<<(shift-1));
+          __ BIND(L_copy_byte);
+            // copy prefix byte
+            __ testl(count, 1);
+            __ jccb(Assembler::zero, L_exit);
+            __ movb(rdx, Address(from, 0));
+            __ movb(Address(to, 0), rdx);
+          __ BIND(L_exit);
+          } else {
+          __ BIND(L_copy_byte);
+          }
+      } else {
+      __ BIND(L_copy_2_bytes);
       }
     }
-  __ BIND(L_copy_4_bytes);
-    // copy prefix qword
-    __ testl(count, 1<<shift);
-    __ jccb(Assembler::zero, L_copy_2_bytes);
-    __ movl(rdx, Address(from, count, sf, -4));
-    __ movl(Address(to, count, sf, -4), rdx);
-
-    if (t == T_BYTE || t == T_SHORT) {
-        __ subl(count, (1<<shift));
-      __ BIND(L_copy_2_bytes);
-        // copy prefix dword
-        __ testl(count, 1<<(shift-1));
-        __ jccb(Assembler::zero, L_copy_byte);
-        __ movw(rdx, Address(from, count, sf, -2));
-        __ movw(Address(to, count, sf, -2), rdx);
-        if (t == T_BYTE) {
-          __ subl(count, 1<<(shift-1));
-        __ BIND(L_copy_byte);
-          // copy prefix byte
-          __ testl(count, 1);
-          __ jccb(Assembler::zero, L_exit);
-          __ movb(rdx, Address(from, 0));
-          __ movb(Address(to, 0), rdx);
-        __ BIND(L_exit);
-        } else {
-        __ BIND(L_copy_byte);
-        }
-    } else {
-    __ BIND(L_copy_2_bytes);
+
+    if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) {
+      __ emms();
     }
-
     __ movl2ptr(count, Address(rsp, 12+12)); // reread count
     bs->arraycopy_epilogue(_masm, decorators, t, from, to, count);
 
@@ -1212,23 +1227,30 @@
     *entry = __ pc(); // Entry point from conjoint arraycopy stub.
     BLOCK_COMMENT("Entry:");
 
-    __ subptr(to, from); // to --> to_from
-    if (VM_Version::supports_mmx()) {
-      if (UseXMMForArrayCopy) {
-        xmm_copy_forward(from, to_from, count);
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, true, true);
+      __ subptr(to, from); // to --> to_from
+      if (VM_Version::supports_mmx()) {
+        if (UseXMMForArrayCopy) {
+          xmm_copy_forward(from, to_from, count);
+        } else {
+          mmx_copy_forward(from, to_from, count);
+        }
       } else {
-        mmx_copy_forward(from, to_from, count);
+        __ jmpb(L_copy_8_bytes);
+        __ align(OptoLoopAlignment);
+      __ BIND(L_copy_8_bytes_loop);
+        __ fild_d(Address(from, 0));
+        __ fistp_d(Address(from, to_from, Address::times_1));
+        __ addptr(from, 8);
+      __ BIND(L_copy_8_bytes);
+        __ decrement(count);
+        __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
       }
-    } else {
-      __ jmpb(L_copy_8_bytes);
-      __ align(OptoLoopAlignment);
-    __ BIND(L_copy_8_bytes_loop);
-      __ fild_d(Address(from, 0));
-      __ fistp_d(Address(from, to_from, Address::times_1));
-      __ addptr(from, 8);
-    __ BIND(L_copy_8_bytes);
-      __ decrement(count);
-      __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
+    }
+    if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) {
+      __ emms();
     }
     inc_copy_counter_np(T_LONG);
     __ leave(); // required for proper stackwalking of RuntimeStub frame
@@ -1267,26 +1289,31 @@
     __ movptr(from, Address(rsp, 8));  // from
     __ jump_cc(Assembler::aboveEqual, nooverlap);
 
-    __ jmpb(L_copy_8_bytes);
-
-    __ align(OptoLoopAlignment);
-  __ BIND(L_copy_8_bytes_loop);
-    if (VM_Version::supports_mmx()) {
-      if (UseXMMForArrayCopy) {
-        __ movq(xmm0, Address(from, count, Address::times_8));
-        __ movq(Address(to, count, Address::times_8), xmm0);
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, true, true);
+
+      __ jmpb(L_copy_8_bytes);
+
+      __ align(OptoLoopAlignment);
+    __ BIND(L_copy_8_bytes_loop);
+      if (VM_Version::supports_mmx()) {
+        if (UseXMMForArrayCopy) {
+          __ movq(xmm0, Address(from, count, Address::times_8));
+          __ movq(Address(to, count, Address::times_8), xmm0);
+        } else {
+          __ movq(mmx0, Address(from, count, Address::times_8));
+          __ movq(Address(to, count, Address::times_8), mmx0);
+        }
       } else {
-        __ movq(mmx0, Address(from, count, Address::times_8));
-        __ movq(Address(to, count, Address::times_8), mmx0);
+        __ fild_d(Address(from, count, Address::times_8));
+        __ fistp_d(Address(to, count, Address::times_8));
       }
-    } else {
-      __ fild_d(Address(from, count, Address::times_8));
-      __ fistp_d(Address(to, count, Address::times_8));
+    __ BIND(L_copy_8_bytes);
+      __ decrement(count);
+      __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
+
     }
-  __ BIND(L_copy_8_bytes);
-    __ decrement(count);
-    __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop);
-
     if (VM_Version::supports_mmx() && !UseXMMForArrayCopy) {
       __ emms();
     }
@@ -3945,7 +3972,10 @@
   }
 }; // end class declaration
 
-
+#define UCM_TABLE_MAX_ENTRIES 8
 void StubGenerator_generate(CodeBuffer* code, bool all) {
+  if (UnsafeCopyMemory::_table == NULL) {
+    UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES);
+  }
   StubGenerator g(code, all);
 }

--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp	Thu Jun 20 14:09:22 2019 +0100
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp	Mon Jun 24 11:37:56 2019 -0700
@@ -1433,7 +1433,6 @@
     __ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords
   }
 
-
   // Arguments:
   //   aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
   //             ignored
@@ -1482,51 +1481,55 @@
     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                       // r9 and r10 may be used to save non-volatile registers
 
-    // 'from', 'to' and 'count' are now valid
-    __ movptr(byte_count, count);
-    __ shrptr(count, 3); // count => qword_count
-
-    // Copy from low to high addresses.  Use 'to' as scratch.
-    __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
-    __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
-    __ negptr(qword_count); // make the count negative
-    __ jmp(L_copy_bytes);
-
-    // Copy trailing qwords
-  __ BIND(L_copy_8_bytes);
-    __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
-    __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
-    __ increment(qword_count);
-    __ jcc(Assembler::notZero, L_copy_8_bytes);
-
-    // Check for and copy trailing dword
-  __ BIND(L_copy_4_bytes);
-    __ testl(byte_count, 4);
-    __ jccb(Assembler::zero, L_copy_2_bytes);
-    __ movl(rax, Address(end_from, 8));
-    __ movl(Address(end_to, 8), rax);
-
-    __ addptr(end_from, 4);
-    __ addptr(end_to, 4);
-
-    // Check for and copy trailing word
-  __ BIND(L_copy_2_bytes);
-    __ testl(byte_count, 2);
-    __ jccb(Assembler::zero, L_copy_byte);
-    __ movw(rax, Address(end_from, 8));
-    __ movw(Address(end_to, 8), rax);
-
-    __ addptr(end_from, 2);
-    __ addptr(end_to, 2);
-
-    // Check for and copy trailing byte
-  __ BIND(L_copy_byte);
-    __ testl(byte_count, 1);
-    __ jccb(Assembler::zero, L_exit);
-    __ movb(rax, Address(end_from, 8));
-    __ movb(Address(end_to, 8), rax);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !aligned, true);
+      // 'from', 'to' and 'count' are now valid
+      __ movptr(byte_count, count);
+      __ shrptr(count, 3); // count => qword_count
+
+      // Copy from low to high addresses.  Use 'to' as scratch.
+      __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
+      __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
+      __ negptr(qword_count); // make the count negative
+      __ jmp(L_copy_bytes);
+
+      // Copy trailing qwords
+    __ BIND(L_copy_8_bytes);
+      __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
+      __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
+      __ increment(qword_count);
+      __ jcc(Assembler::notZero, L_copy_8_bytes);
+
+      // Check for and copy trailing dword
+    __ BIND(L_copy_4_bytes);
+      __ testl(byte_count, 4);
+      __ jccb(Assembler::zero, L_copy_2_bytes);
+      __ movl(rax, Address(end_from, 8));
+      __ movl(Address(end_to, 8), rax);
+
+      __ addptr(end_from, 4);
+      __ addptr(end_to, 4);
+
+      // Check for and copy trailing word
+    __ BIND(L_copy_2_bytes);
+      __ testl(byte_count, 2);
+      __ jccb(Assembler::zero, L_copy_byte);
+      __ movw(rax, Address(end_from, 8));
+      __ movw(Address(end_to, 8), rax);
+
+      __ addptr(end_from, 2);
+      __ addptr(end_to, 2);
+
+      // Check for and copy trailing byte
+    __ BIND(L_copy_byte);
+      __ testl(byte_count, 1);
+      __ jccb(Assembler::zero, L_exit);
+      __ movb(rax, Address(end_from, 8));
+      __ movb(Address(end_to, 8), rax);
+    }
   __ BIND(L_exit);
+    address ucme_exit_pc = __ pc();
     restore_arg_regs();
     inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
@@ -1534,10 +1537,12 @@
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
-    // Copy in multi-bytes chunks
-    copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
-    __ jmp(L_copy_4_bytes);
-
+    {
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false, ucme_exit_pc);
+      // Copy in multi-bytes chunks
+      copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+      __ jmp(L_copy_4_bytes);
+    }
     return start;
   }
 
@@ -1582,41 +1587,44 @@
     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                       // r9 and r10 may be used to save non-volatile registers
 
-    // 'from', 'to' and 'count' are now valid
-    __ movptr(byte_count, count);
-    __ shrptr(count, 3);   // count => qword_count
-
-    // Copy from high to low addresses.
-
-    // Check for and copy trailing byte
-    __ testl(byte_count, 1);
-    __ jcc(Assembler::zero, L_copy_2_bytes);
-    __ movb(rax, Address(from, byte_count, Address::times_1, -1));
-    __ movb(Address(to, byte_count, Address::times_1, -1), rax);
-    __ decrement(byte_count); // Adjust for possible trailing word
-
-    // Check for and copy trailing word
-  __ BIND(L_copy_2_bytes);
-    __ testl(byte_count, 2);
-    __ jcc(Assembler::zero, L_copy_4_bytes);
-    __ movw(rax, Address(from, byte_count, Address::times_1, -2));
-    __ movw(Address(to, byte_count, Address::times_1, -2), rax);
-
-    // Check for and copy trailing dword
-  __ BIND(L_copy_4_bytes);
-    __ testl(byte_count, 4);
-    __ jcc(Assembler::zero, L_copy_bytes);
-    __ movl(rax, Address(from, qword_count, Address::times_8));
-    __ movl(Address(to, qword_count, Address::times_8), rax);
-    __ jmp(L_copy_bytes);
-
-    // Copy trailing qwords
-  __ BIND(L_copy_8_bytes);
-    __ movq(rax, Address(from, qword_count, Address::times_8, -8));
-    __ movq(Address(to, qword_count, Address::times_8, -8), rax);
-    __ decrement(qword_count);
-    __ jcc(Assembler::notZero, L_copy_8_bytes);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !aligned, true);
+      // 'from', 'to' and 'count' are now valid
+      __ movptr(byte_count, count);
+      __ shrptr(count, 3);   // count => qword_count
+
+      // Copy from high to low addresses.
+
+      // Check for and copy trailing byte
+      __ testl(byte_count, 1);
+      __ jcc(Assembler::zero, L_copy_2_bytes);
+      __ movb(rax, Address(from, byte_count, Address::times_1, -1));
+      __ movb(Address(to, byte_count, Address::times_1, -1), rax);
+      __ decrement(byte_count); // Adjust for possible trailing word
+
+      // Check for and copy trailing word
+    __ BIND(L_copy_2_bytes);
+      __ testl(byte_count, 2);
+      __ jcc(Assembler::zero, L_copy_4_bytes);
+      __ movw(rax, Address(from, byte_count, Address::times_1, -2));
+      __ movw(Address(to, byte_count, Address::times_1, -2), rax);
+
+      // Check for and copy trailing dword
+    __ BIND(L_copy_4_bytes);
+      __ testl(byte_count, 4);
+      __ jcc(Assembler::zero, L_copy_bytes);
+      __ movl(rax, Address(from, qword_count, Address::times_8));
+      __ movl(Address(to, qword_count, Address::times_8), rax);
+      __ jmp(L_copy_bytes);
+
+      // Copy trailing qwords
+    __ BIND(L_copy_8_bytes);
+      __ movq(rax, Address(from, qword_count, Address::times_8, -8));
+      __ movq(Address(to, qword_count, Address::times_8, -8), rax);
+      __ decrement(qword_count);
+      __ jcc(Assembler::notZero, L_copy_8_bytes);
+    }
     restore_arg_regs();
     inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
@@ -1624,9 +1632,12 @@
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
-    // Copy in multi-bytes chunks
-    copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !aligned, true);
+      // Copy in multi-bytes chunks
+      copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+    }
     restore_arg_regs();
     inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
@@ -1684,44 +1695,48 @@
     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                       // r9 and r10 may be used to save non-volatile registers
 
-    // 'from', 'to' and 'count' are now valid
-    __ movptr(word_count, count);
-    __ shrptr(count, 2); // count => qword_count
-
-    // Copy from low to high addresses.  Use 'to' as scratch.
-    __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
-    __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
-    __ negptr(qword_count);
-    __ jmp(L_copy_bytes);
-
-    // Copy trailing qwords
-  __ BIND(L_copy_8_bytes);
-    __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
-    __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
-    __ increment(qword_count);
-    __ jcc(Assembler::notZero, L_copy_8_bytes);
-
-    // Original 'dest' is trashed, so we can't use it as a
-    // base register for a possible trailing word copy
-
-    // Check for and copy trailing dword
-  __ BIND(L_copy_4_bytes);
-    __ testl(word_count, 2);
-    __ jccb(Assembler::zero, L_copy_2_bytes);
-    __ movl(rax, Address(end_from, 8));
-    __ movl(Address(end_to, 8), rax);
-
-    __ addptr(end_from, 4);
-    __ addptr(end_to, 4);
-
-    // Check for and copy trailing word
-  __ BIND(L_copy_2_bytes);
-    __ testl(word_count, 1);
-    __ jccb(Assembler::zero, L_exit);
-    __ movw(rax, Address(end_from, 8));
-    __ movw(Address(end_to, 8), rax);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !aligned, true);
+      // 'from', 'to' and 'count' are now valid
+      __ movptr(word_count, count);
+      __ shrptr(count, 2); // count => qword_count
+
+      // Copy from low to high addresses.  Use 'to' as scratch.
+      __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
+      __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
+      __ negptr(qword_count);
+      __ jmp(L_copy_bytes);
+
+      // Copy trailing qwords
+    __ BIND(L_copy_8_bytes);
+      __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
+      __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
+      __ increment(qword_count);
+      __ jcc(Assembler::notZero, L_copy_8_bytes);
+
+      // Original 'dest' is trashed, so we can't use it as a
+      // base register for a possible trailing word copy
+
+      // Check for and copy trailing dword
+    __ BIND(L_copy_4_bytes);
+      __ testl(word_count, 2);
+      __ jccb(Assembler::zero, L_copy_2_bytes);
+      __ movl(rax, Address(end_from, 8));
+      __ movl(Address(end_to, 8), rax);
+
+      __ addptr(end_from, 4);
+      __ addptr(end_to, 4);
+
+      // Check for and copy trailing word
+    __ BIND(L_copy_2_bytes);
+      __ testl(word_count, 1);
+      __ jccb(Assembler::zero, L_exit);
+      __ movw(rax, Address(end_from, 8));
+      __ movw(Address(end_to, 8), rax);
+    }
   __ BIND(L_exit);
+    address ucme_exit_pc = __ pc();
     restore_arg_regs();
     inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
@@ -1729,9 +1744,12 @@
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
-    // Copy in multi-bytes chunks
-    copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
-    __ jmp(L_copy_4_bytes);
+    {
+      UnsafeCopyMemoryMark ucmm(this, !aligned, false, ucme_exit_pc);
+      // Copy in multi-bytes chunks
+      copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+      __ jmp(L_copy_4_bytes);
+    }
 
     return start;
   }
@@ -1798,33 +1816,36 @@
     setup_arg_regs(); // from => rdi, to => rsi, count => rdx
                       // r9 and r10 may be used to save non-volatile registers
 
-    // 'from', 'to' and 'count' are now valid
-    __ movptr(word_count, count);
-    __ shrptr(count, 2); // count => qword_count
-
-    // Copy from high to low addresses.  Use 'to' as scratch.
-
-    // Check for and copy trailing word
-    __ testl(word_count, 1);
-    __ jccb(Assembler::zero, L_copy_4_bytes);
-    __ movw(rax, Address(from, word_count, Address::times_2, -2));
-    __ movw(Address(to, word_count, Address::times_2, -2), rax);
-
-    // Check for and copy trailing dword
-  __ BIND(L_copy_4_bytes);
-    __ testl(word_count, 2);
-    __ jcc(Assembler::zero, L_copy_bytes);
-    __ movl(rax, Address(from, qword_count, Address::times_8));
-    __ movl(Address(to, qword_count, Address::times_8), rax);
-    __ jmp(L_copy_bytes);
-
-    // Copy trailing qwords
-  __ BIND(L_copy_8_bytes);
-    __ movq(rax, Address(from, qword_count, Address::times_8, -8));
-    __ movq(Address(to, qword_count, Address::times_8, -8), rax);
-    __ decrement(qword_count);
-    __ jcc(Assembler::notZero, L_copy_8_bytes);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !aligned, true);
+      // 'from', 'to' and 'count' are now valid
+      __ movptr(word_count, count);
+      __ shrptr(count, 2); // count => qword_count
+
+      // Copy from high to low addresses.  Use 'to' as scratch.
+
+      // Check for and copy trailing word
+      __ testl(word_count, 1);
+      __ jccb(Assembler::zero, L_copy_4_bytes);
+      __ movw(rax, Address(from, word_count, Address::times_2, -2));
+      __ movw(Address(to, word_count, Address::times_2, -2), rax);
+
+     // Check for and copy trailing dword
+    __ BIND(L_copy_4_bytes);
+      __ testl(word_count, 2);
+      __ jcc(Assembler::zero, L_copy_bytes);
+      __ movl(rax, Address(from, qword_count, Address::times_8));
+      __ movl(Address(to, qword_count, Address::times_8), rax);
+      __ jmp(L_copy_bytes);
+
+      // Copy trailing qwords
+    __ BIND(L_copy_8_bytes);
+      __ movq(rax, Address(from, qword_count, Address::times_8, -8));
+      __ movq(Address(to, qword_count, Address::times_8, -8), rax);
+      __ decrement(qword_count);
+      __ jcc(Assembler::notZero, L_copy_8_bytes);
+    }
     restore_arg_regs();
     inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
@@ -1832,9 +1853,12 @@
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
-    // Copy in multi-bytes chunks
-    copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !aligned, true);
+      // Copy in multi-bytes chunks
+      copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+    }
     restore_arg_regs();
     inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter after rscratch1 is free
     __ xorptr(rax, rax); // return 0
@@ -1905,31 +1929,35 @@
     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
 
-    // 'from', 'to' and 'count' are now valid
-    __ movptr(dword_count, count);
-    __ shrptr(count, 1); // count => qword_count
-
-    // Copy from low to high addresses.  Use 'to' as scratch.
-    __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
-    __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
-    __ negptr(qword_count);
-    __ jmp(L_copy_bytes);
-
-    // Copy trailing qwords
-  __ BIND(L_copy_8_bytes);
-    __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
-    __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
-    __ increment(qword_count);
-    __ jcc(Assembler::notZero, L_copy_8_bytes);
-
-    // Check for and copy trailing dword
-  __ BIND(L_copy_4_bytes);
-    __ testl(dword_count, 1); // Only byte test since the value is 0 or 1
-    __ jccb(Assembler::zero, L_exit);
-    __ movl(rax, Address(end_from, 8));
-    __ movl(Address(end_to, 8), rax);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
+      // 'from', 'to' and 'count' are now valid
+      __ movptr(dword_count, count);
+      __ shrptr(count, 1); // count => qword_count
+
+      // Copy from low to high addresses.  Use 'to' as scratch.
+      __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
+      __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
+      __ negptr(qword_count);
+      __ jmp(L_copy_bytes);
+
+      // Copy trailing qwords
+    __ BIND(L_copy_8_bytes);
+      __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
+      __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
+      __ increment(qword_count);
+      __ jcc(Assembler::notZero, L_copy_8_bytes);
+
+      // Check for and copy trailing dword
+    __ BIND(L_copy_4_bytes);
+      __ testl(dword_count, 1); // Only byte test since the value is 0 or 1
+      __ jccb(Assembler::zero, L_exit);
+      __ movl(rax, Address(end_from, 8));
+      __ movl(Address(end_to, 8), rax);
+    }
   __ BIND(L_exit);
+    address ucme_exit_pc = __ pc();
     bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count);
     restore_arg_regs_using_thread();
     inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free
@@ -1938,9 +1966,12 @@
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
-    // Copy in multi-bytes chunks
-    copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
-    __ jmp(L_copy_4_bytes);
+    {
+      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, false, ucme_exit_pc);
+      // Copy in multi-bytes chunks
+      copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+      __ jmp(L_copy_4_bytes);
+    }
 
     return start;
   }
@@ -2001,26 +2032,29 @@
     bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
 
     assert_clean_int(count, rax); // Make sure 'count' is clean int.
-    // 'from', 'to' and 'count' are now valid
-    __ movptr(dword_count, count);
-    __ shrptr(count, 1); // count => qword_count
-
-    // Copy from high to low addresses.  Use 'to' as scratch.
-
-    // Check for and copy trailing dword
-    __ testl(dword_count, 1);
-    __ jcc(Assembler::zero, L_copy_bytes);
-    __ movl(rax, Address(from, dword_count, Address::times_4, -4));
-    __ movl(Address(to, dword_count, Address::times_4, -4), rax);
-    __ jmp(L_copy_bytes);
-
-    // Copy trailing qwords
-  __ BIND(L_copy_8_bytes);
-    __ movq(rax, Address(from, qword_count, Address::times_8, -8));
-    __ movq(Address(to, qword_count, Address::times_8, -8), rax);
-    __ decrement(qword_count);
-    __ jcc(Assembler::notZero, L_copy_8_bytes);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
+      // 'from', 'to' and 'count' are now valid
+      __ movptr(dword_count, count);
+      __ shrptr(count, 1); // count => qword_count
+
+      // Copy from high to low addresses.  Use 'to' as scratch.
+
+      // Check for and copy trailing dword
+      __ testl(dword_count, 1);
+      __ jcc(Assembler::zero, L_copy_bytes);
+      __ movl(rax, Address(from, dword_count, Address::times_4, -4));
+      __ movl(Address(to, dword_count, Address::times_4, -4), rax);
+      __ jmp(L_copy_bytes);
+
+      // Copy trailing qwords
+    __ BIND(L_copy_8_bytes);
+      __ movq(rax, Address(from, qword_count, Address::times_8, -8));
+      __ movq(Address(to, qword_count, Address::times_8, -8), rax);
+      __ decrement(qword_count);
+      __ jcc(Assembler::notZero, L_copy_8_bytes);
+    }
     if (is_oop) {
       __ jmp(L_exit);
     }
@@ -2031,8 +2065,12 @@
     __ leave(); // required for proper stackwalking of RuntimeStub frame
     __ ret(0);
 
-    // Copy in multi-bytes chunks
-    copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
+      // Copy in multi-bytes chunks
+      copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+    }
 
   __ BIND(L_exit);
     bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count);
@@ -2102,20 +2140,23 @@
     BasicType type = is_oop ? T_OBJECT : T_LONG;
     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
-
-    // Copy from low to high addresses.  Use 'to' as scratch.
-    __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
-    __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
-    __ negptr(qword_count);
-    __ jmp(L_copy_bytes);
-
-    // Copy trailing qwords
-  __ BIND(L_copy_8_bytes);
-    __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
-    __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
-    __ increment(qword_count);
-    __ jcc(Assembler::notZero, L_copy_8_bytes);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
+
+      // Copy from low to high addresses.  Use 'to' as scratch.
+      __ lea(end_from, Address(from, qword_count, Address::times_8, -8));
+      __ lea(end_to,   Address(to,   qword_count, Address::times_8, -8));
+      __ negptr(qword_count);
+      __ jmp(L_copy_bytes);
+
+      // Copy trailing qwords
+    __ BIND(L_copy_8_bytes);
+      __ movq(rax, Address(end_from, qword_count, Address::times_8, 8));
+      __ movq(Address(end_to, qword_count, Address::times_8, 8), rax);
+      __ increment(qword_count);
+      __ jcc(Assembler::notZero, L_copy_8_bytes);
+    }
     if (is_oop) {
       __ jmp(L_exit);
     } else {
@@ -2127,8 +2168,12 @@
       __ ret(0);
     }
 
-    // Copy in multi-bytes chunks
-    copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
+      // Copy in multi-bytes chunks
+      copy_bytes_forward(end_from, end_to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+    }
 
     __ BIND(L_exit);
     bs->arraycopy_epilogue(_masm, decorators, type, from, to, qword_count);
@@ -2195,16 +2240,19 @@
     BasicType type = is_oop ? T_OBJECT : T_LONG;
     BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
     bs->arraycopy_prologue(_masm, decorators, type, from, to, qword_count);
-
-    __ jmp(L_copy_bytes);
-
-    // Copy trailing qwords
-  __ BIND(L_copy_8_bytes);
-    __ movq(rax, Address(from, qword_count, Address::times_8, -8));
-    __ movq(Address(to, qword_count, Address::times_8, -8), rax);
-    __ decrement(qword_count);
-    __ jcc(Assembler::notZero, L_copy_8_bytes);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
+
+      __ jmp(L_copy_bytes);
+
+      // Copy trailing qwords
+    __ BIND(L_copy_8_bytes);
+      __ movq(rax, Address(from, qword_count, Address::times_8, -8));
+      __ movq(Address(to, qword_count, Address::times_8, -8), rax);
+      __ decrement(qword_count);
+      __ jcc(Assembler::notZero, L_copy_8_bytes);
+    }
     if (is_oop) {
       __ jmp(L_exit);
     } else {
@@ -2215,10 +2263,13 @@
       __ leave(); // required for proper stackwalking of RuntimeStub frame
       __ ret(0);
     }
-
-    // Copy in multi-bytes chunks
-    copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
-
+    {
+      // UnsafeCopyMemory page error: continue after ucm
+      UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
+
+      // Copy in multi-bytes chunks
+      copy_bytes_backward(from, to, qword_count, rax, L_copy_bytes, L_copy_8_bytes);
+    }
     __ BIND(L_exit);
     bs->arraycopy_epilogue(_masm, decorators, type, from, to, qword_count);
     restore_arg_regs_using_thread();
@@ -6036,6 +6087,10 @@
   }
 }; // end class declaration
 
+#define UCM_TABLE_MAX_ENTRIES 16
 void StubGenerator_generate(CodeBuffer* code, bool all) {
+  if (UnsafeCopyMemory::_table == NULL) {
+    UnsafeCopyMemory::create_table(UCM_TABLE_MAX_ENTRIES);
+  }
   StubGenerator g(code, all);
 }

--- a/src/hotspot/os/windows/os_windows.cpp	Thu Jun 20 14:09:22 2019 +0100
+++ b/src/hotspot/os/windows/os_windows.cpp	Mon Jun 24 11:37:56 2019 -0700
@@ -2581,10 +2581,18 @@
         CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
         nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL;
       }
-      if ((thread->thread_state() == _thread_in_vm &&
+
+      bool is_unsafe_arraycopy = (thread->thread_state() == _thread_in_native || in_java) && UnsafeCopyMemory::contains_pc(pc);
+      if (((thread->thread_state() == _thread_in_vm ||
+           thread->thread_state() == _thread_in_native ||
+           is_unsafe_arraycopy) &&
           thread->doing_unsafe_access()) ||
           (nm != NULL && nm->has_unsafe_access())) {
-        return Handle_Exception(exceptionInfo, SharedRuntime::handle_unsafe_access(thread, (address)Assembler::locate_next_instruction(pc)));
+        address next_pc =  Assembler::locate_next_instruction(pc);
+        if (is_unsafe_arraycopy) {
+          next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+        }
+        return Handle_Exception(exceptionInfo, SharedRuntime::handle_unsafe_access(thread, next_pc));
       }
     }

--- a/src/hotspot/os_cpu/aix_ppc/os_aix_ppc.cpp	Thu Jun 20 14:09:22 2019 +0100
+++ b/src/hotspot/os_cpu/aix_ppc/os_aix_ppc.cpp	Mon Jun 24 11:37:56 2019 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012, 2018 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -441,8 +441,12 @@
         // underlying file has been truncated. Do not crash the VM in such a case.
         CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
         CompiledMethod* nm = cb->as_compiled_method_or_null();
-        if (nm != NULL && nm->has_unsafe_access()) {
+        bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc));
+        if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) {
           address next_pc = pc + 4;
+          if (is_unsafe_arraycopy) {
+            next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+          }
           next_pc = SharedRuntime::handle_unsafe_access(thread, next_pc);
           os::Aix::ucontext_set_pc(uc, next_pc);
           return 1;
@@ -461,9 +465,13 @@
         stub = pc + 4;  // continue with next instruction.
         goto run_stub;
       }
-      else if (thread->thread_state() == _thread_in_vm &&
+      else if ((thread->thread_state() == _thread_in_vm ||
+                thread->thread_state() == _thread_in_native) &&
                sig == SIGBUS && thread->doing_unsafe_access()) {
         address next_pc = pc + 4;
+        if (UnsafeCopyMemory::contains_pc(pc)) {
+          next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+        }
         next_pc = SharedRuntime::handle_unsafe_access(thread, next_pc);
         os::Aix::ucontext_set_pc(uc, next_pc);
         return 1;

--- a/src/hotspot/os_cpu/bsd_x86/os_bsd_x86.cpp	Thu Jun 20 14:09:22 2019 +0100
+++ b/src/hotspot/os_cpu/bsd_x86/os_bsd_x86.cpp	Mon Jun 24 11:37:56 2019 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -589,8 +589,12 @@
         // Do not crash the VM in such a case.
         CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
         CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL;
-        if (nm != NULL && nm->has_unsafe_access()) {
+        bool is_unsafe_arraycopy = thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc);
+        if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) {
           address next_pc = Assembler::locate_next_instruction(pc);
+          if (is_unsafe_arraycopy) {
+            next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+          }
           stub = SharedRuntime::handle_unsafe_access(thread, next_pc);
         }
       }
@@ -659,10 +663,14 @@
           // Determination of interpreter/vtable stub/compiled code null exception
           stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
       }
-    } else if (thread->thread_state() == _thread_in_vm &&
+    } else if ((thread->thread_state() == _thread_in_vm ||
+                thread->thread_state() == _thread_in_native) &&
                sig == SIGBUS && /* info->si_code == BUS_OBJERR && */
                thread->doing_unsafe_access()) {
         address next_pc = Assembler::locate_next_instruction(pc);
+        if (UnsafeCopyMemory::contains_pc(pc)) {
+          next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+        }
         stub = SharedRuntime::handle_unsafe_access(thread, next_pc);
     }

--- a/src/hotspot/os_cpu/bsd_zero/os_bsd_zero.cpp	Thu Jun 20 14:09:22 2019 +0100
+++ b/src/hotspot/os_cpu/bsd_zero/os_bsd_zero.cpp	Mon Jun 24 11:37:56 2019 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2007, 2008, 2009, 2010 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -193,7 +193,8 @@
     /*if (thread->thread_state() == _thread_in_Java) {
       ShouldNotCallThis();
     }
-    else*/ if (thread->thread_state() == _thread_in_vm &&
+    else*/ if ((thread->thread_state() == _thread_in_vm ||
+               thread->thread_state() == _thread_in_native) &&
                sig == SIGBUS && thread->doing_unsafe_access()) {
       ShouldNotCallThis();
     }

--- a/src/hotspot/os_cpu/linux_aarch64/os_linux_aarch64.cpp	Thu Jun 20 14:09:22 2019 +0100
+++ b/src/hotspot/os_cpu/linux_aarch64/os_linux_aarch64.cpp	Mon Jun 24 11:37:56 2019 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2014, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -419,8 +419,12 @@
         // Do not crash the VM in such a case.
         CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
         CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL;
-        if (nm != NULL && nm->has_unsafe_access()) {
+        bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc));
+        if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) {
           address next_pc = pc + NativeCall::instruction_size;
+          if (is_unsafe_arraycopy) {
+            next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+          }
           stub = SharedRuntime::handle_unsafe_access(thread, next_pc);
         }
       }
@@ -439,10 +443,14 @@
           // Determination of interpreter/vtable stub/compiled code null exception
           stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
       }
-    } else if (thread->thread_state() == _thread_in_vm &&
+    } else if ((thread->thread_state() == _thread_in_vm ||
+                 thread->thread_state() == _thread_in_native) &&
                sig == SIGBUS && /* info->si_code == BUS_OBJERR && */
                thread->doing_unsafe_access()) {
       address next_pc = pc + NativeCall::instruction_size;
+      if (UnsafeCopyMemory::contains_pc(pc)) {
+        next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+      }
       stub = SharedRuntime::handle_unsafe_access(thread, next_pc);
     }

--- a/src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp	Thu Jun 20 14:09:22 2019 +0100
+++ b/src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp	Mon Jun 24 11:37:56 2019 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -384,7 +384,7 @@
         // Do not crash the VM in such a case.
         CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
         CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL;
-        if (nm != NULL && nm->has_unsafe_access()) {
+        if ((nm != NULL && nm->has_unsafe_access()) || (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc))) {
           unsafe_access = true;
         }
       } else if (sig == SIGSEGV &&
@@ -398,7 +398,8 @@
         // Zombie
         stub = SharedRuntime::get_handle_wrong_method_stub();
       }
-    } else if (thread->thread_state() == _thread_in_vm &&
+    } else if ((thread->thread_state() == _thread_in_vm ||
+                thread->thread_state() == _thread_in_native) &&
                sig == SIGBUS && thread->doing_unsafe_access()) {
         unsafe_access = true;
     }
@@ -418,6 +419,9 @@
     // any other suitable exception reason,
     // so assume it is an unsafe access.
     address next_pc = pc + Assembler::InstructionSize;
+    if (UnsafeCopyMemory::contains_pc(pc)) {
+      next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+    }
 #ifdef __thumb__
     if (uc->uc_mcontext.arm_cpsr & PSR_T_BIT) {
       next_pc = (address)((intptr_t)next_pc | 0x1);

--- a/src/hotspot/os_cpu/linux_ppc/os_linux_ppc.cpp	Thu Jun 20 14:09:22 2019 +0100
+++ b/src/hotspot/os_cpu/linux_ppc/os_linux_ppc.cpp	Mon Jun 24 11:37:56 2019 -0700
@@ -469,8 +469,12 @@
         // underlying file has been truncated. Do not crash the VM in such a case.
         CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
         CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL;
-        if (nm != NULL && nm->has_unsafe_access()) {
+        bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc));
+        if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) {
           address next_pc = pc + 4;
+          if (is_unsafe_arraycopy) {
+            next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+          }
           next_pc = SharedRuntime::handle_unsafe_access(thread, next_pc);
           os::Linux::ucontext_set_pc(uc, next_pc);
           return true;
@@ -485,11 +489,15 @@
                         // flushing of icache is not necessary.
         stub = pc + 4;  // continue with next instruction.
       }
-      else if (thread->thread_state() == _thread_in_vm &&
+      else if ((thread->thread_state() == _thread_in_vm ||
+                thread->thread_state() == _thread_in_native) &&
                sig == SIGBUS && thread->doing_unsafe_access()) {
         address next_pc = pc + 4;
+        if (UnsafeCopyMemory::contains_pc(pc)) {
+          next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+        }
         next_pc = SharedRuntime::handle_unsafe_access(thread, next_pc);
-        os::Linux::ucontext_set_pc(uc, pc + 4);
+        os::Linux::ucontext_set_pc(uc, next_pc);
         return true;
       }
     }

--- a/src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp	Thu Jun 20 14:09:22 2019 +0100
+++ b/src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp	Mon Jun 24 11:37:56 2019 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2016, 2018 SAP SE. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -467,7 +467,8 @@
         // when the vector facility is installed, but operating system support is missing.
         VM_Version::reset_has_VectorFacility();
         stub = pc; // Continue with next instruction.
-      } else if (thread->thread_state() == _thread_in_vm &&
+      } else if ((thread->thread_state() == _thread_in_vm ||
+                  thread->thread_state() == _thread_in_native) &&
                  sig == SIGBUS && thread->doing_unsafe_access()) {
         // We don't really need a stub here! Just set the pending exeption and
         // continue at the next instruction after the faulting read. Returning

--- a/src/hotspot/os_cpu/linux_sparc/os_linux_sparc.cpp	Thu Jun 20 14:09:22 2019 +0100
+++ b/src/hotspot/os_cpu/linux_sparc/os_linux_sparc.cpp	Mon Jun 24 11:37:56 2019 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -385,7 +385,11 @@
   // Do not crash the VM in such a case.
   CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
   CompiledMethod* nm = cb->as_compiled_method_or_null();
-  if (nm != NULL && nm->has_unsafe_access()) {
+  bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc));
+  if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) {
+    if (is_unsafe_arraycopy) {
+      npc = UnsafeCopyMemory::page_error_continue_pc(pc);
+    }
     *stub = SharedRuntime::handle_unsafe_access(thread, npc);
     return true;
   }
@@ -550,8 +554,12 @@
     }
 
     if (sig == SIGBUS &&
-        thread->thread_state() == _thread_in_vm &&
+        (thread->thread_state() == _thread_in_vm ||
+         thread->thread_state() == _thread_in_native) &&
         thread->doing_unsafe_access()) {
+      if (UnsafeCopyMemory::contains_pc(pc)) {
+        npc = UnsafeCopyMemory::page_error_continue_pc(pc);
+      }
       stub = SharedRuntime::handle_unsafe_access(thread, npc);
     }

--- a/src/hotspot/os_cpu/linux_x86/os_linux_x86.cpp	Thu Jun 20 14:09:22 2019 +0100
+++ b/src/hotspot/os_cpu/linux_x86/os_linux_x86.cpp	Mon Jun 24 11:37:56 2019 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -435,8 +435,12 @@
         // Do not crash the VM in such a case.
         CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
         CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL;
-        if (nm != NULL && nm->has_unsafe_access()) {
+        bool is_unsafe_arraycopy = thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc);
+        if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) {
           address next_pc = Assembler::locate_next_instruction(pc);
+          if (is_unsafe_arraycopy) {
+            next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+          }
           stub = SharedRuntime::handle_unsafe_access(thread, next_pc);
         }
       }
@@ -483,10 +487,14 @@
           // Determination of interpreter/vtable stub/compiled code null exception
           stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL);
       }
-    } else if (thread->thread_state() == _thread_in_vm &&
-               sig == SIGBUS && /* info->si_code == BUS_OBJERR && */
-               thread->doing_unsafe_access()) {
+    } else if ((thread->thread_state() == _thread_in_vm ||
+                thread->thread_state() == _thread_in_native) &&
+               (sig == SIGBUS && /* info->si_code == BUS_OBJERR && */
+               thread->doing_unsafe_access())) {
         address next_pc = Assembler::locate_next_instruction(pc);
+        if (UnsafeCopyMemory::contains_pc(pc)) {
+          next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+        }
         stub = SharedRuntime::handle_unsafe_access(thread, next_pc);
     }

--- a/src/hotspot/os_cpu/linux_zero/os_linux_zero.cpp	Thu Jun 20 14:09:22 2019 +0100
+++ b/src/hotspot/os_cpu/linux_zero/os_linux_zero.cpp	Mon Jun 24 11:37:56 2019 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2007, 2008, 2009, 2010 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -207,7 +207,8 @@
     /*if (thread->thread_state() == _thread_in_Java) {
       ShouldNotCallThis();
     }
-    else*/ if (thread->thread_state() == _thread_in_vm &&
+    else*/ if ((thread->thread_state() == _thread_in_vm ||
+               thread->thread_state() == _thread_in_native) &&
                sig == SIGBUS && thread->doing_unsafe_access()) {
       ShouldNotCallThis();
     }

--- a/src/hotspot/os_cpu/solaris_sparc/os_solaris_sparc.cpp	Thu Jun 20 14:09:22 2019 +0100
+++ b/src/hotspot/os_cpu/solaris_sparc/os_solaris_sparc.cpp	Mon Jun 24 11:37:56 2019 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -436,8 +436,12 @@
     }
 
 
-    if (thread->thread_state() == _thread_in_vm) {
+    if (thread->thread_state() == _thread_in_vm ||
+        thread->thread_state() == _thread_in_native) {
       if (sig == SIGBUS && thread->doing_unsafe_access()) {
+        if (UnsafeCopyMemory::contains_pc(pc)) {
+          npc = UnsafeCopyMemory::page_error_continue_pc(pc);
+        }
         stub = SharedRuntime::handle_unsafe_access(thread, npc);
       }
     }
@@ -476,7 +480,11 @@
         // Do not crash the VM in such a case.
         CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
         CompiledMethod* nm = cb->as_compiled_method_or_null();
-        if (nm != NULL && nm->has_unsafe_access()) {
+        bool is_unsafe_arraycopy = (thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc));
+        if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy) {
+          if (is_unsafe_arraycopy) {
+            npc = UnsafeCopyMemory::page_error_continue_pc(pc);
+          }
           stub = SharedRuntime::handle_unsafe_access(thread, npc);
         }
       }

--- a/src/hotspot/os_cpu/solaris_x86/os_solaris_x86.cpp	Thu Jun 20 14:09:22 2019 +0100
+++ b/src/hotspot/os_cpu/solaris_x86/os_solaris_x86.cpp	Mon Jun 24 11:37:56 2019 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -517,9 +517,13 @@
       stub = VM_Version::cpuinfo_cont_addr();
     }
 
-    if (thread->thread_state() == _thread_in_vm) {
+    if (thread->thread_state() == _thread_in_vm ||
+         thread->thread_state() == _thread_in_native) {
       if (sig == SIGBUS && info->si_code == BUS_OBJERR && thread->doing_unsafe_access()) {
         address next_pc = Assembler::locate_next_instruction(pc);
+        if (UnsafeCopyMemory::contains_pc(pc)) {
+          next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+        }
         stub = SharedRuntime::handle_unsafe_access(thread, next_pc);
       }
     }
@@ -536,8 +540,12 @@
         CodeBlob* cb = CodeCache::find_blob_unsafe(pc);
         if (cb != NULL) {
           CompiledMethod* nm = cb->as_compiled_method_or_null();
-          if (nm != NULL && nm->has_unsafe_access()) {
+          bool is_unsafe_arraycopy = thread->doing_unsafe_access() && UnsafeCopyMemory::contains_pc(pc);
+          if ((nm != NULL && nm->has_unsafe_access()) || is_unsafe_arraycopy)) {
             address next_pc = Assembler::locate_next_instruction(pc);
+            if (is_unsafe_arraycopy) {
+              next_pc = UnsafeCopyMemory::page_error_continue_pc(pc);
+            }
             stub = SharedRuntime::handle_unsafe_access(thread, next_pc);
           }
         }

--- a/src/hotspot/share/opto/library_call.cpp	Thu Jun 20 14:09:22 2019 +0100
+++ b/src/hotspot/share/opto/library_call.cpp	Mon Jun 24 11:37:56 2019 -0700
@@ -4240,6 +4240,14 @@
   // Do not let writes of the copy source or destination float below the copy.
   insert_mem_bar(Op_MemBarCPUOrder);
 
+  Node* thread = _gvn.transform(new ThreadLocalNode());
+  Node* doing_unsafe_access_addr = basic_plus_adr(top(), thread, in_bytes(JavaThread::doing_unsafe_access_offset()));
+  BasicType doing_unsafe_access_bt = T_BYTE;
+  assert((sizeof(bool) * CHAR_BIT) == 8, "not implemented");
+
+  // update volatile field
+  store_to_memory(control(), doing_unsafe_access_addr, intcon(1), doing_unsafe_access_bt, Compile::AliasIdxRaw, MemNode::unordered);
+
   // Call it.  Note that the length argument is not scaled.
   make_runtime_call(RC_LEAF|RC_NO_FP,
                     OptoRuntime::fast_arraycopy_Type(),
@@ -4248,6 +4256,8 @@
                     TypeRawPtr::BOTTOM,
                     src, dst, size XTOP);
 
+  store_to_memory(control(), doing_unsafe_access_addr, intcon(0), doing_unsafe_access_bt, Compile::AliasIdxRaw, MemNode::unordered);
+
   // Do not let reads of the copy destination float above the copy.
   insert_mem_bar(Op_MemBarCPUOrder);

--- a/src/hotspot/share/prims/unsafe.cpp	Thu Jun 20 14:09:22 2019 +0100
+++ b/src/hotspot/share/prims/unsafe.cpp	Mon Jun 24 11:37:56 2019 -0700
@@ -149,6 +149,25 @@
 ///// Data read/writes on the Java heap and in native (off-heap) memory
 
 /**
+ * Helper class to wrap memory accesses in JavaThread::doing_unsafe_access()
+ */
+class GuardUnsafeAccess {
+  JavaThread* _thread;
+
+public:
+  GuardUnsafeAccess(JavaThread* thread) : _thread(thread) {
+    // native/off-heap access which may raise SIGBUS if accessing
+    // memory mapped file data in a region of the file which has
+    // been truncated and is now invalid.
+    _thread->set_doing_unsafe_access(true);
+  }
+
+  ~GuardUnsafeAccess() {
+    _thread->set_doing_unsafe_access(false);
+  }
+};
+
+/**
  * Helper class for accessing memory.
  *
  * Normalizes values and wraps accesses in
@@ -189,25 +208,6 @@
     return x != 0;
   }
 
-  /**
-   * Helper class to wrap memory accesses in JavaThread::doing_unsafe_access()
-   */
-  class GuardUnsafeAccess {
-    JavaThread* _thread;
-
-  public:
-    GuardUnsafeAccess(JavaThread* thread) : _thread(thread) {
-      // native/off-heap access which may raise SIGBUS if accessing
-      // memory mapped file data in a region of the file which has
-      // been truncated and is now invalid
-      _thread->set_doing_unsafe_access(true);
-    }
-
-    ~GuardUnsafeAccess() {
-      _thread->set_doing_unsafe_access(false);
-    }
-  };
-
 public:
   MemoryAccess(JavaThread* thread, jobject obj, jlong offset)
     : _thread(thread), _obj(JNIHandles::resolve(obj)), _offset((ptrdiff_t)offset) {
@@ -399,8 +399,14 @@
 
   void* src = index_oop_from_field_offset_long(srcp, srcOffset);
   void* dst = index_oop_from_field_offset_long(dstp, dstOffset);
-
-  Copy::conjoint_memory_atomic(src, dst, sz);
+  {
+    GuardUnsafeAccess guard(thread);
+    if (StubRoutines::unsafe_arraycopy() != NULL) {
+      StubRoutines::UnsafeArrayCopy_stub()(src, dst, sz);
+    } else {
+      Copy::conjoint_memory_atomic(src, dst, sz);
+    }
+  }
 } UNSAFE_END
 
 // This function is a leaf since if the source and destination are both in native memory
@@ -416,7 +422,11 @@
     address src = (address)srcOffset;
     address dst = (address)dstOffset;
 
-    Copy::conjoint_swap(src, dst, sz, esz);
+    {
+      JavaThread* thread = JavaThread::thread_from_jni_environment(env);
+      GuardUnsafeAccess guard(thread);
+      Copy::conjoint_swap(src, dst, sz, esz);
+    }
   } else {
     // At least one of src/dst are on heap, transition to VM to access raw pointers
 
@@ -427,7 +437,10 @@
       address src = (address)index_oop_from_field_offset_long(srcp, srcOffset);
       address dst = (address)index_oop_from_field_offset_long(dstp, dstOffset);
 
-      Copy::conjoint_swap(src, dst, sz, esz);
+      {
+        GuardUnsafeAccess guard(thread);
+        Copy::conjoint_swap(src, dst, sz, esz);
+      }
     } JVM_END
   }
 } UNSAFE_END

--- a/src/hotspot/share/runtime/stubRoutines.cpp	Thu Jun 20 14:09:22 2019 +0100
+++ b/src/hotspot/share/runtime/stubRoutines.cpp	Mon Jun 24 11:37:56 2019 -0700
@@ -38,6 +38,10 @@
 #include "opto/runtime.hpp"
 #endif
 
+UnsafeCopyMemory* UnsafeCopyMemory::_table                      = NULL;
+int UnsafeCopyMemory::_table_length                             = 0;
+int UnsafeCopyMemory::_table_max_length                         = 0;
+address UnsafeCopyMemory::_common_exit_stub_pc                  = NULL;
 
 // Implementation of StubRoutines - for a description
 // of how to extend it, see the header file.
@@ -113,7 +117,6 @@
 address StubRoutines::_unsafe_arraycopy                  = NULL;
 address StubRoutines::_generic_arraycopy                 = NULL;
 
-
 address StubRoutines::_jbyte_fill;
 address StubRoutines::_jshort_fill;
 address StubRoutines::_jint_fill;
@@ -177,6 +180,31 @@
 
 extern void StubGenerator_generate(CodeBuffer* code, bool all); // only interface to generators
 
+void UnsafeCopyMemory::create_table(int max_size) {
+  UnsafeCopyMemory::_table = new UnsafeCopyMemory[max_size];
+  UnsafeCopyMemory::_table_max_length = max_size;
+}
+
+bool UnsafeCopyMemory::contains_pc(address pc) {
+  for (int i = 0; i < UnsafeCopyMemory::_table_length; i++) {
+    UnsafeCopyMemory* entry = &UnsafeCopyMemory::_table[i];
+    if (pc >= entry->start_pc() && pc < entry->end_pc()) {
+      return true;
+    }
+  }
+  return false;
+}
+
+address UnsafeCopyMemory::page_error_continue_pc(address pc) {
+  for (int i = 0; i < UnsafeCopyMemory::_table_length; i++) {
+    UnsafeCopyMemory* entry = &UnsafeCopyMemory::_table[i];
+    if (pc >= entry->start_pc() && pc < entry->end_pc()) {
+      return entry->error_exit_pc();
+    }
+  }
+  return NULL;
+}
+
 void StubRoutines::initialize1() {
   if (_code1 == NULL) {
     ResourceMark rm;
@@ -569,3 +597,25 @@
 #undef RETURN_STUB
 #undef RETURN_STUB_PARM
 }
+
+UnsafeCopyMemoryMark::UnsafeCopyMemoryMark(StubCodeGenerator* cgen, bool add_entry, bool continue_at_scope_end, address error_exit_pc) {
+  _cgen = cgen;
+  _ucm_entry = NULL;
+  if (add_entry) {
+    address err_exit_pc = NULL;
+    if (!continue_at_scope_end) {
+      err_exit_pc = error_exit_pc != NULL ? error_exit_pc : UnsafeCopyMemory::common_exit_stub_pc();
+    }
+    assert(err_exit_pc != NULL || continue_at_scope_end, "error exit not set");
+    _ucm_entry = UnsafeCopyMemory::add_to_table(_cgen->assembler()->pc(), NULL, err_exit_pc);
+  }
+}
+
+UnsafeCopyMemoryMark::~UnsafeCopyMemoryMark() {
+  if (_ucm_entry != NULL) {
+    _ucm_entry->set_end_pc(_cgen->assembler()->pc());
+    if (_ucm_entry->error_exit_pc() == NULL) {
+      _ucm_entry->set_error_exit_pc(_cgen->assembler()->pc());
+    }
+  }
+}

--- a/src/hotspot/share/runtime/stubRoutines.hpp	Thu Jun 20 14:09:22 2019 +0100
+++ b/src/hotspot/share/runtime/stubRoutines.hpp	Mon Jun 24 11:37:56 2019 -0700
@@ -74,6 +74,51 @@
 // 4. implement the corresponding generator function in the platform-dependent
 //    stubGenerator_<arch>.cpp file and call the function in generate_all() of that file
 
+class UnsafeCopyMemory : public CHeapObj<mtCode> {
+ private:
+  address _start_pc;
+  address _end_pc;
+  address _error_exit_pc;
+ public:
+  static address           _common_exit_stub_pc;
+  static UnsafeCopyMemory* _table;
+  static int               _table_length;
+  static int               _table_max_length;
+  UnsafeCopyMemory() : _start_pc(NULL), _end_pc(NULL), _error_exit_pc(NULL) {}
+  void    set_start_pc(address pc)      { _start_pc = pc; }
+  void    set_end_pc(address pc)        { _end_pc = pc; }
+  void    set_error_exit_pc(address pc) { _error_exit_pc = pc; }
+  address start_pc()      const { return _start_pc; }
+  address end_pc()        const { return _end_pc; }
+  address error_exit_pc() const { return _error_exit_pc; }
+
+  static void    set_common_exit_stub_pc(address pc) { _common_exit_stub_pc = pc; }
+  static address common_exit_stub_pc()               { return _common_exit_stub_pc; }
+
+  static UnsafeCopyMemory* add_to_table(address start_pc, address end_pc, address error_exit_pc) {
+    guarantee(_table_length < _table_max_length, "Incorrect UnsafeCopyMemory::_table_max_length");
+    UnsafeCopyMemory* entry = &_table[_table_length];
+    entry->set_start_pc(start_pc);
+    entry->set_end_pc(end_pc);
+    entry->set_error_exit_pc(error_exit_pc);
+
+    _table_length++;
+    return entry;
+  }
+
+  static bool    contains_pc(address pc);
+  static address page_error_continue_pc(address pc);
+  static void    create_table(int max_size);
+};
+
+class UnsafeCopyMemoryMark : public StackObj {
+ private:
+  UnsafeCopyMemory*  _ucm_entry;
+  StubCodeGenerator* _cgen;
+ public:
+  UnsafeCopyMemoryMark(StubCodeGenerator* cgen, bool add_entry, bool continue_at_scope_end, address error_exit_pc = NULL);
+  ~UnsafeCopyMemoryMark();
+};
 
 class StubRoutines: AllStatic {
 
@@ -310,11 +355,14 @@
   static address arrayof_oop_disjoint_arraycopy(bool dest_uninitialized = false) {
     return dest_uninitialized ? _arrayof_oop_disjoint_arraycopy_uninit : _arrayof_oop_disjoint_arraycopy;
   }
-
   static address checkcast_arraycopy(bool dest_uninitialized = false) {
     return dest_uninitialized ? _checkcast_arraycopy_uninit : _checkcast_arraycopy;
   }
-  static address unsafe_arraycopy()    { return _unsafe_arraycopy; }
+  static address unsafe_arraycopy()     { return _unsafe_arraycopy; }
+
+  typedef void (*UnsafeArrayCopyStub)(const void* src, void* dst, size_t count);
+  static UnsafeArrayCopyStub UnsafeArrayCopy_stub()         { return CAST_TO_FN_PTR(UnsafeArrayCopyStub,  _unsafe_arraycopy); }
+
   static address generic_arraycopy()   { return _generic_arraycopy; }
 
   static address jbyte_fill()          { return _jbyte_fill; }

--- a/src/hotspot/share/runtime/thread.hpp	Thu Jun 20 14:09:22 2019 +0100
+++ b/src/hotspot/share/runtime/thread.hpp	Mon Jun 24 11:37:56 2019 -0700
@@ -1794,6 +1794,7 @@
   static ByteSize should_post_on_exceptions_flag_offset() {
     return byte_offset_of(JavaThread, _should_post_on_exceptions_flag);
   }
+  static ByteSize doing_unsafe_access_offset() { return byte_offset_of(JavaThread, _doing_unsafe_access); }
 
   // Returns the jni environment for this thread
   JNIEnv* jni_environment()                      { return &_jni_environment; }

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/runtime/Unsafe/InternalErrorTest.java	Mon Jun 24 11:37:56 2019 -0700
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8191278
+ * @requires os.family != "windows"
+ * @summary Check that SIGBUS errors caused by memory accesses in Unsafe_CopyMemory()
+ * and UnsafeCopySwapMemory() get converted to java.lang.InternalError exceptions.
+ * @modules java.base/jdk.internal.misc
+ *
+ * @library /test/lib
+ * @build sun.hotspot.WhiteBox
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ *      sun.hotspot.WhiteBox$WhiteBoxPermission
+ *
+ * @run main/othervm -XX:CompileCommand=exclude,*InternalErrorTest.main -XX:CompileCommand=inline,*.get -XX:CompileCommand=inline,*Unsafe.* -Xbootclasspath/a:.  -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI InternalErrorTest
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.nio.MappedByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.file.Files;
+import jdk.internal.misc.Unsafe;
+import sun.hotspot.WhiteBox;
+
+// Test that illegal memory access errors in Unsafe_CopyMemory0() and
+// UnsafeCopySwapMemory() that cause SIGBUS errors result in
+// java.lang.InternalError exceptions, not JVM crashes.
+public class InternalErrorTest {
+
+    private static final Unsafe unsafe = Unsafe.getUnsafe();
+    private static final int pageSize = WhiteBox.getWhiteBox().getVMPageSize();
+    private static final String expectedErrorMsg = "fault occurred in a recent unsafe memory access";
+    private static final String failureMsg1 = "InternalError not thrown";
+    private static final String failureMsg2 = "Wrong InternalError: ";
+
+    public static void main(String[] args) throws Throwable {
+        Unsafe unsafe = Unsafe.getUnsafe();
+
+        String currentDir = System.getProperty("test.classes");
+        File file = new File(currentDir, "tmpFile.txt");
+
+        StringBuilder s = new StringBuilder();
+        for (int i = 1; i < pageSize + 1000; i++) {
+            s.append("1");
+        }
+        Files.write(file.toPath(), s.toString().getBytes());
+        FileChannel fileChannel = new RandomAccessFile(file, "r").getChannel();
+        MappedByteBuffer buffer =
+            fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileChannel.size());
+
+        // Get address of mapped memory.
+        long mapAddr = 0;
+        try {
+            Field af = java.nio.Buffer.class.getDeclaredField("address");
+            af.setAccessible(true);
+            mapAddr = af.getLong(buffer);
+        } catch (Exception f) {
+            throw f;
+        }
+        long allocMem = unsafe.allocateMemory(4000);
+
+        for (int i = 0; i < 3; i++) {
+            test(buffer, unsafe, mapAddr, allocMem, i);
+        }
+
+        Files.write(file.toPath(), "2".getBytes());
+        buffer.position(buffer.position() + pageSize);
+        for (int i = 0; i < 3; i++) {
+            try {
+                test(buffer, unsafe, mapAddr, allocMem, i);
+                WhiteBox.getWhiteBox().forceSafepoint();
+                throw new RuntimeException(failureMsg1);
+            } catch (InternalError e) {
+                if (!e.getMessage().contains(expectedErrorMsg)) {
+                    throw new RuntimeException(failureMsg2 + e.getMessage());
+                }
+            }
+        }
+
+        Method m = InternalErrorTest.class.getMethod("test", MappedByteBuffer.class, Unsafe.class, long.class, long.class, int.class);
+        WhiteBox.getWhiteBox().enqueueMethodForCompilation(m, 3);
+
+        for (int i = 0; i < 3; i++) {
+            try {
+                test(buffer, unsafe, mapAddr, allocMem, i);
+                WhiteBox.getWhiteBox().forceSafepoint();
+                throw new RuntimeException(failureMsg1);
+            } catch (InternalError e) {
+                if (!e.getMessage().contains(expectedErrorMsg)) {
+                    throw new RuntimeException(failureMsg2 + e.getMessage());
+                }
+            }
+        }
+
+        WhiteBox.getWhiteBox().enqueueMethodForCompilation(m, 4);
+
+        for (int i = 0; i < 3; i++) {
+            try {
+                test(buffer, unsafe, mapAddr, allocMem, i);
+                WhiteBox.getWhiteBox().forceSafepoint();
+                throw new RuntimeException(failureMsg1);
+            } catch (InternalError e) {
+                if (!e.getMessage().contains(expectedErrorMsg)) {
+                    throw new RuntimeException(failureMsg2 + e.getMessage());
+                }
+            }
+        }
+
+        System.out.println("Success");
+    }
+
+    public static void test(MappedByteBuffer buffer, Unsafe unsafe, long mapAddr, long allocMem, int type) {
+        switch (type) {
+            case 0:
+                // testing Unsafe.copyMemory, trying to access a word from next page after truncation.
+                buffer.get(new byte[8]);
+                break;
+            case 1:
+                // testing Unsafe.copySwapMemory, trying to access next  page after truncation.
+                unsafe.copySwapMemory(null, mapAddr + pageSize, new byte[4000], 16, 2000, 2);
+                break;
+            case 2:
+                // testing Unsafe.copySwapMemory, trying to access next  page after truncation.
+                unsafe.copySwapMemory(null, mapAddr + pageSize, null, allocMem, 2000, 2);
+                break;
+        }
+    }
+
+}

author	jcm
	Mon, 24 Jun 2019 11:37:56 -0700
changeset 55490	3f3dc00a69a5
parent 55489	c749ecf599c0
child 55491	41fd388aaa4d

src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp		file \| annotate \| diff \| comparison \| revisions
src/hotspot/cpu/arm/stubGenerator_arm.cpp		file \| annotate \| diff \| comparison \| revisions
src/hotspot/cpu/ppc/stubGenerator_ppc.cpp		file \| annotate \| diff \| comparison \| revisions
src/hotspot/cpu/sparc/stubGenerator_sparc.cpp		file \| annotate \| diff \| comparison \| revisions
src/hotspot/cpu/x86/assembler_x86.cpp		file \| annotate \| diff \| comparison \| revisions
src/hotspot/cpu/x86/stubGenerator_x86_32.cpp		file \| annotate \| diff \| comparison \| revisions
src/hotspot/cpu/x86/stubGenerator_x86_64.cpp		file \| annotate \| diff \| comparison \| revisions
src/hotspot/os/windows/os_windows.cpp		file \| annotate \| diff \| comparison \| revisions
src/hotspot/os_cpu/aix_ppc/os_aix_ppc.cpp		file \| annotate \| diff \| comparison \| revisions
src/hotspot/os_cpu/bsd_x86/os_bsd_x86.cpp		file \| annotate \| diff \| comparison \| revisions
src/hotspot/os_cpu/bsd_zero/os_bsd_zero.cpp		file \| annotate \| diff \| comparison \| revisions
src/hotspot/os_cpu/linux_aarch64/os_linux_aarch64.cpp		file \| annotate \| diff \| comparison \| revisions
src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp		file \| annotate \| diff \| comparison \| revisions
src/hotspot/os_cpu/linux_ppc/os_linux_ppc.cpp		file \| annotate \| diff \| comparison \| revisions
src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp		file \| annotate \| diff \| comparison \| revisions
src/hotspot/os_cpu/linux_sparc/os_linux_sparc.cpp		file \| annotate \| diff \| comparison \| revisions
src/hotspot/os_cpu/linux_x86/os_linux_x86.cpp		file \| annotate \| diff \| comparison \| revisions
src/hotspot/os_cpu/linux_zero/os_linux_zero.cpp		file \| annotate \| diff \| comparison \| revisions
src/hotspot/os_cpu/solaris_sparc/os_solaris_sparc.cpp		file \| annotate \| diff \| comparison \| revisions
src/hotspot/os_cpu/solaris_x86/os_solaris_x86.cpp		file \| annotate \| diff \| comparison \| revisions
src/hotspot/share/opto/library_call.cpp		file \| annotate \| diff \| comparison \| revisions
src/hotspot/share/prims/unsafe.cpp		file \| annotate \| diff \| comparison \| revisions
src/hotspot/share/runtime/stubRoutines.cpp		file \| annotate \| diff \| comparison \| revisions
src/hotspot/share/runtime/stubRoutines.hpp		file \| annotate \| diff \| comparison \| revisions
src/hotspot/share/runtime/thread.hpp		file \| annotate \| diff \| comparison \| revisions
test/hotspot/jtreg/runtime/Unsafe/InternalErrorTest.java		file \| annotate \| diff \| comparison \| revisions