8186042: Optimize OopMapCache lookup
authorcoleenp
Mon, 28 Aug 2017 09:06:30 -0400
changeset 46996 9cbcd7082efe
parent 46992 95000145dd81
child 46997 20a78d88f981
8186042: Optimize OopMapCache lookup Summary: Use lock free access to oopMapCache Reviewed-by: dholmes, sspitsyn Contributed-by: frederic.parain@oracle.com, coleen.phillimore@oracle.com
hotspot/src/share/vm/gc/shared/vmGCOperations.cpp
hotspot/src/share/vm/interpreter/oopMapCache.cpp
hotspot/src/share/vm/interpreter/oopMapCache.hpp
hotspot/src/share/vm/logging/logTag.hpp
hotspot/src/share/vm/oops/method.cpp
hotspot/src/share/vm/runtime/globals.hpp
hotspot/src/share/vm/runtime/memprofiler.cpp
hotspot/src/share/vm/runtime/vframe.cpp
--- a/hotspot/src/share/vm/gc/shared/vmGCOperations.cpp	Mon Aug 28 02:55:31 2017 -0700
+++ b/hotspot/src/share/vm/gc/shared/vmGCOperations.cpp	Mon Aug 28 09:06:30 2017 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -30,6 +30,7 @@
 #include "gc/shared/gcLocker.inline.hpp"
 #include "gc/shared/genCollectedHeap.hpp"
 #include "gc/shared/vmGCOperations.hpp"
+#include "interpreter/oopMapCache.hpp"
 #include "logging/log.hpp"
 #include "memory/oopFactory.hpp"
 #include "runtime/handles.inline.hpp"
@@ -111,6 +112,9 @@
 
 void VM_GC_Operation::doit_epilogue() {
   assert(Thread::current()->is_Java_thread(), "just checking");
+  // Clean up old interpreter OopMap entries that were replaced
+  // during the GC thread root traversal.
+  OopMapCache::cleanup_old_entries();
   if (Universe::has_reference_pending_list()) {
     Heap_lock->notify_all();
   }
--- a/hotspot/src/share/vm/interpreter/oopMapCache.cpp	Mon Aug 28 02:55:31 2017 -0700
+++ b/hotspot/src/share/vm/interpreter/oopMapCache.cpp	Mon Aug 28 09:06:30 2017 -0400
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 #include "interpreter/oopMapCache.hpp"
 #include "logging/log.hpp"
+#include "logging/logStream.hpp"
 #include "memory/allocation.inline.hpp"
 #include "memory/resourceArea.hpp"
 #include "oops/oop.inline.hpp"
@@ -37,6 +38,9 @@
   friend class OopMapCache;
   friend class VerifyClosure;
 
+ private:
+  OopMapCacheEntry* _next;
+
  protected:
   // Initialization
   void fill(const methodHandle& method, int bci);
@@ -54,8 +58,9 @@
 
  public:
   OopMapCacheEntry() : InterpreterOopMap() {
+    _next = NULL;
 #ifdef ASSERT
-     _resource_allocate_bit_mask = false;
+    _resource_allocate_bit_mask = false;
 #endif
   }
 };
@@ -263,23 +268,26 @@
 
   // Check if map is generated correctly
   // (Use ?: operator to make sure all 'true' & 'false' are represented exactly the same so we can use == afterwards)
-  if (TraceOopMapGeneration && Verbose) tty->print("Locals (%d): ", max_locals);
+  Log(interpreter, oopmap) logv;
+  LogStream st(logv.trace());
 
+  st.print("Locals (%d): ", max_locals);
   for(int i = 0; i < max_locals; i++) {
     bool v1 = is_oop(i)               ? true : false;
     bool v2 = vars[i].is_reference()  ? true : false;
     assert(v1 == v2, "locals oop mask generation error");
-    if (TraceOopMapGeneration && Verbose) tty->print("%d", v1 ? 1 : 0);
+    st.print("%d", v1 ? 1 : 0);
   }
+  st.cr();
 
-  if (TraceOopMapGeneration && Verbose) { tty->cr(); tty->print("Stack (%d): ", stack_top); }
+  st.print("Stack (%d): ", stack_top);
   for(int j = 0; j < stack_top; j++) {
     bool v1 = is_oop(max_locals + j)  ? true : false;
     bool v2 = stack[j].is_reference() ? true : false;
     assert(v1 == v2, "stack oop mask generation error");
-    if (TraceOopMapGeneration && Verbose) tty->print("%d", v1 ? 1 : 0);
+    st.print("%d", v1 ? 1 : 0);
   }
-  if (TraceOopMapGeneration && Verbose) tty->cr();
+  st.cr();
   return true;
 }
 
@@ -373,8 +381,6 @@
 
   // verify bit mask
   assert(verify_mask(vars, stack, max_locals, stack_top), "mask could not be verified");
-
-
 }
 
 void OopMapCacheEntry::flush() {
@@ -385,16 +391,6 @@
 
 // Implementation of OopMapCache
 
-#ifndef PRODUCT
-
-static long _total_memory_usage = 0;
-
-long OopMapCache::memory_usage() {
-  return _total_memory_usage;
-}
-
-#endif
-
 void InterpreterOopMap::resource_copy(OopMapCacheEntry* from) {
   assert(_resource_allocate_bit_mask,
     "Should not resource allocate the _bit_mask");
@@ -435,15 +431,11 @@
          ^ ((unsigned int) method->size_of_parameters() << 6);
 }
 
+OopMapCacheEntry* volatile OopMapCache::_old_entries = NULL;
 
-OopMapCache::OopMapCache() :
-  _mut(Mutex::leaf, "An OopMapCache lock", true)
-{
-  _array  = NEW_C_HEAP_ARRAY(OopMapCacheEntry, _size, mtClass);
-  // Cannot call flush for initialization, since flush
-  // will check if memory should be deallocated
-  for(int i = 0; i < _size; i++) _array[i].initialize();
-  NOT_PRODUCT(_total_memory_usage += sizeof(OopMapCache) + (sizeof(OopMapCacheEntry) * _size);)
+OopMapCache::OopMapCache() {
+  _array  = NEW_C_HEAP_ARRAY(OopMapCacheEntry*, _size, mtClass);
+  for(int i = 0; i < _size; i++) _array[i] = NULL;
 }
 
 
@@ -452,112 +444,152 @@
   // Deallocate oop maps that are allocated out-of-line
   flush();
   // Deallocate array
-  NOT_PRODUCT(_total_memory_usage -= sizeof(OopMapCache) + (sizeof(OopMapCacheEntry) * _size);)
-  FREE_C_HEAP_ARRAY(OopMapCacheEntry, _array);
+  FREE_C_HEAP_ARRAY(OopMapCacheEntry*, _array);
 }
 
 OopMapCacheEntry* OopMapCache::entry_at(int i) const {
-  return &_array[i % _size];
+  return (OopMapCacheEntry*)OrderAccess::load_ptr_acquire(&(_array[i % _size]));
+}
+
+bool OopMapCache::put_at(int i, OopMapCacheEntry* entry, OopMapCacheEntry* old) {
+  return Atomic::cmpxchg_ptr (entry, &_array[i % _size], old) == old;
 }
 
 void OopMapCache::flush() {
-  for (int i = 0; i < _size; i++) _array[i].flush();
+  for (int i = 0; i < _size; i++) {
+    OopMapCacheEntry* entry = _array[i];
+    if (entry != NULL) {
+      _array[i] = NULL;  // no barrier, only called in OopMapCache destructor
+      entry->flush();
+      FREE_C_HEAP_OBJ(entry);
+    }
+  }
 }
 
 void OopMapCache::flush_obsolete_entries() {
-  for (int i = 0; i < _size; i++)
-    if (!_array[i].is_empty() && _array[i].method()->is_old()) {
+  assert(SafepointSynchronize::is_at_safepoint(), "called by RedefineClasses in a safepoint");
+  for (int i = 0; i < _size; i++) {
+    OopMapCacheEntry* entry = _array[i];
+    if (entry != NULL && !entry->is_empty() && entry->method()->is_old()) {
       // Cache entry is occupied by an old redefined method and we don't want
       // to pin it down so flush the entry.
       if (log_is_enabled(Debug, redefine, class, oopmap)) {
         ResourceMark rm;
-        log_debug(redefine, class, oopmap)
+        log_debug(redefine, class, interpreter, oopmap)
           ("flush: %s(%s): cached entry @%d",
-           _array[i].method()->name()->as_C_string(), _array[i].method()->signature()->as_C_string(), i);
+           entry->method()->name()->as_C_string(), entry->method()->signature()->as_C_string(), i);
       }
-      _array[i].flush();
+      _array[i] = NULL;
+      entry->flush();
+      FREE_C_HEAP_OBJ(entry);
     }
+  }
 }
 
+// Called by GC for thread root scan during a safepoint only.  The other interpreted frame oopmaps
+// are generated locally and not cached.
 void OopMapCache::lookup(const methodHandle& method,
                          int bci,
-                         InterpreterOopMap* entry_for) const {
-  MutexLocker x(&_mut);
+                         InterpreterOopMap* entry_for) {
+  assert(SafepointSynchronize::is_at_safepoint(), "called by GC in a safepoint");
+  int probe = hash_value_for(method, bci);
+  int i;
+  OopMapCacheEntry* entry = NULL;
 
-  OopMapCacheEntry* entry = NULL;
-  int probe = hash_value_for(method, bci);
+  if (log_is_enabled(Debug, interpreter, oopmap)) {
+    static int count = 0;
+    ResourceMark rm;
+    log_debug(interpreter, oopmap)
+          ("%d - Computing oopmap at bci %d for %s at hash %d", ++count, bci,
+           method()->name_and_sig_as_C_string(), probe);
+  }
 
   // Search hashtable for match
-  int i;
   for(i = 0; i < _probe_depth; i++) {
     entry = entry_at(probe + i);
-    if (entry->match(method, bci)) {
+    if (entry != NULL && !entry->is_empty() && entry->match(method, bci)) {
       entry_for->resource_copy(entry);
       assert(!entry_for->is_empty(), "A non-empty oop map should be returned");
+      log_debug(interpreter, oopmap)("- found at hash %d", probe + i);
       return;
     }
   }
 
-  if (TraceOopMapGeneration) {
-    static int count = 0;
-    ResourceMark rm;
-    tty->print("%d - Computing oopmap at bci %d for ", ++count, bci);
-    method->print_value(); tty->cr();
-  }
+  // Entry is not in hashtable.
+  // Compute entry
 
-  // Entry is not in hashtable.
-  // Compute entry and return it
+  OopMapCacheEntry* tmp = NEW_C_HEAP_OBJ(OopMapCacheEntry, mtClass);
+  tmp->initialize();
+  tmp->fill(method, bci);
+  entry_for->resource_copy(tmp);
 
   if (method->should_not_be_cached()) {
     // It is either not safe or not a good idea to cache this Method*
     // at this time. We give the caller of lookup() a copy of the
     // interesting info via parameter entry_for, but we don't add it to
     // the cache. See the gory details in Method*.cpp.
-    compute_one_oop_map(method, bci, entry_for);
+    FREE_C_HEAP_OBJ(tmp);
     return;
   }
 
   // First search for an empty slot
   for(i = 0; i < _probe_depth; i++) {
-    entry  = entry_at(probe + i);
-    if (entry->is_empty()) {
-      entry->fill(method, bci);
-      entry_for->resource_copy(entry);
-      assert(!entry_for->is_empty(), "A non-empty oop map should be returned");
-      return;
+    entry = entry_at(probe + i);
+    if (entry == NULL) {
+      if (put_at(probe + i, tmp, NULL)) {
+        assert(!entry_for->is_empty(), "A non-empty oop map should be returned");
+        return;
+      }
     }
   }
 
-  if (TraceOopMapGeneration) {
-    ResourceMark rm;
-    tty->print_cr("*** collision in oopmap cache - flushing item ***");
-  }
+  log_debug(interpreter, oopmap)("*** collision in oopmap cache - flushing item ***");
 
   // No empty slot (uncommon case). Use (some approximation of a) LRU algorithm
-  //entry_at(probe + _probe_depth - 1)->flush();
-  //for(i = _probe_depth - 1; i > 0; i--) {
-  //  // Coping entry[i] = entry[i-1];
-  //  OopMapCacheEntry *to   = entry_at(probe + i);
-  //  OopMapCacheEntry *from = entry_at(probe + i - 1);
-  //  to->copy(from);
-  // }
+  // where the first entry in the collision array is replaced with the new one.
+  OopMapCacheEntry* old = entry_at(probe + 0);
+  if (put_at(probe + 0, tmp, old)) {
+    enqueue_for_cleanup(old);
+  } else {
+    enqueue_for_cleanup(tmp);
+  }
 
-  assert(method->is_method(), "gaga");
+  assert(!entry_for->is_empty(), "A non-empty oop map should be returned");
+  return;
+}
+
+void OopMapCache::enqueue_for_cleanup(OopMapCacheEntry* entry) {
+  bool success = false;
+  OopMapCacheEntry* head;
+  do {
+    head = _old_entries;
+    entry->_next = head;
+    success = Atomic::cmpxchg_ptr (entry, &_old_entries, head) == head;
+  } while (!success);
 
-  entry = entry_at(probe + 0);
-  entry->fill(method, bci);
-
-  // Copy the  newly cached entry to input parameter
-  entry_for->resource_copy(entry);
+  if (log_is_enabled(Debug, interpreter, oopmap)) {
+    ResourceMark rm;
+    log_debug(interpreter, oopmap)("enqueue %s at bci %d for cleanup",
+                          entry->method()->name_and_sig_as_C_string(), entry->bci());
+  }
+}
 
-  if (TraceOopMapGeneration) {
-    ResourceMark rm;
-    tty->print("Done with ");
-    method->print_value(); tty->cr();
+// This is called after GC threads are done and nothing is accessing the old_entries
+// list, so no synchronization needed.
+void OopMapCache::cleanup_old_entries() {
+  OopMapCacheEntry* entry = _old_entries;
+  _old_entries = NULL;
+  while (entry != NULL) {
+    if (log_is_enabled(Debug, interpreter, oopmap)) {
+      ResourceMark rm;
+      log_debug(interpreter, oopmap)("cleanup entry %s at bci %d",
+                          entry->method()->name_and_sig_as_C_string(), entry->bci());
+    }
+    OopMapCacheEntry* next = entry->_next;
+    entry->flush();
+    FREE_C_HEAP_OBJ(entry);
+    entry = next;
   }
-  assert(!entry_for->is_empty(), "A non-empty oop map should be returned");
-
-  return;
 }
 
 void OopMapCache::compute_one_oop_map(const methodHandle& method, int bci, InterpreterOopMap* entry) {
--- a/hotspot/src/share/vm/interpreter/oopMapCache.hpp	Mon Aug 28 02:55:31 2017 -0700
+++ b/hotspot/src/share/vm/interpreter/oopMapCache.hpp	Mon Aug 28 09:06:30 2017 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -144,17 +144,19 @@
 };
 
 class OopMapCache : public CHeapObj<mtClass> {
+ static OopMapCacheEntry* volatile _old_entries;
  private:
   enum { _size        = 32,     // Use fixed size for now
          _probe_depth = 3       // probe depth in case of collisions
   };
 
-  OopMapCacheEntry* _array;
+  OopMapCacheEntry* volatile * _array;
 
   unsigned int hash_value_for(const methodHandle& method, int bci) const;
   OopMapCacheEntry* entry_at(int i) const;
+  bool put_at(int i, OopMapCacheEntry* entry, OopMapCacheEntry* old);
 
-  mutable Mutex _mut;
+  static void enqueue_for_cleanup(OopMapCacheEntry* entry);
 
   void flush();
 
@@ -167,13 +169,11 @@
 
   // Returns the oopMap for (method, bci) in parameter "entry".
   // Returns false if an oop map was not found.
-  void lookup(const methodHandle& method, int bci, InterpreterOopMap* entry) const;
+  void lookup(const methodHandle& method, int bci, InterpreterOopMap* entry);
 
   // Compute an oop map without updating the cache or grabbing any locks (for debugging)
   static void compute_one_oop_map(const methodHandle& method, int bci, InterpreterOopMap* entry);
-
-  // Returns total no. of bytes allocated as part of OopMapCache's
-  static long memory_usage()                     PRODUCT_RETURN0;
+  static void cleanup_old_entries();
 };
 
 #endif // SHARE_VM_INTERPRETER_OOPMAPCACHE_HPP
--- a/hotspot/src/share/vm/logging/logTag.hpp	Mon Aug 28 02:55:31 2017 -0700
+++ b/hotspot/src/share/vm/logging/logTag.hpp	Mon Aug 28 09:06:30 2017 -0400
@@ -74,6 +74,7 @@
   LOG_TAG(iklass) \
   LOG_TAG(init) \
   LOG_TAG(inlining) \
+  LOG_TAG(interpreter) \
   LOG_TAG(itables) \
   LOG_TAG(jit) \
   LOG_TAG(jni) \
--- a/hotspot/src/share/vm/oops/method.cpp	Mon Aug 28 02:55:31 2017 -0700
+++ b/hotspot/src/share/vm/oops/method.cpp	Mon Aug 28 09:06:30 2017 -0400
@@ -214,26 +214,14 @@
 }
 
 void Method::mask_for(int bci, InterpreterOopMap* mask) {
-
-  Thread* myThread    = Thread::current();
-  methodHandle h_this(myThread, this);
-#if defined(ASSERT) && !INCLUDE_JVMCI
-  bool has_capability = myThread->is_VM_thread() ||
-                        myThread->is_ConcurrentGC_thread() ||
-                        myThread->is_GC_task_thread();
-
-  if (!has_capability) {
-    if (!VerifyStack && !VerifyLastFrame) {
-      // verify stack calls this outside VM thread
-      warning("oopmap should only be accessed by the "
-              "VM, GC task or CMS threads (or during debugging)");
-      InterpreterOopMap local_mask;
-      method_holder()->mask_for(h_this, bci, &local_mask);
-      local_mask.print();
-    }
+  methodHandle h_this(Thread::current(), this);
+  // Only GC uses the OopMapCache during thread stack root scanning
+  // any other uses generate an oopmap but do not save it in the cache.
+  if (Universe::heap()->is_gc_active()) {
+    method_holder()->mask_for(h_this, bci, mask);
+  } else {
+    OopMapCache::compute_one_oop_map(h_this, bci, mask);
   }
-#endif
-  method_holder()->mask_for(h_this, bci, mask);
   return;
 }
 
--- a/hotspot/src/share/vm/runtime/globals.hpp	Mon Aug 28 02:55:31 2017 -0700
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Mon Aug 28 09:06:30 2017 -0400
@@ -715,9 +715,6 @@
   product(bool, PrintVMQWaitTime, false,                                    \
           "Print out the waiting time in VM operation queue")               \
                                                                             \
-  develop(bool, TraceOopMapGeneration, false,                               \
-          "Show OopMapGeneration")                                          \
-                                                                            \
   product(bool, MethodFlushing, true,                                       \
           "Reclamation of zombie and not-entrant methods")                  \
                                                                             \
--- a/hotspot/src/share/vm/runtime/memprofiler.cpp	Mon Aug 28 02:55:31 2017 -0700
+++ b/hotspot/src/share/vm/runtime/memprofiler.cpp	Mon Aug 28 09:06:30 2017 -0400
@@ -129,7 +129,7 @@
   fprintf(_log_fp, UINTX_FORMAT_W(6) "," UINTX_FORMAT_W(6) ",%6ld\n",
           handles_memory_usage / K,
           resource_memory_usage / K,
-          OopMapCache::memory_usage() / K);
+          0L);
   fflush(_log_fp);
 }
 
--- a/hotspot/src/share/vm/runtime/vframe.cpp	Mon Aug 28 02:55:31 2017 -0700
+++ b/hotspot/src/share/vm/runtime/vframe.cpp	Mon Aug 28 09:06:30 2017 -0400
@@ -396,14 +396,7 @@
 StackValueCollection* interpretedVFrame::stack_data(bool expressions) const {
 
   InterpreterOopMap oop_mask;
-  // oopmap for current bci
-  if ((TraceDeoptimization && Verbose) JVMCI_ONLY( || PrintDeoptimizationDetails)) {
-    methodHandle m_h(Thread::current(), method());
-    OopMapCache::compute_one_oop_map(m_h, bci(), &oop_mask);
-  } else {
-    method()->mask_for(bci(), &oop_mask);
-  }
-
+  method()->mask_for(bci(), &oop_mask);
   const int mask_len = oop_mask.number_of_entries();
 
   // If the method is native, method()->max_locals() is not telling the truth.