# HG changeset patch
# User tschatzl
# Date 1459504678 -7200
# Node ID 25ea8814a82480dc578acbfae618e6fc176696f2
# Parent  9b0d4b523ce44e0640b4bade8e37c64dacbf03cc
8148099: Improve memory access to FromCardCache during GC
Summary: Transpose the FromCardCache data array so the access during GC is linear instead of element-by-element with stride.
Reviewed-by: mgerdin, brutisso

diff -r 9b0d4b523ce4 -r 25ea8814a824 hotspot/src/share/vm/gc/g1/g1FromCardCache.cpp
--- a/hotspot/src/share/vm/gc/g1/g1FromCardCache.cpp	Fri Apr 01 07:08:55 2016 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1FromCardCache.cpp	Fri Apr 01 11:57:58 2016 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -37,8 +37,8 @@
   guarantee(_cache == NULL, "Should not call this multiple times");
 
   _max_regions = max_num_regions;
-  _cache = Padded2DArray<int, mtGC>::create_unfreeable(num_par_rem_sets,
-                                                       _max_regions,
+  _cache = Padded2DArray<int, mtGC>::create_unfreeable(_max_regions,
+                                                       num_par_rem_sets,
                                                        &_static_mem_size);
 
   invalidate(0, _max_regions);
diff -r 9b0d4b523ce4 -r 25ea8814a824 hotspot/src/share/vm/gc/g1/g1FromCardCache.hpp
--- a/hotspot/src/share/vm/gc/g1/g1FromCardCache.hpp	Fri Apr 01 07:08:55 2016 +0200
+++ b/hotspot/src/share/vm/gc/g1/g1FromCardCache.hpp	Fri Apr 01 11:57:58 2016 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -32,8 +32,11 @@
 // a per-region and per-thread basis.
 class G1FromCardCache : public AllStatic {
  private:
-  // Array of card indices. Indexed by thread X and heap region to minimize
+  // Array of card indices. Indexed by heap region (rows) and thread (columns) to minimize
   // thread contention.
+  // This order minimizes the time to clear all entries for a given region during region
+  // freeing: one clear of a single memory area suffices, instead of multiple separate
+  // accesses with a large stride per region.
   static int** _cache;
   static uint _max_regions;
   static size_t _static_mem_size;
@@ -58,11 +61,11 @@
   }
 
   static int at(uint worker_id, uint region_idx) {
-    return _cache[worker_id][region_idx];
+    return _cache[region_idx][worker_id];
   }
 
   static void set(uint worker_id, uint region_idx, int val) {
-    _cache[worker_id][region_idx] = val;
+    _cache[region_idx][worker_id] = val;
   }
 
   static void initialize(uint num_par_rem_sets, uint max_num_regions);