# HG changeset patch # User tschatzl # Date 1459504678 -7200 # Node ID 25ea8814a82480dc578acbfae618e6fc176696f2 # Parent 9b0d4b523ce44e0640b4bade8e37c64dacbf03cc 8148099: Improve memory access to FromCardCache during GC Summary: Transpose the FromCardCache data array so the access during GC is linear instead of element-by-element with stride. Reviewed-by: mgerdin, brutisso diff -r 9b0d4b523ce4 -r 25ea8814a824 hotspot/src/share/vm/gc/g1/g1FromCardCache.cpp --- a/hotspot/src/share/vm/gc/g1/g1FromCardCache.cpp Fri Apr 01 07:08:55 2016 +0200 +++ b/hotspot/src/share/vm/gc/g1/g1FromCardCache.cpp Fri Apr 01 11:57:58 2016 +0200 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -37,8 +37,8 @@ guarantee(_cache == NULL, "Should not call this multiple times"); _max_regions = max_num_regions; - _cache = Padded2DArray::create_unfreeable(num_par_rem_sets, - _max_regions, + _cache = Padded2DArray::create_unfreeable(_max_regions, + num_par_rem_sets, &_static_mem_size); invalidate(0, _max_regions); diff -r 9b0d4b523ce4 -r 25ea8814a824 hotspot/src/share/vm/gc/g1/g1FromCardCache.hpp --- a/hotspot/src/share/vm/gc/g1/g1FromCardCache.hpp Fri Apr 01 07:08:55 2016 +0200 +++ b/hotspot/src/share/vm/gc/g1/g1FromCardCache.hpp Fri Apr 01 11:57:58 2016 +0200 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -32,8 +32,11 @@ // a per-region and per-thread basis. class G1FromCardCache : public AllStatic { private: - // Array of card indices. Indexed by thread X and heap region to minimize + // Array of card indices. Indexed by heap region (rows) and thread (columns) to minimize // thread contention. + // This order minimizes the time to clear all entries for a given region during region + // freeing. I.e. a single clear of a single memory area instead of multiple separate + // accesses with a large stride per region. static int** _cache; static uint _max_regions; static size_t _static_mem_size; @@ -58,11 +61,11 @@ } static int at(uint worker_id, uint region_idx) { - return _cache[worker_id][region_idx]; + return _cache[region_idx][worker_id]; } static void set(uint worker_id, uint region_idx, int val) { - _cache[worker_id][region_idx] = val; + _cache[region_idx][worker_id] = val; } static void initialize(uint num_par_rem_sets, uint max_num_regions);