|
1 /* |
|
2 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. |
|
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
4 * |
|
5 * This code is free software; you can redistribute it and/or modify it |
|
6 * under the terms of the GNU General Public License version 2 only, as |
|
7 * published by the Free Software Foundation. |
|
8 * |
|
9 * This code is distributed in the hope that it will be useful, but WITHOUT |
|
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
12 * version 2 for more details (a copy is included in the LICENSE file that |
|
13 * accompanied this code). |
|
14 * |
|
15 * You should have received a copy of the GNU General Public License version |
|
16 * 2 along with this work; if not, write to the Free Software Foundation, |
|
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
18 * |
|
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
|
20 * or visit www.oracle.com if you need additional information or have any |
|
21 * questions. |
|
22 * |
|
23 */ |
|
24 |
|
// Tuning constants for the chunked CRC32C computation: three "chunk" sizes
// (HIGH, MIDDLE, LOW) plus the dimensions of the precomputed constants table
// derived from them.
enum {
  // S. Gueron / Information Processing Letters 112 (2012) 184
  // shows that anything above 6K and below 32K is a good choice;
  // 32K does not deliver any further performance gains.
  // 6K = 8*256 (*3 as we compute 3 blocks together)
  //
  // Thus selecting the smallest value so it could apply to the largest number
  // of buffer sizes.
  CRC32C_HIGH = 8 * 256,

  // Empirical;
  // based on a ubench study using the methodology described in
  // V. Gopal et al. / Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction April 2011 8
  //
  // Arbitrary value between 27 and 256.
  CRC32C_MIDDLE = 8 * 86,

  // V. Gopal et al. / Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction April 2011 9
  // shows that 240 and 1024 are equally good choices as 216 == 8*27.
  //
  // Selecting the smallest value which resulted in a significant performance
  // improvement over the sequential version.
  CRC32C_LOW = 8 * 27,

  // Number of distinct "chunk" sizes defined above (HIGH, MIDDLE, LOW).
  CRC32C_NUM_ChunkSizeInBytes = 3,

  // We need to compute powers of 64N and 128N for each "chunk" size,
  // i.e. two precomputed constants per chunk size.
  CRC32C_NUM_PRECOMPUTED_CONSTANTS = ( 2 * CRC32C_NUM_ChunkSizeInBytes )
};
|
// Notes:
// 1. Why do we need a "chunk" approach?
//    The overhead of computing the required powers for an arbitrary buffer of
//    size N is significant (performance then only approaches that of a library
//    implementation).
// 2. Why only 3 "chunks"?
//    Performance experiments showed that using HIGH+LOW alone did not deliver
//    a stable speedup curve.
//
// Disclaimer:
// If you ever decide to increase/decrease the number of "chunks", be sure to modify
//    a) the constants table generation (hotspot/src/cpu/x86/vm/stubRoutines_x86.cpp)
//    b) the constant fetch from that table (macroAssembler_x86.cpp)
//    c) the unrolled for loop (macroAssembler_x86.cpp)