8058968: Compiler time traces should be improved
authorshade
Thu, 25 Sep 2014 12:10:57 +0400
changeset 26913 9ad70cd32368
parent 26912 19021f626ad2
child 26914 a270e0fac50e
8058968: Compiler time traces should be improved Summary: Use CITime consistently, make C2 to report to CompilerBroker, more probes. Reviewed-by: vlivanov, jrose
hotspot/src/share/vm/c1/c1_Compilation.cpp
hotspot/src/share/vm/compiler/compileBroker.cpp
hotspot/src/share/vm/compiler/compileBroker.hpp
hotspot/src/share/vm/opto/buildOopMap.cpp
hotspot/src/share/vm/opto/c2_globals.hpp
hotspot/src/share/vm/opto/c2compiler.cpp
hotspot/src/share/vm/opto/chaitin.cpp
hotspot/src/share/vm/opto/compile.cpp
hotspot/src/share/vm/opto/compile.hpp
hotspot/src/share/vm/opto/escape.cpp
hotspot/src/share/vm/opto/ifg.cpp
hotspot/src/share/vm/opto/output.cpp
hotspot/src/share/vm/opto/parse1.cpp
hotspot/src/share/vm/opto/phase.cpp
hotspot/src/share/vm/opto/phase.hpp
hotspot/src/share/vm/opto/postaloc.cpp
hotspot/src/share/vm/opto/reg_split.cpp
hotspot/src/share/vm/runtime/globals.hpp
hotspot/src/share/vm/runtime/java.cpp
--- a/hotspot/src/share/vm/c1/c1_Compilation.cpp	Mon Sep 29 08:40:51 2014 +0200
+++ b/hotspot/src/share/vm/c1/c1_Compilation.cpp	Thu Sep 25 12:10:57 2014 +0400
@@ -38,17 +38,18 @@
 
 typedef enum {
   _t_compile,
-  _t_setup,
-  _t_buildIR,
-  _t_optimize_blocks,
-  _t_optimize_null_checks,
-  _t_rangeCheckElimination,
-  _t_emit_lir,
-  _t_linearScan,
-  _t_lirGeneration,
-  _t_lir_schedule,
-  _t_codeemit,
-  _t_codeinstall,
+    _t_setup,
+    _t_buildIR,
+      _t_hir_parse,
+      _t_gvn,
+      _t_optimize_blocks,
+      _t_optimize_null_checks,
+      _t_rangeCheckElimination,
+    _t_emit_lir,
+      _t_linearScan,
+      _t_lirGeneration,
+    _t_codeemit,
+    _t_codeinstall,
   max_phase_timers
 } TimerName;
 
@@ -56,13 +57,14 @@
   "compile",
   "setup",
   "buildIR",
+  "parse_hir",
+  "gvn",
   "optimize_blocks",
   "optimize_null_checks",
   "rangeCheckElimination",
   "emit_lir",
   "linearScan",
   "lirGeneration",
-  "lir_schedule",
   "codeemit",
   "codeinstall"
 };
@@ -144,7 +146,10 @@
     log->stamp();
     log->end_head();
   }
-  _hir = new IR(this, method(), osr_bci());
+  {
+    PhaseTraceTime timeit(_t_hir_parse);
+    _hir = new IR(this, method(), osr_bci());
+  }
   if (log)  log->done("parse");
   if (!_hir->is_valid()) {
     bailout("invalid parsing");
@@ -189,6 +194,7 @@
 
   if (UseGlobalValueNumbering) {
     // No resource mark here! LoopInvariantCodeMotion can allocate ValueStack objects.
+    PhaseTraceTime timeit(_t_gvn);
     int instructions = Instruction::number_of_instructions();
     GlobalValueNumbering gvn(_hir);
     assert(instructions == Instruction::number_of_instructions(),
@@ -419,8 +425,12 @@
 
 
 void Compilation::compile_method() {
-  // setup compilation
-  initialize();
+  {
+    PhaseTraceTime timeit(_t_setup);
+
+    // setup compilation
+    initialize();
+  }
 
   if (!method()->can_be_compiled()) {
     // Prevent race condition 6328518.
@@ -615,24 +625,54 @@
 }
 
 void Compilation::print_timers() {
-  // tty->print_cr("    Native methods         : %6.3f s, Average : %2.3f", CompileBroker::_t_native_compilation.seconds(), CompileBroker::_t_native_compilation.seconds() / CompileBroker::_total_native_compile_count);
-  float total = timers[_t_setup].seconds() + timers[_t_buildIR].seconds() + timers[_t_emit_lir].seconds() + timers[_t_lir_schedule].seconds() + timers[_t_codeemit].seconds() + timers[_t_codeinstall].seconds();
+  tty->print_cr("    C1 Compile Time:      %7.3f s",      timers[_t_compile].seconds());
+  tty->print_cr("       Setup time:          %7.3f s",    timers[_t_setup].seconds());
+
+  {
+    tty->print_cr("       Build HIR:           %7.3f s",    timers[_t_buildIR].seconds());
+    tty->print_cr("         Parse:               %7.3f s", timers[_t_hir_parse].seconds());
+    tty->print_cr("         Optimize blocks:     %7.3f s", timers[_t_optimize_blocks].seconds());
+    tty->print_cr("         GVN:                 %7.3f s", timers[_t_gvn].seconds());
+    tty->print_cr("         Null checks elim:    %7.3f s", timers[_t_optimize_null_checks].seconds());
+    tty->print_cr("         Range checks elim:   %7.3f s", timers[_t_rangeCheckElimination].seconds());
 
+    double other = timers[_t_buildIR].seconds() -
+      (timers[_t_hir_parse].seconds() +
+       timers[_t_optimize_blocks].seconds() +
+       timers[_t_gvn].seconds() +
+       timers[_t_optimize_null_checks].seconds() +
+       timers[_t_rangeCheckElimination].seconds());
+    if (other > 0) {
+      tty->print_cr("         Other:               %7.3f s", other);
+    }
+  }
 
-  tty->print_cr("    Detailed C1 Timings");
-  tty->print_cr("       Setup time:        %6.3f s (%4.1f%%)",    timers[_t_setup].seconds(),           (timers[_t_setup].seconds() / total) * 100.0);
-  tty->print_cr("       Build IR:          %6.3f s (%4.1f%%)",    timers[_t_buildIR].seconds(),         (timers[_t_buildIR].seconds() / total) * 100.0);
-  float t_optimizeIR = timers[_t_optimize_blocks].seconds() + timers[_t_optimize_null_checks].seconds();
-  tty->print_cr("         Optimize:           %6.3f s (%4.1f%%)", t_optimizeIR,                         (t_optimizeIR / total) * 100.0);
-  tty->print_cr("         RCE:                %6.3f s (%4.1f%%)", timers[_t_rangeCheckElimination].seconds(),      (timers[_t_rangeCheckElimination].seconds() / total) * 100.0);
-  tty->print_cr("       Emit LIR:          %6.3f s (%4.1f%%)",    timers[_t_emit_lir].seconds(),        (timers[_t_emit_lir].seconds() / total) * 100.0);
-  tty->print_cr("         LIR Gen:          %6.3f s (%4.1f%%)",   timers[_t_lirGeneration].seconds(), (timers[_t_lirGeneration].seconds() / total) * 100.0);
-  tty->print_cr("         Linear Scan:      %6.3f s (%4.1f%%)",   timers[_t_linearScan].seconds(),    (timers[_t_linearScan].seconds() / total) * 100.0);
-  NOT_PRODUCT(LinearScan::print_timers(timers[_t_linearScan].seconds()));
-  tty->print_cr("       LIR Schedule:      %6.3f s (%4.1f%%)",    timers[_t_lir_schedule].seconds(),  (timers[_t_lir_schedule].seconds() / total) * 100.0);
-  tty->print_cr("       Code Emission:     %6.3f s (%4.1f%%)",    timers[_t_codeemit].seconds(),        (timers[_t_codeemit].seconds() / total) * 100.0);
-  tty->print_cr("       Code Installation: %6.3f s (%4.1f%%)",    timers[_t_codeinstall].seconds(),     (timers[_t_codeinstall].seconds() / total) * 100.0);
-  tty->print_cr("       Instruction Nodes: %6d nodes",    totalInstructionNodes);
+  {
+    tty->print_cr("       Emit LIR:            %7.3f s",    timers[_t_emit_lir].seconds());
+    tty->print_cr("         LIR Gen:             %7.3f s",   timers[_t_lirGeneration].seconds());
+    tty->print_cr("         Linear Scan:         %7.3f s",   timers[_t_linearScan].seconds());
+    NOT_PRODUCT(LinearScan::print_timers(timers[_t_linearScan].seconds()));
+
+    double other = timers[_t_emit_lir].seconds() -
+      (timers[_t_lirGeneration].seconds() +
+       timers[_t_linearScan].seconds());
+    if (other > 0) {
+      tty->print_cr("         Other:               %7.3f s", other);
+    }
+  }
+
+  tty->print_cr("       Code Emission:       %7.3f s",    timers[_t_codeemit].seconds());
+  tty->print_cr("       Code Installation:   %7.3f s",    timers[_t_codeinstall].seconds());
+
+  double other = timers[_t_compile].seconds() -
+      (timers[_t_setup].seconds() +
+       timers[_t_buildIR].seconds() +
+       timers[_t_emit_lir].seconds() +
+       timers[_t_codeemit].seconds() +
+       timers[_t_codeinstall].seconds());
+  if (other > 0) {
+    tty->print_cr("       Other:               %7.3f s", other);
+  }
 
   NOT_PRODUCT(LinearScan::print_statistics());
 }
--- a/hotspot/src/share/vm/compiler/compileBroker.cpp	Mon Sep 29 08:40:51 2014 +0200
+++ b/hotspot/src/share/vm/compiler/compileBroker.cpp	Thu Sep 25 12:10:57 2014 +0400
@@ -137,6 +137,8 @@
 elapsedTimer CompileBroker::_t_total_compilation;
 elapsedTimer CompileBroker::_t_osr_compilation;
 elapsedTimer CompileBroker::_t_standard_compilation;
+elapsedTimer CompileBroker::_t_invalidated_compilation;
+elapsedTimer CompileBroker::_t_bailedout_compilation;
 
 int CompileBroker::_total_bailout_count          = 0;
 int CompileBroker::_total_invalidated_count      = 0;
@@ -2236,6 +2238,11 @@
   // _perf variables are production performance counters which are
   // updated regardless of the setting of the CITime and CITimeEach flags
   //
+
+  // account all time, including bailouts and failures in this counter;
+  // C1 and C2 counters are counting both successful and unsuccessful compiles
+  _t_total_compilation.add(time);
+
   if (!success) {
     _total_bailout_count++;
     if (UsePerfData) {
@@ -2243,6 +2250,7 @@
       _perf_last_failed_type->set_value(counters->compile_type());
       _perf_total_bailout_count->inc();
     }
+    _t_bailedout_compilation.add(time);
   } else if (code == NULL) {
     if (UsePerfData) {
       _perf_last_invalidated_method->set_value(counters->current_method());
@@ -2250,14 +2258,13 @@
       _perf_total_invalidated_count->inc();
     }
     _total_invalidated_count++;
+    _t_invalidated_compilation.add(time);
   } else {
     // Compilation succeeded
 
     // update compilation ticks - used by the implementation of
     // java.lang.management.CompilationMBean
     _perf_total_compilation->inc(time.ticks());
-
-    _t_total_compilation.add(time);
     _peak_compilation_time = time.milliseconds() > _peak_compilation_time ? time.milliseconds() : _peak_compilation_time;
 
     if (CITime) {
@@ -2325,37 +2332,47 @@
 
 void CompileBroker::print_times() {
   tty->cr();
-  tty->print_cr("Accumulated compiler times (for compiled methods only)");
-  tty->print_cr("------------------------------------------------");
+  tty->print_cr("Accumulated compiler times");
+  tty->print_cr("----------------------------------------------------------");
                //0000000000111111111122222222223333333333444444444455555555556666666666
                //0123456789012345678901234567890123456789012345678901234567890123456789
-  tty->print_cr("  Total compilation time   : %6.3f s", CompileBroker::_t_total_compilation.seconds());
-  tty->print_cr("    Standard compilation   : %6.3f s, Average : %2.3f",
+  tty->print_cr("  Total compilation time   : %7.3f s", CompileBroker::_t_total_compilation.seconds());
+  tty->print_cr("    Standard compilation   : %7.3f s, Average : %2.3f s",
                 CompileBroker::_t_standard_compilation.seconds(),
                 CompileBroker::_t_standard_compilation.seconds() / CompileBroker::_total_standard_compile_count);
-  tty->print_cr("    On stack replacement   : %6.3f s, Average : %2.3f", CompileBroker::_t_osr_compilation.seconds(), CompileBroker::_t_osr_compilation.seconds() / CompileBroker::_total_osr_compile_count);
+  tty->print_cr("    Bailed out compilation : %7.3f s, Average : %2.3f s",
+                CompileBroker::_t_bailedout_compilation.seconds(),
+                CompileBroker::_t_bailedout_compilation.seconds() / CompileBroker::_total_bailout_count);
+  tty->print_cr("    On stack replacement   : %7.3f s, Average : %2.3f s",
+                CompileBroker::_t_osr_compilation.seconds(),
+                CompileBroker::_t_osr_compilation.seconds() / CompileBroker::_total_osr_compile_count);
+  tty->print_cr("    Invalidated            : %7.3f s, Average : %2.3f s",
+                CompileBroker::_t_invalidated_compilation.seconds(),
+                CompileBroker::_t_invalidated_compilation.seconds() / CompileBroker::_total_invalidated_count);
 
   AbstractCompiler *comp = compiler(CompLevel_simple);
   if (comp != NULL) {
+    tty->cr();
     comp->print_timers();
   }
   comp = compiler(CompLevel_full_optimization);
   if (comp != NULL) {
+    tty->cr();
     comp->print_timers();
   }
   tty->cr();
-  tty->print_cr("  Total compiled methods   : %6d methods", CompileBroker::_total_compile_count);
-  tty->print_cr("    Standard compilation   : %6d methods", CompileBroker::_total_standard_compile_count);
-  tty->print_cr("    On stack replacement   : %6d methods", CompileBroker::_total_osr_compile_count);
+  tty->print_cr("  Total compiled methods    : %8d methods", CompileBroker::_total_compile_count);
+  tty->print_cr("    Standard compilation    : %8d methods", CompileBroker::_total_standard_compile_count);
+  tty->print_cr("    On stack replacement    : %8d methods", CompileBroker::_total_osr_compile_count);
   int tcb = CompileBroker::_sum_osr_bytes_compiled + CompileBroker::_sum_standard_bytes_compiled;
-  tty->print_cr("  Total compiled bytecodes : %6d bytes", tcb);
-  tty->print_cr("    Standard compilation   : %6d bytes", CompileBroker::_sum_standard_bytes_compiled);
-  tty->print_cr("    On stack replacement   : %6d bytes", CompileBroker::_sum_osr_bytes_compiled);
+  tty->print_cr("  Total compiled bytecodes  : %8d bytes", tcb);
+  tty->print_cr("    Standard compilation    : %8d bytes", CompileBroker::_sum_standard_bytes_compiled);
+  tty->print_cr("    On stack replacement    : %8d bytes", CompileBroker::_sum_osr_bytes_compiled);
   int bps = (int)(tcb / CompileBroker::_t_total_compilation.seconds());
-  tty->print_cr("  Average compilation speed: %6d bytes/s", bps);
+  tty->print_cr("  Average compilation speed : %8d bytes/s", bps);
   tty->cr();
-  tty->print_cr("  nmethod code size        : %6d bytes", CompileBroker::_sum_nmethod_code_size);
-  tty->print_cr("  nmethod total size       : %6d bytes", CompileBroker::_sum_nmethod_size);
+  tty->print_cr("  nmethod code size         : %8d bytes", CompileBroker::_sum_nmethod_code_size);
+  tty->print_cr("  nmethod total size        : %8d bytes", CompileBroker::_sum_nmethod_size);
 }
 
 // Debugging output for failure
--- a/hotspot/src/share/vm/compiler/compileBroker.hpp	Mon Sep 29 08:40:51 2014 +0200
+++ b/hotspot/src/share/vm/compiler/compileBroker.hpp	Thu Sep 25 12:10:57 2014 +0400
@@ -322,6 +322,8 @@
   static elapsedTimer _t_total_compilation;
   static elapsedTimer _t_osr_compilation;
   static elapsedTimer _t_standard_compilation;
+  static elapsedTimer _t_invalidated_compilation;
+  static elapsedTimer _t_bailedout_compilation;
 
   static int _total_compile_count;
   static int _total_bailout_count;
--- a/hotspot/src/share/vm/opto/buildOopMap.cpp	Mon Sep 29 08:40:51 2014 +0200
+++ b/hotspot/src/share/vm/opto/buildOopMap.cpp	Thu Sep 25 12:10:57 2014 +0400
@@ -555,7 +555,7 @@
 
 // Collect GC mask info - where are all the OOPs?
 void Compile::BuildOopMaps() {
-  NOT_PRODUCT( TracePhase t3("bldOopMaps", &_t_buildOopMaps, TimeCompiler); )
+  TracePhase tp("bldOopMaps", &timers[_t_buildOopMaps]);
   // Can't resource-mark because I need to leave all those OopMaps around,
   // or else I need to resource-mark some arena other than the default.
   // ResourceMark rm;              // Reclaim all OopFlows when done
--- a/hotspot/src/share/vm/opto/c2_globals.hpp	Mon Sep 29 08:40:51 2014 +0200
+++ b/hotspot/src/share/vm/opto/c2_globals.hpp	Thu Sep 25 12:10:57 2014 +0400
@@ -590,9 +590,6 @@
   product(intx, TypeProfileMajorReceiverPercent, 90,                        \
           "% of major receiver type to all profiled receivers")             \
                                                                             \
-  notproduct(bool, TimeCompiler2, false,                                    \
-          "detailed time the compiler (requires +TimeCompiler)")            \
-                                                                            \
   diagnostic(bool, PrintIntrinsics, false,                                  \
           "prints attempted and successful inlining of intrinsics")         \
                                                                             \
--- a/hotspot/src/share/vm/opto/c2compiler.cpp	Mon Sep 29 08:40:51 2014 +0200
+++ b/hotspot/src/share/vm/opto/c2compiler.cpp	Thu Sep 25 12:10:57 2014 +0400
@@ -147,7 +147,7 @@
 
 
 void C2Compiler::print_timers() {
-  // do nothing
+  Compile::print_timers();
 }
 
 int C2Compiler::initial_code_buffer_size() {
--- a/hotspot/src/share/vm/opto/chaitin.cpp	Mon Sep 29 08:40:51 2014 +0200
+++ b/hotspot/src/share/vm/opto/chaitin.cpp	Thu Sep 25 12:10:57 2014 +0400
@@ -209,7 +209,7 @@
   , _trace_spilling(TraceSpilling || C->method_has_option("TraceSpilling"))
 #endif
 {
-  NOT_PRODUCT( Compile::TracePhase t3("ctorChaitin", &_t_ctorChaitin, TimeCompiler); )
+  Compile::TracePhase tp("ctorChaitin", &timers[_t_ctorChaitin]);
 
   _high_frequency_lrg = MIN2(double(OPTO_LRG_HIGH_FREQ), _cfg.get_outer_loop_frequency());
 
@@ -295,6 +295,8 @@
 
 // Renumber the live ranges to compact them.  Makes the IFG smaller.
 void PhaseChaitin::compact() {
+  Compile::TracePhase tp("chaitinCompact", &timers[_t_chaitinCompact]);
+
   // Current the _uf_map contains a series of short chains which are headed
   // by a self-cycle.  All the chains run from big numbers to little numbers.
   // The Find() call chases the chains & shortens them for the next Find call.
@@ -369,7 +371,7 @@
 #endif
 
   {
-    NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
+    Compile::TracePhase tp("computeLive", &timers[_t_computeLive]);
     _live = NULL;                 // Mark live as being not available
     rm.reset_to_mark();           // Reclaim working storage
     IndexSet::reset_memory(C, &live_arena);
@@ -386,7 +388,7 @@
   // at all the GC points, and "stretches" the live range of any base pointer
   // to the GC point.
   if (stretch_base_pointer_live_ranges(&live_arena)) {
-    NOT_PRODUCT(Compile::TracePhase t3("computeLive (sbplr)", &_t_computeLive, TimeCompiler);)
+    Compile::TracePhase tp("computeLive (sbplr)", &timers[_t_computeLive]);
     // Since some live range stretched, I need to recompute live
     _live = NULL;
     rm.reset_to_mark();         // Reclaim working storage
@@ -399,17 +401,19 @@
   // Create the interference graph using virtual copies
   build_ifg_virtual();  // Include stack slots this time
 
+  // The IFG is/was triangular.  I am 'squaring it up' so Union can run
+  // faster.  Union requires a 'for all' operation which is slow on the
+  // triangular adjacency matrix (quick reminder: the IFG is 'sparse' -
+  // meaning I can visit all the Nodes neighbors less than a Node in time
+  // O(# of neighbors), but I have to visit all the Nodes greater than a
+  // given Node and search them for an instance, i.e., time O(#MaxLRG)).
+  _ifg->SquareUp();
+
   // Aggressive (but pessimistic) copy coalescing.
   // This pass works on virtual copies.  Any virtual copies which are not
   // coalesced get manifested as actual copies
   {
-    // The IFG is/was triangular.  I am 'squaring it up' so Union can run
-    // faster.  Union requires a 'for all' operation which is slow on the
-    // triangular adjacency matrix (quick reminder: the IFG is 'sparse' -
-    // meaning I can visit all the Nodes neighbors less than a Node in time
-    // O(# of neighbors), but I have to visit all the Nodes greater than a
-    // given Node and search them for an instance, i.e., time O(#MaxLRG)).
-    _ifg->SquareUp();
+    Compile::TracePhase tp("chaitinCoalesce1", &timers[_t_chaitinCoalesce1]);
 
     PhaseAggressiveCoalesce coalesce(*this);
     coalesce.coalesce_driver();
@@ -424,7 +428,7 @@
   // After aggressive coalesce, attempt a first cut at coloring.
   // To color, we need the IFG and for that we need LIVE.
   {
-    NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
+    Compile::TracePhase tp("computeLive", &timers[_t_computeLive]);
     _live = NULL;
     rm.reset_to_mark();           // Reclaim working storage
     IndexSet::reset_memory(C, &live_arena);
@@ -462,7 +466,7 @@
     compact();                  // Compact LRGs; return new lower max lrg
 
     {
-      NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
+      Compile::TracePhase tp("computeLive", &timers[_t_computeLive]);
       _live = NULL;
       rm.reset_to_mark();         // Reclaim working storage
       IndexSet::reset_memory(C, &live_arena);
@@ -476,6 +480,7 @@
     _ifg->Compute_Effective_Degree();
     // Only do conservative coalescing if requested
     if (OptoCoalesce) {
+      Compile::TracePhase tp("chaitinCoalesce2", &timers[_t_chaitinCoalesce2]);
       // Conservative (and pessimistic) copy coalescing of those spills
       PhaseConservativeCoalesce coalesce(*this);
       // If max live ranges greater than cutoff, don't color the stack.
@@ -531,7 +536,7 @@
 
     // Nuke the live-ness and interference graph and LiveRanGe info
     {
-      NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
+      Compile::TracePhase tp("computeLive", &timers[_t_computeLive]);
       _live = NULL;
       rm.reset_to_mark();         // Reclaim working storage
       IndexSet::reset_memory(C, &live_arena);
@@ -549,6 +554,7 @@
 
     // Only do conservative coalescing if requested
     if (OptoCoalesce) {
+      Compile::TracePhase tp("chaitinCoalesce3", &timers[_t_chaitinCoalesce3]);
       // Conservative (and pessimistic) copy coalescing
       PhaseConservativeCoalesce coalesce(*this);
       // Check for few live ranges determines how aggressive coalesce is.
@@ -1054,6 +1060,7 @@
 
 // Compute cost/area ratio, in case we spill.  Build the lo-degree list.
 void PhaseChaitin::cache_lrg_info( ) {
+  Compile::TracePhase tp("chaitinCacheLRG", &timers[_t_chaitinCacheLRG]);
 
   for (uint i = 1; i < _lrg_map.max_lrg_id(); i++) {
     LRG &lrg = lrgs(i);
@@ -1137,6 +1144,7 @@
 
 // Simplify the IFG by removing LRGs of low degree.
 void PhaseChaitin::Simplify( ) {
+  Compile::TracePhase tp("chaitinSimplify", &timers[_t_chaitinSimplify]);
 
   while( 1 ) {                  // Repeat till simplified it all
     // May want to explore simplifying lo_degree before _lo_stk_degree.
@@ -1384,6 +1392,8 @@
 // everything going back is guaranteed a color.  Select that color.  If some
 // hi-degree LRG cannot get a color then we record that we must spill.
 uint PhaseChaitin::Select( ) {
+  Compile::TracePhase tp("chaitinSelect", &timers[_t_chaitinSelect]);
+
   uint spill_reg = LRG::SPILL_REG;
   _max_reg = OptoReg::Name(0);  // Past max register used
   while( _simplified ) {
@@ -1577,7 +1587,7 @@
   // This function does only cisc spill work.
   if( !UseCISCSpill ) return;
 
-  NOT_PRODUCT( Compile::TracePhase t3("fixupSpills", &_t_fixupSpills, TimeCompiler); )
+  Compile::TracePhase tp("fixupSpills", &timers[_t_fixupSpills]);
 
   // Grab the Frame Pointer
   Node *fp = _cfg.get_root_block()->head()->in(1)->in(TypeFunc::FramePtr);
--- a/hotspot/src/share/vm/opto/compile.cpp	Mon Sep 29 08:40:51 2014 +0200
+++ b/hotspot/src/share/vm/opto/compile.cpp	Thu Sep 25 12:10:57 2014 +0400
@@ -667,16 +667,18 @@
   C = this;
 
   CompileWrapper cw(this);
-#ifndef PRODUCT
-  if (TimeCompiler2) {
+
+  if (CITimeVerbose) {
     tty->print(" ");
     target->holder()->name()->print();
     tty->print(".");
     target->print_short_name();
     tty->print("  ");
   }
-  TraceTime t1("Total compilation time", &_t_totalCompilation, TimeCompiler, TimeCompiler2);
-  TraceTime t2(NULL, &_t_methodCompilation, TimeCompiler, false);
+  TraceTime t1("Total compilation time", &_t_totalCompilation, CITime, CITimeVerbose);
+  TraceTime t2(NULL, &_t_methodCompilation, CITime, false);
+
+#ifndef PRODUCT
   bool print_opto_assembly = PrintOptoAssembly || _method->has_option("PrintOptoAssembly");
   if (!print_opto_assembly) {
     bool print_assembly = (PrintAssembly || _method->should_print_assembly());
@@ -726,7 +728,7 @@
 
   print_inlining_init();
   { // Scope for timing the parser
-    TracePhase t3("parse", &_t_parser, true);
+    TracePhase tp("parse", &timers[_t_parser]);
 
     // Put top into the hash table ASAP.
     initial_gvn()->transform_no_reclaim(top());
@@ -888,8 +890,8 @@
       record_method_not_compilable("+OptoNoExecute");  // Flag as failed
       return;
     }
-    TracePhase t2("install_code", &_t_registerMethod, TimeCompiler);
 #endif
+    TracePhase tp("install_code", &timers[_t_registerMethod]);
 
     if (is_osr_compilation()) {
       _code_offsets.set_value(CodeOffsets::Verified_Entry, 0);
@@ -977,9 +979,10 @@
     _interpreter_frame_size(0) {
   C = this;
 
+  TraceTime t1(NULL, &_t_totalCompilation, CITime, false);
+  TraceTime t2(NULL, &_t_stubCompilation, CITime, false);
+
 #ifndef PRODUCT
-  TraceTime t1(NULL, &_t_totalCompilation, TimeCompiler, false);
-  TraceTime t2(NULL, &_t_stubCompilation, TimeCompiler, false);
   set_print_assembly(PrintFrameConverterAssembly);
   set_parsed_irreducible_loop(false);
 #endif
@@ -1949,30 +1952,38 @@
   for_igvn()->clear();
   gvn->replace_with(&igvn);
 
-  int i = 0;
-
-  for (; i <_late_inlines.length() && !inlining_progress(); i++) {
-    CallGenerator* cg = _late_inlines.at(i);
-    _late_inlines_pos = i+1;
-    cg->do_late_inline();
-    if (failing())  return;
+  {
+    TracePhase tp("incrementalInline_inline", &timers[_t_incrInline_inline]);
+    int i = 0;
+    for (; i <_late_inlines.length() && !inlining_progress(); i++) {
+      CallGenerator* cg = _late_inlines.at(i);
+      _late_inlines_pos = i+1;
+      cg->do_late_inline();
+      if (failing())  return;
+    }
+    int j = 0;
+    for (; i < _late_inlines.length(); i++, j++) {
+      _late_inlines.at_put(j, _late_inlines.at(i));
+    }
+    _late_inlines.trunc_to(j);
   }
-  int j = 0;
-  for (; i < _late_inlines.length(); i++, j++) {
-    _late_inlines.at_put(j, _late_inlines.at(i));
-  }
-  _late_inlines.trunc_to(j);
 
   {
+    TracePhase tp("incrementalInline_pru", &timers[_t_incrInline_pru]);
     ResourceMark rm;
     PhaseRemoveUseless pru(gvn, for_igvn());
   }
 
-  igvn = PhaseIterGVN(gvn);
+  {
+    TracePhase tp("incrementalInline_igvn", &timers[_t_incrInline_igvn]);
+    igvn = PhaseIterGVN(gvn);
+  }
 }
 
 // Perform incremental inlining until bound on number of live nodes is reached
 void Compile::inline_incrementally(PhaseIterGVN& igvn) {
+  TracePhase tp("incrementalInline", &timers[_t_incrInline]);
+
   PhaseGVN* gvn = initial_gvn();
 
   set_inlining_incrementally(true);
@@ -1983,6 +1994,7 @@
 
     if (live_nodes() > (uint)LiveNodeCountInliningCutoff) {
       if (low_live_nodes < (uint)LiveNodeCountInliningCutoff * 8 / 10) {
+        TracePhase tp("incrementalInline_ideal", &timers[_t_incrInline_ideal]);
         // PhaseIdealLoop is expensive so we only try it once we are
         // out of live nodes and we only try it again if the previous
         // helped got the number of nodes down significantly
@@ -2001,7 +2013,10 @@
 
     if (failing())  return;
 
-    igvn.optimize();
+    {
+      TracePhase tp("incrementalInline_igvn", &timers[_t_incrInline_igvn]);
+      igvn.optimize();
+    }
 
     if (failing())  return;
   }
@@ -2018,13 +2033,16 @@
     if (failing())  return;
 
     {
+      TracePhase tp("incrementalInline_pru", &timers[_t_incrInline_pru]);
       ResourceMark rm;
       PhaseRemoveUseless pru(initial_gvn(), for_igvn());
     }
 
-    igvn = PhaseIterGVN(gvn);
-
-    igvn.optimize();
+    {
+      TracePhase tp("incrementalInline_igvn", &timers[_t_incrInline_igvn]);
+      igvn = PhaseIterGVN(gvn);
+      igvn.optimize();
+    }
   }
 
   set_inlining_incrementally(false);
@@ -2034,7 +2052,7 @@
 //------------------------------Optimize---------------------------------------
 // Given a graph, optimize it.
 void Compile::Optimize() {
-  TracePhase t1("optimizer", &_t_optimizer, true);
+  TracePhase tp("optimizer", &timers[_t_optimizer]);
 
 #ifndef PRODUCT
   if (env()->break_at_compile()) {
@@ -2060,7 +2078,7 @@
   _modified_nodes = new (comp_arena()) Unique_Node_List(comp_arena());
 #endif
   {
-    NOT_PRODUCT( TracePhase t2("iterGVN", &_t_iterGVN, TimeCompiler); )
+    TracePhase tp("iterGVN", &timers[_t_iterGVN]);
     igvn.optimize();
   }
 
@@ -2068,17 +2086,13 @@
 
   if (failing())  return;
 
-  {
-    NOT_PRODUCT( TracePhase t2("incrementalInline", &_t_incrInline, TimeCompiler); )
-    inline_incrementally(igvn);
-  }
+  inline_incrementally(igvn);
 
   print_method(PHASE_INCREMENTAL_INLINE, 2);
 
   if (failing())  return;
 
   if (eliminate_boxing()) {
-    NOT_PRODUCT( TracePhase t2("incrementalInline", &_t_incrInline, TimeCompiler); )
     // Inline valueOf() methods now.
     inline_boxing_calls(igvn);
 
@@ -2105,7 +2119,7 @@
   if (_do_escape_analysis && ConnectionGraph::has_candidates(this)) {
     if (has_loops()) {
       // Cleanup graph (remove dead nodes).
-      TracePhase t2("idealLoop", &_t_idealLoop, true);
+      TracePhase tp("idealLoop", &timers[_t_idealLoop]);
       PhaseIdealLoop ideal_loop( igvn, false, true );
       if (major_progress()) print_method(PHASE_PHASEIDEAL_BEFORE_EA, 2);
       if (failing())  return;
@@ -2121,7 +2135,7 @@
     if (failing())  return;
 
     if (congraph() != NULL && macro_count() > 0) {
-      NOT_PRODUCT( TracePhase t2("macroEliminate", &_t_macroEliminate, TimeCompiler); )
+      TracePhase tp("macroEliminate", &timers[_t_macroEliminate]);
       PhaseMacroExpand mexp(igvn);
       mexp.eliminate_macro_nodes();
       igvn.set_delay_transform(false);
@@ -2140,7 +2154,7 @@
   loop_opts_cnt = num_loop_opts();
   if((loop_opts_cnt > 0) && (has_loops() || has_split_ifs())) {
     {
-      TracePhase t2("idealLoop", &_t_idealLoop, true);
+      TracePhase tp("idealLoop", &timers[_t_idealLoop]);
       PhaseIdealLoop ideal_loop( igvn, true );
       loop_opts_cnt--;
       if (major_progress()) print_method(PHASE_PHASEIDEALLOOP1, 2);
@@ -2148,7 +2162,7 @@
     }
     // Loop opts pass if partial peeling occurred in previous pass
     if(PartialPeelLoop && major_progress() && (loop_opts_cnt > 0)) {
-      TracePhase t3("idealLoop", &_t_idealLoop, true);
+      TracePhase tp("idealLoop", &timers[_t_idealLoop]);
       PhaseIdealLoop ideal_loop( igvn, false );
       loop_opts_cnt--;
       if (major_progress()) print_method(PHASE_PHASEIDEALLOOP2, 2);
@@ -2156,14 +2170,14 @@
     }
     // Loop opts pass for loop-unrolling before CCP
     if(major_progress() && (loop_opts_cnt > 0)) {
-      TracePhase t4("idealLoop", &_t_idealLoop, true);
+      TracePhase tp("idealLoop", &timers[_t_idealLoop]);
       PhaseIdealLoop ideal_loop( igvn, false );
       loop_opts_cnt--;
       if (major_progress()) print_method(PHASE_PHASEIDEALLOOP3, 2);
     }
     if (!failing()) {
       // Verify that last round of loop opts produced a valid graph
-      NOT_PRODUCT( TracePhase t2("idealLoopVerify", &_t_idealLoopVerify, TimeCompiler); )
+      TracePhase tp("idealLoopVerify", &timers[_t_idealLoopVerify]);
       PhaseIdealLoop::verify(igvn);
     }
   }
@@ -2173,7 +2187,7 @@
   PhaseCCP ccp( &igvn );
   assert( true, "Break here to ccp.dump_nodes_and_types(_root,999,1)");
   {
-    TracePhase t2("ccp", &_t_ccp, true);
+    TracePhase tp("ccp", &timers[_t_ccp]);
     ccp.do_transform();
   }
   print_method(PHASE_CPP1, 2);
@@ -2182,7 +2196,7 @@
 
   // Iterative Global Value Numbering, including ideal transforms
   {
-    NOT_PRODUCT( TracePhase t2("iterGVN2", &_t_iterGVN2, TimeCompiler); )
+    TracePhase tp("iterGVN2", &timers[_t_iterGVN2]);
     igvn = ccp;
     igvn.optimize();
   }
@@ -2196,7 +2210,7 @@
   if(loop_opts_cnt > 0) {
     debug_only( int cnt = 0; );
     while(major_progress() && (loop_opts_cnt > 0)) {
-      TracePhase t2("idealLoop", &_t_idealLoop, true);
+      TracePhase tp("idealLoop", &timers[_t_idealLoop]);
       assert( cnt++ < 40, "infinite cycle in loop optimization" );
       PhaseIdealLoop ideal_loop( igvn, true);
       loop_opts_cnt--;
@@ -2208,12 +2222,12 @@
   {
     // Verify that all previous optimizations produced a valid graph
     // at least to this point, even if no loop optimizations were done.
-    NOT_PRODUCT( TracePhase t2("idealLoopVerify", &_t_idealLoopVerify, TimeCompiler); )
+    TracePhase tp("idealLoopVerify", &timers[_t_idealLoopVerify]);
     PhaseIdealLoop::verify(igvn);
   }
 
   {
-    NOT_PRODUCT( TracePhase t2("macroExpand", &_t_macroExpand, TimeCompiler); )
+    TracePhase tp("macroExpand", &timers[_t_macroExpand]);
     PhaseMacroExpand  mex(igvn);
     if (mex.expand_macro_nodes()) {
       assert(failing(), "must bail out w/ explicit message");
@@ -2227,7 +2241,7 @@
   process_print_inlining();
   // A method with only infinite loops has no edges entering loops from root
   {
-    NOT_PRODUCT( TracePhase t2("graphReshape", &_t_graphReshaping, TimeCompiler); )
+    TracePhase tp("graphReshape", &timers[_t_graphReshaping]);
     if (final_graph_reshaping()) {
       assert(failing(), "must bail out w/ explicit message");
       return;
@@ -2258,7 +2272,7 @@
   Matcher matcher;
   _matcher = &matcher;
   {
-    TracePhase t2("matcher", &_t_matcher, true);
+    TracePhase tp("matcher", &timers[_t_matcher]);
     matcher.match();
   }
   // In debug mode can dump m._nodes.dump() for mapping of ideal to machine
@@ -2275,7 +2289,7 @@
   PhaseCFG cfg(node_arena(), root(), matcher);
   _cfg = &cfg;
   {
-    NOT_PRODUCT( TracePhase t2("scheduler", &_t_scheduler, TimeCompiler); )
+    TracePhase tp("scheduler", &timers[_t_scheduler]);
     bool success = cfg.do_global_code_motion();
     if (!success) {
       return;
@@ -2289,7 +2303,7 @@
   PhaseChaitin regalloc(unique(), cfg, matcher);
   _regalloc = &regalloc;
   {
-    TracePhase t2("regalloc", &_t_registerAllocation, true);
+    TracePhase tp("regalloc", &timers[_t_registerAllocation]);
     // Perform register allocation.  After Chaitin, use-def chains are
     // no longer accurate (at spill code) and so must be ignored.
     // Node->LRG->reg mappings are still accurate.
@@ -2306,7 +2320,7 @@
   // are not adding any new instructions.  If any basic block is empty, we
   // can now safely remove it.
   {
-    NOT_PRODUCT( TracePhase t2("blockOrdering", &_t_blockOrdering, TimeCompiler); )
+    TracePhase tp("blockOrdering", &timers[_t_blockOrdering]);
     cfg.remove_empty_blocks();
     if (do_freq_based_layout()) {
       PhaseBlockLayout layout(cfg);
@@ -2318,22 +2332,20 @@
 
   // Apply peephole optimizations
   if( OptoPeephole ) {
-    NOT_PRODUCT( TracePhase t2("peephole", &_t_peephole, TimeCompiler); )
+    TracePhase tp("peephole", &timers[_t_peephole]);
     PhasePeephole peep( _regalloc, cfg);
     peep.do_transform();
   }
 
   // Do late expand if CPU requires this.
   if (Matcher::require_postalloc_expand) {
-    NOT_PRODUCT(TracePhase t2c("postalloc_expand", &_t_postalloc_expand, true));
+    TracePhase tp("postalloc_expand", &timers[_t_postalloc_expand]);
     cfg.postalloc_expand(_regalloc);
   }
 
   // Convert Nodes to instruction bits in a buffer
   {
-    // %%%% workspace merge brought two timers together for one job
-    TracePhase t2a("output", &_t_output, true);
-    NOT_PRODUCT( TraceTime t2b(NULL, &_t_codeGeneration, TimeCompiler, false); )
+    TraceTime tp("output", &timers[_t_output], CITime);
     Output();
   }
 
@@ -3538,11 +3550,11 @@
   _root = NULL;  // flush the graph, too
 }
 
-Compile::TracePhase::TracePhase(const char* name, elapsedTimer* accumulator, bool dolog)
-  : TraceTime(NULL, accumulator, false NOT_PRODUCT( || TimeCompiler ), false),
-    _phase_name(name), _dolog(dolog)
+Compile::TracePhase::TracePhase(const char* name, elapsedTimer* accumulator)
+  : TraceTime(name, accumulator, CITime, CITimeVerbose),
+    _phase_name(name), _dolog(CITimeVerbose)
 {
-  if (dolog) {
+  if (_dolog) {
     C = Compile::current();
     _log = C->log();
   } else {
--- a/hotspot/src/share/vm/opto/compile.hpp	Mon Sep 29 08:40:51 2014 +0200
+++ b/hotspot/src/share/vm/opto/compile.hpp	Thu Sep 25 12:10:57 2014 +0400
@@ -97,10 +97,10 @@
     AliasIdxRaw = 3   // hard-wired index for TypeRawPtr::BOTTOM
   };
 
-  // Variant of TraceTime(NULL, &_t_accumulator, TimeCompiler);
-  // Integrated with logging.  If logging is turned on, and dolog is true,
+  // Variant of TraceTime(NULL, &_t_accumulator, CITime);
+  // Integrated with logging.  If logging is turned on, and CITimeVerbose is true,
   // then brackets are put into the log, with time stamps and node counts.
-  // (The time collection itself is always conditionalized on TimeCompiler.)
+  // (The time collection itself is always conditionalized on CITime.)
   class TracePhase : public TraceTime {
    private:
     Compile*    C;
@@ -108,7 +108,7 @@
     const char* _phase_name;
     bool _dolog;
    public:
-    TracePhase(const char* name, elapsedTimer* accumulator, bool dolog);
+    TracePhase(const char* name, elapsedTimer* accumulator);
     ~TracePhase();
   };
 
--- a/hotspot/src/share/vm/opto/escape.cpp	Mon Sep 29 08:40:51 2014 +0200
+++ b/hotspot/src/share/vm/opto/escape.cpp	Thu Sep 25 12:10:57 2014 +0400
@@ -82,7 +82,7 @@
 }
 
 void ConnectionGraph::do_analysis(Compile *C, PhaseIterGVN *igvn) {
-  Compile::TracePhase t2("escapeAnalysis", &Phase::_t_escapeAnalysis, true);
+  Compile::TracePhase tp("escapeAnalysis", &Phase::timers[Phase::_t_escapeAnalysis]);
   ResourceMark rm;
 
   // Add ConP#NULL and ConN#NULL nodes before ConnectionGraph construction
@@ -117,7 +117,7 @@
   GrowableArray<FieldNode*>      oop_fields_worklist;
   DEBUG_ONLY( GrowableArray<Node*> addp_worklist; )
 
-  { Compile::TracePhase t3("connectionGraph", &Phase::_t_connectionGraph, true);
+  { Compile::TracePhase tp("connectionGraph", &Phase::timers[Phase::_t_connectionGraph]);
 
   // 1. Populate Connection Graph (CG) with PointsTo nodes.
   ideal_nodes.map(C->live_nodes(), NULL);  // preallocate space
--- a/hotspot/src/share/vm/opto/ifg.cpp	Mon Sep 29 08:40:51 2014 +0200
+++ b/hotspot/src/share/vm/opto/ifg.cpp	Thu Sep 25 12:10:57 2014 +0400
@@ -306,6 +306,7 @@
 // at this point can end up in bad places).  Copies I re-insert later I have
 // more opportunity to insert them in low-frequency locations.
 void PhaseChaitin::build_ifg_virtual( ) {
+  Compile::TracePhase tp("buildIFG_virt", &timers[_t_buildIFGvirtual]);
 
   // For all blocks (in any order) do...
   for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
@@ -739,7 +740,7 @@
  *   low to high register pressure transition within the block (if any).
  */
 uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
-  NOT_PRODUCT(Compile::TracePhase t3("buildIFG", &_t_buildIFGphysical, TimeCompiler);)
+  Compile::TracePhase tp("buildIFG", &timers[_t_buildIFGphysical]);
 
   uint must_spill = 0;
   for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
--- a/hotspot/src/share/vm/opto/output.cpp	Mon Sep 29 08:40:51 2014 +0200
+++ b/hotspot/src/share/vm/opto/output.cpp	Thu Sep 25 12:10:57 2014 +0400
@@ -1887,7 +1887,7 @@
   if (max_vector_size() > 8)
     return;
 
-  NOT_PRODUCT( TracePhase t2("isched", &_t_instrSched, TimeCompiler); )
+  TracePhase tp("isched", &timers[_t_instrSched]);
 
   // Create a data structure for all the scheduling information
   Scheduling scheduling(Thread::current()->resource_area(), *this);
--- a/hotspot/src/share/vm/opto/parse1.cpp	Mon Sep 29 08:40:51 2014 +0200
+++ b/hotspot/src/share/vm/opto/parse1.cpp	Thu Sep 25 12:10:57 2014 +0400
@@ -528,7 +528,7 @@
   methods_parsed++;
 #ifndef PRODUCT
   // add method size here to guarantee that inlined methods are added too
-  if (TimeCompiler)
+  if (CITime)
     _total_bytes_compiled += method()->code_size();
 
   show_parse_info();
--- a/hotspot/src/share/vm/opto/phase.cpp	Mon Sep 29 08:40:51 2014 +0200
+++ b/hotspot/src/share/vm/opto/phase.cpp	Thu Sep 25 12:10:57 2014 +0400
@@ -30,56 +30,14 @@
 #include "opto/node.hpp"
 #include "opto/phase.hpp"
 
-#ifndef PRODUCT
 int Phase::_total_bytes_compiled = 0;
 
 elapsedTimer Phase::_t_totalCompilation;
 elapsedTimer Phase::_t_methodCompilation;
 elapsedTimer Phase::_t_stubCompilation;
-#endif
 
-// The next timers used for LogCompilation
-elapsedTimer Phase::_t_parser;
-elapsedTimer Phase::_t_optimizer;
-elapsedTimer   Phase::_t_escapeAnalysis;
-elapsedTimer     Phase::_t_connectionGraph;
-elapsedTimer   Phase::_t_idealLoop;
-elapsedTimer   Phase::_t_ccp;
-elapsedTimer Phase::_t_matcher;
-elapsedTimer Phase::_t_registerAllocation;
-elapsedTimer Phase::_t_output;
-
-#ifndef PRODUCT
-elapsedTimer Phase::_t_graphReshaping;
-elapsedTimer Phase::_t_scheduler;
-elapsedTimer Phase::_t_blockOrdering;
-elapsedTimer Phase::_t_macroEliminate;
-elapsedTimer Phase::_t_macroExpand;
-elapsedTimer Phase::_t_peephole;
-elapsedTimer Phase::_t_postalloc_expand;
-elapsedTimer Phase::_t_codeGeneration;
-elapsedTimer Phase::_t_registerMethod;
-elapsedTimer Phase::_t_temporaryTimer1;
-elapsedTimer Phase::_t_temporaryTimer2;
-elapsedTimer Phase::_t_idealLoopVerify;
-
-// Subtimers for _t_optimizer
-elapsedTimer   Phase::_t_iterGVN;
-elapsedTimer   Phase::_t_iterGVN2;
-elapsedTimer   Phase::_t_incrInline;
-
-// Subtimers for _t_registerAllocation
-elapsedTimer   Phase::_t_ctorChaitin;
-elapsedTimer   Phase::_t_buildIFGphysical;
-elapsedTimer   Phase::_t_computeLive;
-elapsedTimer   Phase::_t_regAllocSplit;
-elapsedTimer   Phase::_t_postAllocCopyRemoval;
-elapsedTimer   Phase::_t_fixupSpills;
-
-// Subtimers for _t_output
-elapsedTimer   Phase::_t_instrSched;
-elapsedTimer   Phase::_t_buildOopMaps;
-#endif
+// The counters to use for LogCompilation
+elapsedTimer Phase::timers[max_phase_timers];
 
 //------------------------------Phase------------------------------------------
 Phase::Phase( PhaseNumber pnum ) : _pnum(pnum), C( pnum == Compiler ? NULL : Compile::current()) {
@@ -89,93 +47,132 @@
   CompileBroker::maybe_block();
 }
 
-#ifndef PRODUCT
-static const double minimum_reported_time             = 0.0001; // seconds
-static const double expected_method_compile_coverage  = 0.97;   // %
-static const double minimum_meaningful_method_compile = 2.00;   // seconds
+void Phase::print_timers() {
+  tty->print_cr ("    C2 Compile Time:      %7.3f s", Phase::_t_totalCompilation.seconds());
+  tty->print_cr ("       Parse:               %7.3f s", timers[_t_parser].seconds());
 
-void Phase::print_timers() {
-  tty->print_cr ("Accumulated compiler times:");
-  tty->print_cr ("---------------------------");
-  tty->print_cr ("  Total compilation: %3.3f sec.", Phase::_t_totalCompilation.seconds());
-  tty->print    ("    method compilation   : %3.3f sec", Phase::_t_methodCompilation.seconds());
-  tty->print    ("/%d bytes",_total_bytes_compiled);
-  tty->print_cr (" (%3.0f bytes per sec) ", Phase::_total_bytes_compiled / Phase::_t_methodCompilation.seconds());
-  tty->print_cr ("    stub compilation     : %3.3f sec.", Phase::_t_stubCompilation.seconds());
-  tty->print_cr ("  Phases:");
-  tty->print_cr ("    parse          : %3.3f sec", Phase::_t_parser.seconds());
-  tty->print_cr ("    optimizer      : %3.3f sec", Phase::_t_optimizer.seconds());
-  if( Verbose || WizardMode ) {
+  {
+    tty->print_cr ("       Optimize:            %7.3f s", timers[_t_optimizer].seconds());
     if (DoEscapeAnalysis) {
       // EA is part of Optimizer.
-      tty->print_cr ("      escape analysis: %3.3f sec", Phase::_t_escapeAnalysis.seconds());
-      tty->print_cr ("        connection graph: %3.3f sec", Phase::_t_connectionGraph.seconds());
-      tty->print_cr ("      macroEliminate : %3.3f sec", Phase::_t_macroEliminate.seconds());
+      tty->print_cr ("         Escape Analysis:     %7.3f s", timers[_t_escapeAnalysis].seconds());
+      tty->print_cr ("           Conn Graph:          %7.3f s", timers[_t_connectionGraph].seconds());
+      tty->print_cr ("           Macro Eliminate:     %7.3f s", timers[_t_macroEliminate].seconds());
     }
-    tty->print_cr ("      iterGVN        : %3.3f sec", Phase::_t_iterGVN.seconds());
-    tty->print_cr ("      incrInline     : %3.3f sec", Phase::_t_incrInline.seconds());
-    tty->print_cr ("      idealLoop      : %3.3f sec", Phase::_t_idealLoop.seconds());
-    tty->print_cr ("      idealLoopVerify: %3.3f sec", Phase::_t_idealLoopVerify.seconds());
-    tty->print_cr ("      ccp            : %3.3f sec", Phase::_t_ccp.seconds());
-    tty->print_cr ("      iterGVN2       : %3.3f sec", Phase::_t_iterGVN2.seconds());
-    tty->print_cr ("      macroExpand    : %3.3f sec", Phase::_t_macroExpand.seconds());
-    tty->print_cr ("      graphReshape   : %3.3f sec", Phase::_t_graphReshaping.seconds());
-    double optimizer_subtotal = Phase::_t_iterGVN.seconds() + Phase::_t_iterGVN2.seconds() +
-      Phase::_t_escapeAnalysis.seconds() + Phase::_t_macroEliminate.seconds() +
-      Phase::_t_idealLoop.seconds() + Phase::_t_ccp.seconds() +
-      Phase::_t_macroExpand.seconds() + Phase::_t_graphReshaping.seconds();
-    double percent_of_optimizer = ((optimizer_subtotal == 0.0) ? 0.0 : (optimizer_subtotal / Phase::_t_optimizer.seconds() * 100.0));
-    tty->print_cr ("      subtotal       : %3.3f sec,  %3.2f %%", optimizer_subtotal, percent_of_optimizer);
-  }
-  tty->print_cr ("    matcher        : %3.3f sec", Phase::_t_matcher.seconds());
-  tty->print_cr ("    scheduler      : %3.3f sec", Phase::_t_scheduler.seconds());
-  tty->print_cr ("    regalloc       : %3.3f sec", Phase::_t_registerAllocation.seconds());
-  if( Verbose || WizardMode ) {
-    tty->print_cr ("      ctorChaitin    : %3.3f sec", Phase::_t_ctorChaitin.seconds());
-    tty->print_cr ("      buildIFG       : %3.3f sec", Phase::_t_buildIFGphysical.seconds());
-    tty->print_cr ("      computeLive    : %3.3f sec", Phase::_t_computeLive.seconds());
-    tty->print_cr ("      regAllocSplit  : %3.3f sec", Phase::_t_regAllocSplit.seconds());
-    tty->print_cr ("      postAllocCopyRemoval: %3.3f sec", Phase::_t_postAllocCopyRemoval.seconds());
-    tty->print_cr ("      fixupSpills    : %3.3f sec", Phase::_t_fixupSpills.seconds());
-    double regalloc_subtotal = Phase::_t_ctorChaitin.seconds() +
-      Phase::_t_buildIFGphysical.seconds() + Phase::_t_computeLive.seconds() +
-      Phase::_t_regAllocSplit.seconds()    + Phase::_t_fixupSpills.seconds() +
-      Phase::_t_postAllocCopyRemoval.seconds();
-    double percent_of_regalloc = ((regalloc_subtotal == 0.0) ? 0.0 : (regalloc_subtotal / Phase::_t_registerAllocation.seconds() * 100.0));
-    tty->print_cr ("      subtotal       : %3.3f sec,  %3.2f %%", regalloc_subtotal, percent_of_regalloc);
+    tty->print_cr ("         GVN 1:               %7.3f s", timers[_t_iterGVN].seconds());
+
+    {
+       tty->print_cr ("         Incremental Inline:  %7.3f s", timers[_t_incrInline].seconds());
+       tty->print_cr ("           IdealLoop:           %7.3f s", timers[_t_incrInline_ideal].seconds());
+       tty->print_cr ("           IGVN:                %7.3f s", timers[_t_incrInline_igvn].seconds());
+       tty->print_cr ("           Inline:              %7.3f s", timers[_t_incrInline_inline].seconds());
+       tty->print_cr ("           Prune Useless:       %7.3f s", timers[_t_incrInline_pru].seconds());
+
+       double other = timers[_t_incrInline].seconds() -
+        (timers[_t_incrInline_ideal].seconds() +
+         timers[_t_incrInline_igvn].seconds() +
+         timers[_t_incrInline_inline].seconds() +
+         timers[_t_incrInline_pru].seconds());
+       if (other > 0) {
+         tty->print_cr("           Other:               %7.3f s", other);
+       }
+    }
+    tty->print_cr ("         IdealLoop:           %7.3f s", timers[_t_idealLoop].seconds());
+    tty->print_cr ("         IdealLoop Verify:    %7.3f s", timers[_t_idealLoopVerify].seconds());
+    tty->print_cr ("         Cond Const Prop:     %7.3f s", timers[_t_ccp].seconds());
+    tty->print_cr ("         GVN 2:               %7.3f s", timers[_t_iterGVN2].seconds());
+    tty->print_cr ("         Macro Expand:        %7.3f s", timers[_t_macroExpand].seconds());
+    tty->print_cr ("         Graph Reshape:       %7.3f s", timers[_t_graphReshaping].seconds());
+
+    double other = timers[_t_optimizer].seconds() -
+      (timers[_t_escapeAnalysis].seconds() +
+       timers[_t_iterGVN].seconds() +
+       timers[_t_incrInline].seconds() +
+       timers[_t_idealLoop].seconds() +
+       timers[_t_idealLoopVerify].seconds() +
+       timers[_t_ccp].seconds() +
+       timers[_t_iterGVN2].seconds() +
+       timers[_t_macroExpand].seconds() +
+       timers[_t_graphReshaping].seconds());
+    if (other > 0) {
+      tty->print_cr("         Other:               %7.3f s", other);
+    }
   }
-  tty->print_cr ("    blockOrdering  : %3.3f sec", Phase::_t_blockOrdering.seconds());
-  tty->print_cr ("    peephole       : %3.3f sec", Phase::_t_peephole.seconds());
-  if (Matcher::require_postalloc_expand) {
-    tty->print_cr ("    postalloc_expand: %3.3f sec", Phase::_t_postalloc_expand.seconds());
-  }
-  tty->print_cr ("    codeGen        : %3.3f sec", Phase::_t_codeGeneration.seconds());
-  tty->print_cr ("    install_code   : %3.3f sec", Phase::_t_registerMethod.seconds());
-  tty->print_cr ("    -------------- : ----------");
-  double phase_subtotal = Phase::_t_parser.seconds() +
-    Phase::_t_optimizer.seconds() + Phase::_t_graphReshaping.seconds() +
-    Phase::_t_matcher.seconds() + Phase::_t_scheduler.seconds() +
-    Phase::_t_registerAllocation.seconds() + Phase::_t_blockOrdering.seconds() +
-    Phase::_t_codeGeneration.seconds() + Phase::_t_registerMethod.seconds();
-  double percent_of_method_compile = ((phase_subtotal == 0.0) ? 0.0 : phase_subtotal / Phase::_t_methodCompilation.seconds()) * 100.0;
-  // counters inside Compile::CodeGen include time for adapters and stubs
-  // so phase-total can be greater than 100%
-  tty->print_cr ("    total          : %3.3f sec,  %3.2f %%", phase_subtotal, percent_of_method_compile);
+
+  tty->print_cr ("       Matcher:             %7.3f s", timers[_t_matcher].seconds());
+  tty->print_cr ("       Scheduler:           %7.3f s", timers[_t_scheduler].seconds());
+
+  {
+    tty->print_cr ("       Regalloc:            %7.3f s", timers[_t_registerAllocation].seconds());
+    tty->print_cr ("         Ctor Chaitin:        %7.3f s", timers[_t_ctorChaitin].seconds());
+    tty->print_cr ("         Build IFG (virt):    %7.3f s", timers[_t_buildIFGvirtual].seconds());
+    tty->print_cr ("         Build IFG (phys):    %7.3f s", timers[_t_buildIFGphysical].seconds());
+    tty->print_cr ("         Compute Liveness:    %7.3f s", timers[_t_computeLive].seconds());
+    tty->print_cr ("         Regalloc Split:      %7.3f s", timers[_t_regAllocSplit].seconds());
+    tty->print_cr ("         Postalloc Copy Rem:  %7.3f s", timers[_t_postAllocCopyRemoval].seconds());
+    tty->print_cr ("         Fixup Spills:        %7.3f s", timers[_t_fixupSpills].seconds());
+    tty->print_cr ("         Compact:             %7.3f s", timers[_t_chaitinCompact].seconds());
+    tty->print_cr ("         Coalesce 1:          %7.3f s", timers[_t_chaitinCoalesce1].seconds());
+    tty->print_cr ("         Coalesce 2:          %7.3f s", timers[_t_chaitinCoalesce2].seconds());
+    tty->print_cr ("         Coalesce 3:          %7.3f s", timers[_t_chaitinCoalesce3].seconds());
+    tty->print_cr ("         Cache LRG:           %7.3f s", timers[_t_chaitinCacheLRG].seconds());
+    tty->print_cr ("         Simplify:            %7.3f s", timers[_t_chaitinSimplify].seconds());
+    tty->print_cr ("         Select:              %7.3f s", timers[_t_chaitinSelect].seconds());
+
+    double other = timers[_t_registerAllocation].seconds() -
+      (timers[_t_ctorChaitin].seconds() +
+       timers[_t_buildIFGvirtual].seconds() +
+       timers[_t_buildIFGphysical].seconds() +
+       timers[_t_computeLive].seconds() +
+       timers[_t_regAllocSplit].seconds() +
+       timers[_t_postAllocCopyRemoval].seconds() +
+       timers[_t_fixupSpills].seconds() +
+       timers[_t_chaitinCompact].seconds() +
+       timers[_t_chaitinCoalesce1].seconds() +
+       timers[_t_chaitinCoalesce2].seconds() +
+       timers[_t_chaitinCoalesce3].seconds() +
+       timers[_t_chaitinCacheLRG].seconds() +
+       timers[_t_chaitinSimplify].seconds() +
+       timers[_t_chaitinSelect].seconds());
 
-  assert( percent_of_method_compile > expected_method_compile_coverage ||
-          phase_subtotal < minimum_meaningful_method_compile,
-          "Must account for method compilation");
+    if (other > 0) {
+      tty->print_cr("         Other:               %7.3f s", other);
+    }
+  }
+  tty->print_cr ("       Block Ordering:      %7.3f s", timers[_t_blockOrdering].seconds());
+  tty->print_cr ("       Peephole:            %7.3f s", timers[_t_peephole].seconds());
+  if (Matcher::require_postalloc_expand) {
+    tty->print_cr ("       Postalloc Expand:    %7.3f s", timers[_t_postalloc_expand].seconds());
+  }
+  tty->print_cr ("       Code Emission:         %7.3f s", timers[_t_output].seconds());
+  tty->print_cr ("         Insn Scheduling:     %7.3f s", timers[_t_instrSched].seconds());
+  tty->print_cr ("         Build OOP maps:      %7.3f s", timers[_t_buildOopMaps].seconds());
+  tty->print_cr ("       Code Installation:   %7.3f s", timers[_t_registerMethod].seconds());
 
-  if( Phase::_t_temporaryTimer1.seconds() > minimum_reported_time ) {
+  if( timers[_t_temporaryTimer1].seconds() > 0 ) {
     tty->cr();
-    tty->print_cr ("    temporaryTimer1: %3.3f sec", Phase::_t_temporaryTimer1.seconds());
+    tty->print_cr ("       Temp Timer 1:        %7.3f s", timers[_t_temporaryTimer1].seconds());
   }
-  if( Phase::_t_temporaryTimer2.seconds() > minimum_reported_time ) {
+  if( timers[_t_temporaryTimer2].seconds() > 0 ) {
     tty->cr();
-    tty->print_cr ("    temporaryTimer2: %3.3f sec", Phase::_t_temporaryTimer2.seconds());
+    tty->print_cr ("       Temp Timer 2:        %7.3f s", timers[_t_temporaryTimer2].seconds());
   }
-  tty->print_cr ("    output         : %3.3f sec", Phase::_t_output.seconds());
-  tty->print_cr ("      isched         : %3.3f sec", Phase::_t_instrSched.seconds());
-  tty->print_cr ("      bldOopMaps     : %3.3f sec", Phase::_t_buildOopMaps.seconds());
+
+   double other = Phase::_t_totalCompilation.seconds() -
+      (timers[_t_parser].seconds() +
+       timers[_t_optimizer].seconds() +
+       timers[_t_matcher].seconds() +
+       timers[_t_scheduler].seconds() +
+       timers[_t_registerAllocation].seconds() +
+       timers[_t_blockOrdering].seconds() +
+       timers[_t_peephole].seconds() +
+       timers[_t_postalloc_expand].seconds() +
+       timers[_t_output].seconds() +
+       timers[_t_registerMethod].seconds() +
+       timers[_t_temporaryTimer1].seconds() +
+       timers[_t_temporaryTimer2].seconds());
+    if (other > 0) {
+      tty->print_cr("       Other:               %7.3f s", other);
+    }
+
 }
-#endif
--- a/hotspot/src/share/vm/opto/phase.hpp	Mon Sep 29 08:40:51 2014 +0200
+++ b/hotspot/src/share/vm/opto/phase.hpp	Thu Sep 25 12:10:57 2014 +0400
@@ -60,63 +60,65 @@
     Peephole,                   // Apply peephole optimizations
     last_phase
   };
+
+  enum PhaseTraceId {
+    _t_parser,
+    _t_optimizer,
+      _t_escapeAnalysis,
+        _t_connectionGraph,
+        _t_macroEliminate,
+      _t_iterGVN,
+      _t_incrInline,
+        _t_incrInline_ideal,
+        _t_incrInline_igvn,
+        _t_incrInline_pru,
+        _t_incrInline_inline,
+      _t_idealLoop,
+      _t_idealLoopVerify,
+      _t_ccp,
+      _t_iterGVN2,
+      _t_macroExpand,
+      _t_graphReshaping,
+    _t_matcher,
+    _t_scheduler,
+    _t_registerAllocation,
+      _t_ctorChaitin,
+      _t_buildIFGvirtual,
+      _t_buildIFGphysical,
+      _t_computeLive,
+      _t_regAllocSplit,
+      _t_postAllocCopyRemoval,
+      _t_fixupSpills,
+      _t_chaitinCompact,
+      _t_chaitinCoalesce1,
+      _t_chaitinCoalesce2,
+      _t_chaitinCoalesce3,
+      _t_chaitinCacheLRG,
+      _t_chaitinSimplify,
+      _t_chaitinSelect,
+    _t_blockOrdering,
+    _t_peephole,
+    _t_postalloc_expand,
+    _t_output,
+       _t_instrSched,
+       _t_buildOopMaps,
+    _t_registerMethod,
+    _t_temporaryTimer1,
+    _t_temporaryTimer2,
+    max_phase_timers
+   };
+
+  static elapsedTimer timers[max_phase_timers];
+
 protected:
   enum PhaseNumber _pnum;       // Phase number (for stat gathering)
 
-#ifndef PRODUCT
   static int _total_bytes_compiled;
 
   // accumulated timers
   static elapsedTimer _t_totalCompilation;
   static elapsedTimer _t_methodCompilation;
   static elapsedTimer _t_stubCompilation;
-#endif
-
-// The next timers used for LogCompilation
-  static elapsedTimer _t_parser;
-  static elapsedTimer _t_optimizer;
-public:
-  // ConnectionGraph can't be Phase since it is used after EA done.
-  static elapsedTimer   _t_escapeAnalysis;
-  static elapsedTimer     _t_connectionGraph;
-protected:
-  static elapsedTimer   _t_idealLoop;
-  static elapsedTimer   _t_ccp;
-  static elapsedTimer _t_matcher;
-  static elapsedTimer _t_registerAllocation;
-  static elapsedTimer _t_output;
-
-#ifndef PRODUCT
-  static elapsedTimer _t_graphReshaping;
-  static elapsedTimer _t_scheduler;
-  static elapsedTimer _t_blockOrdering;
-  static elapsedTimer _t_macroEliminate;
-  static elapsedTimer _t_macroExpand;
-  static elapsedTimer _t_peephole;
-  static elapsedTimer _t_postalloc_expand;
-  static elapsedTimer _t_codeGeneration;
-  static elapsedTimer _t_registerMethod;
-  static elapsedTimer _t_temporaryTimer1;
-  static elapsedTimer _t_temporaryTimer2;
-  static elapsedTimer _t_idealLoopVerify;
-
-// Subtimers for _t_optimizer
-  static elapsedTimer   _t_iterGVN;
-  static elapsedTimer   _t_iterGVN2;
-  static elapsedTimer   _t_incrInline;
-
-// Subtimers for _t_registerAllocation
-  static elapsedTimer   _t_ctorChaitin;
-  static elapsedTimer   _t_buildIFGphysical;
-  static elapsedTimer   _t_computeLive;
-  static elapsedTimer   _t_regAllocSplit;
-  static elapsedTimer   _t_postAllocCopyRemoval;
-  static elapsedTimer   _t_fixupSpills;
-
-// Subtimers for _t_output
-  static elapsedTimer   _t_instrSched;
-  static elapsedTimer   _t_buildOopMaps;
-#endif
 
   // Generate a subtyping check.  Takes as input the subtype and supertype.
   // Returns 2 values: sets the default control() to the true path and
@@ -131,9 +133,7 @@
   Compile * C;
   Phase( PhaseNumber pnum );
 
-#ifndef PRODUCT
   static void print_timers();
-#endif
 };
 
 #endif // SHARE_VM_OPTO_PHASE_HPP
--- a/hotspot/src/share/vm/opto/postaloc.cpp	Mon Sep 29 08:40:51 2014 +0200
+++ b/hotspot/src/share/vm/opto/postaloc.cpp	Thu Sep 25 12:10:57 2014 +0400
@@ -403,7 +403,7 @@
 // When we see a use from a reg-reg Copy, we will attempt to use the copy's
 // source directly and make the copy go dead.
 void PhaseChaitin::post_allocate_copy_removal() {
-  NOT_PRODUCT( Compile::TracePhase t3("postAllocCopyRemoval", &_t_postAllocCopyRemoval, TimeCompiler); )
+  Compile::TracePhase tp("postAllocCopyRemoval", &timers[_t_postAllocCopyRemoval]);
   ResourceMark rm;
 
   // Need a mapping from basic block Node_Lists.  We need a Node_List to
--- a/hotspot/src/share/vm/opto/reg_split.cpp	Mon Sep 29 08:40:51 2014 +0200
+++ b/hotspot/src/share/vm/opto/reg_split.cpp	Thu Sep 25 12:10:57 2014 +0400
@@ -468,7 +468,7 @@
 //       Else, hoist LRG back up to register only (ie - split is also DEF)
 // We will compute a new maxlrg as we go
 uint PhaseChaitin::Split(uint maxlrg, ResourceArea* split_arena) {
-  NOT_PRODUCT( Compile::TracePhase t3("regAllocSplit", &_t_regAllocSplit, TimeCompiler); )
+  Compile::TracePhase tp("regAllocSplit", &timers[_t_regAllocSplit]);
 
   // Free thread local resources used by this method on exit.
   ResourceMark rm(split_arena);
--- a/hotspot/src/share/vm/runtime/globals.hpp	Mon Sep 29 08:40:51 2014 +0200
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Thu Sep 25 12:10:57 2014 +0400
@@ -2487,6 +2487,9 @@
   product(bool, CITime, false,                                              \
           "collect timing information for compilation")                     \
                                                                             \
+  develop(bool, CITimeVerbose, false,                                       \
+          "be more verbose in compilation timings")                         \
+                                                                            \
   develop(bool, CITimeEach, false,                                          \
           "display timing information after each successful compilation")   \
                                                                             \
@@ -2574,9 +2577,6 @@
   product(bool, UseTypeProfile, true,                                       \
           "Check interpreter profile for historically monomorphic calls")   \
                                                                             \
-  notproduct(bool, TimeCompiler, false,                                     \
-          "Time the compiler")                                              \
-                                                                            \
   diagnostic(bool, PrintInlining, false,                                    \
           "Print inlining optimizations")                                   \
                                                                             \
--- a/hotspot/src/share/vm/runtime/java.cpp	Mon Sep 29 08:40:51 2014 +0200
+++ b/hotspot/src/share/vm/runtime/java.cpp	Thu Sep 25 12:10:57 2014 +0400
@@ -272,9 +272,6 @@
 
   print_method_profiling_data();
 
-  if (TimeCompiler) {
-    COMPILER2_PRESENT(Compile::print_timers();)
-  }
   if (TimeCompilationPolicy) {
     CompilationPolicy::policy()->print_time();
   }