hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp
changeset 33628 09241459a8b8
parent 33105 294e48b4f704
child 34205 9ec51d30a11e
child 34148 6efbc7ffd767
equal deleted inserted replaced
33627:c5b7455f846e 33628:09241459a8b8
    42 #if INCLUDE_ALL_GCS
    42 #if INCLUDE_ALL_GCS
    43 #include "gc/g1/g1CollectedHeap.inline.hpp"
    43 #include "gc/g1/g1CollectedHeap.inline.hpp"
    44 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
    44 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
    45 #include "gc/g1/heapRegion.hpp"
    45 #include "gc/g1/heapRegion.hpp"
    46 #endif // INCLUDE_ALL_GCS
    46 #endif // INCLUDE_ALL_GCS
       
    47 #ifdef COMPILER2
       
    48 #include "opto/intrinsicnode.hpp"
       
    49 #endif
    47 
    50 
    48 #ifdef PRODUCT
    51 #ifdef PRODUCT
    49 #define BLOCK_COMMENT(str) /* nothing */
    52 #define BLOCK_COMMENT(str) /* nothing */
    50 #define STOP(error) stop(error)
    53 #define STOP(error) stop(error)
    51 #else
    54 #else
  4251       load_ptr_contents(base, G6_heapbase);
  4254       load_ptr_contents(base, G6_heapbase);
  4252     }
  4255     }
  4253   }
  4256   }
  4254 }
  4257 }
  4255 
  4258 
  4256 // Compare char[] arrays aligned to 4 bytes.
  4259 #ifdef COMPILER2
  4257 void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
  4260 
  4258                                         Register limit, Register result,
  4261 // Compress char[] to byte[] by compressing 16 bytes at once. Return 0 on failure.
  4259                                         Register chr1, Register chr2, Label& Ldone) {
  4262 void MacroAssembler::string_compress_16(Register src, Register dst, Register cnt, Register result,
  4260   Label Lvector, Lloop;
  4263                                         Register tmp1, Register tmp2, Register tmp3, Register tmp4,
  4261   assert(chr1 == result, "should be the same");
  4264                                         FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, Label& Ldone) {
  4262 
  4265   Label Lloop, Lslow;
  4263   // Note: limit contains number of bytes (2*char_elements) != 0.
  4266   assert(UseVIS >= 3, "VIS3 is required");
  4264   andcc(limit, 0x2, chr1); // trailing character ?
  4267   assert_different_registers(src, dst, cnt, tmp1, tmp2, tmp3, tmp4, result);
       
  4268   assert_different_registers(ftmp1, ftmp2, ftmp3);
       
  4269 
       
  4270   // Check if cnt >= 8 (= 16 bytes)
       
  4271   cmp(cnt, 8);
       
  4272   br(Assembler::less, false, Assembler::pn, Lslow);
       
  4273   delayed()->mov(cnt, result); // copy count
       
  4274 
       
  4275   // Check for 8-byte alignment of src and dst
       
  4276   or3(src, dst, tmp1);
       
  4277   andcc(tmp1, 7, G0);
       
  4278   br(Assembler::notZero, false, Assembler::pn, Lslow);
       
  4279   delayed()->nop();
       
  4280 
       
  4281   // Set mask for bshuffle instruction
       
  4282   Register mask = tmp4;
       
  4283   set(0x13579bdf, mask);
       
  4284   bmask(mask, G0, G0);
       
  4285 
       
  4286   // Set mask to 0xff00 ff00 ff00 ff00 to check for non-latin1 characters
       
  4287   Assembler::sethi(0xff00fc00, mask); // mask = 0x0000 0000 ff00 fc00
       
  4288   add(mask, 0x300, mask);             // mask = 0x0000 0000 ff00 ff00
       
  4289   sllx(mask, 32, tmp1);               // tmp1 = 0xff00 ff00 0000 0000
       
  4290   or3(mask, tmp1, mask);              // mask = 0xff00 ff00 ff00 ff00
       
  4291 
       
  4292   // Load first 8 bytes
       
  4293   ldx(src, 0, tmp1);
       
  4294 
       
  4295   bind(Lloop);
       
  4296   // Load next 8 bytes
       
  4297   ldx(src, 8, tmp2);
       
  4298 
       
  4299   // Check for non-latin1 character by testing if the most significant byte of a char is set.
       
  4300   // Although we have to move the data between integer and floating point registers, this is
       
  4301   // still faster than the corresponding VIS instructions (ford/fand/fcmpd).
       
  4302   or3(tmp1, tmp2, tmp3);
       
  4303   btst(tmp3, mask);
       
  4304   // annul zeroing if branch is not taken to preserve original count
       
  4305   brx(Assembler::notZero, true, Assembler::pn, Ldone);
       
  4306   delayed()->mov(G0, result); // 0 - failed
       
  4307 
       
  4308   // Move bytes into float register
       
  4309   movxtod(tmp1, ftmp1);
       
  4310   movxtod(tmp2, ftmp2);
       
  4311 
       
  4312   // Compress by copying one byte per char from ftmp1 and ftmp2 to ftmp3
       
  4313   bshuffle(ftmp1, ftmp2, ftmp3);
       
  4314   stf(FloatRegisterImpl::D, ftmp3, dst, 0);
       
  4315 
       
  4316   // Increment addresses and decrement count
       
  4317   inc(src, 16);
       
  4318   inc(dst, 8);
       
  4319   dec(cnt, 8);
       
  4320 
       
  4321   cmp(cnt, 8);
       
  4322   // annul LDX if branch is not taken to prevent access past end of string
       
  4323   br(Assembler::greaterEqual, true, Assembler::pt, Lloop);
       
  4324   delayed()->ldx(src, 0, tmp1);
       
  4325 
       
  4326   // Fallback to slow version
       
  4327   bind(Lslow);
       
  4328 }
       
  4329 
       
  4330 // Compress char[] to byte[]. Return 0 on failure.
       
  4331 void MacroAssembler::string_compress(Register src, Register dst, Register cnt, Register result, Register tmp, Label& Ldone) {
       
  4332   Label Lloop;
       
  4333   assert_different_registers(src, dst, cnt, tmp, result);
       
  4334 
       
  4335   lduh(src, 0, tmp);
       
  4336 
       
  4337   bind(Lloop);
       
  4338   inc(src, sizeof(jchar));
       
  4339   cmp(tmp, 0xff);
       
  4340   // annul zeroing if branch is not taken to preserve original count
       
  4341   br(Assembler::greater, true, Assembler::pn, Ldone); // don't check xcc
       
  4342   delayed()->mov(G0, result); // 0 - failed
       
  4343   deccc(cnt);
       
  4344   stb(tmp, dst, 0);
       
  4345   inc(dst);
       
  4346   // annul LDUH if branch is not taken to prevent access past end of string
       
  4347   br(Assembler::notZero, true, Assembler::pt, Lloop);
       
  4348   delayed()->lduh(src, 0, tmp); // hoisted
       
  4349 }
       
  4350 
       
  4351 // Inflate byte[] to char[] by inflating 16 bytes at once.
       
  4352 void MacroAssembler::string_inflate_16(Register src, Register dst, Register cnt, Register tmp,
       
  4353                                        FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, FloatRegister ftmp4, Label& Ldone) {
       
  4354   Label Lloop, Lslow;
       
  4355   assert(UseVIS >= 3, "VIS3 is required");
       
  4356   assert_different_registers(src, dst, cnt, tmp);
       
  4357   assert_different_registers(ftmp1, ftmp2, ftmp3, ftmp4);
       
  4358 
       
  4359   // Check if cnt >= 8 (= 16 bytes)
       
  4360   cmp(cnt, 8);
       
  4361   br(Assembler::less, false, Assembler::pn, Lslow);
       
  4362   delayed()->nop();
       
  4363 
       
  4364   // Check for 8-byte alignment of src and dst
       
  4365   or3(src, dst, tmp);
       
  4366   andcc(tmp, 7, G0);
       
  4367   br(Assembler::notZero, false, Assembler::pn, Lslow);
       
  4368   // Initialize float register to zero
       
  4369   FloatRegister zerof = ftmp4;
       
  4370   delayed()->fzero(FloatRegisterImpl::D, zerof);
       
  4371 
       
  4372   // Load first 8 bytes
       
  4373   ldf(FloatRegisterImpl::D, src, 0, ftmp1);
       
  4374 
       
  4375   bind(Lloop);
       
  4376   inc(src, 8);
       
  4377   dec(cnt, 8);
       
  4378 
       
  4379   // Inflate the string by interleaving each byte from the source array
       
  4380   // with a zero byte and storing the result in the destination array.
       
  4381   fpmerge(zerof, ftmp1->successor(), ftmp2);
       
  4382   stf(FloatRegisterImpl::D, ftmp2, dst, 8);
       
  4383   fpmerge(zerof, ftmp1, ftmp3);
       
  4384   stf(FloatRegisterImpl::D, ftmp3, dst, 0);
       
  4385 
       
  4386   inc(dst, 16);
       
  4387 
       
  4388   cmp(cnt, 8);
       
  4389   // annul LDF if branch is not taken to prevent access past end of string
       
  4390   br(Assembler::greaterEqual, true, Assembler::pt, Lloop);
       
  4391   delayed()->ldf(FloatRegisterImpl::D, src, 0, ftmp1);
       
  4392 
       
  4393   // Fallback to slow version
       
  4394   bind(Lslow);
       
  4395 }
       
  4396 
       
  4397 // Inflate byte[] to char[].
       
  4398 void MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp, Label& Ldone) {
       
  4399   Label Loop;
       
  4400   assert_different_registers(src, dst, cnt, tmp);
       
  4401 
       
  4402   ldub(src, 0, tmp);
       
  4403   bind(Loop);
       
  4404   inc(src);
       
  4405   deccc(cnt);
       
  4406   sth(tmp, dst, 0);
       
  4407   inc(dst, sizeof(jchar));
       
  4408   // annul LDUB if branch is not taken to prevent access past end of string
       
  4409   br(Assembler::notZero, true, Assembler::pt, Loop);
       
  4410   delayed()->ldub(src, 0, tmp); // hoisted
       
  4411 }
       
  4412 
       
  4413 void MacroAssembler::string_compare(Register str1, Register str2,
       
  4414                                     Register cnt1, Register cnt2,
       
  4415                                     Register tmp1, Register tmp2,
       
  4416                                     Register result, int ae) {
       
  4417   Label Ldone, Lloop;
       
  4418   assert_different_registers(str1, str2, cnt1, cnt2, tmp1, result);
       
  4419   int stride1, stride2;
       
  4420 
       
  4421   // Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
       
  4422   // we interchange str1 and str2 in the UL case and negate the result.
       
  4423   // Like this, str1 is always latin1 encoded, except for the UU case.
       
  4424 
       
  4425   if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
       
  4426     srl(cnt2, 1, cnt2);
       
  4427   }
       
  4428 
       
  4429   // See if the lengths are different, and calculate min in cnt1.
       
  4430   // Save diff in case we need it for a tie-breaker.
       
  4431   Label Lskip;
       
  4432   Register diff = tmp1;
       
  4433   subcc(cnt1, cnt2, diff);
       
  4434   br(Assembler::greater, true, Assembler::pt, Lskip);
       
  4435   // cnt2 is shorter, so use its count:
       
  4436   delayed()->mov(cnt2, cnt1);
       
  4437   bind(Lskip);
       
  4438 
       
  4439   // Rename registers
       
  4440   Register limit1 = cnt1;
       
  4441   Register limit2 = limit1;
       
  4442   Register chr1   = result;
       
  4443   Register chr2   = cnt2;
       
  4444   if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
       
  4445     // We need an additional register to keep track of two limits
       
  4446     assert_different_registers(str1, str2, cnt1, cnt2, tmp1, tmp2, result);
       
  4447     limit2 = tmp2;
       
  4448   }
       
  4449 
       
  4450   // Is the minimum length zero?
       
  4451   cmp(limit1, (int)0); // use cast to resolve overloading ambiguity
       
  4452   br(Assembler::equal, true, Assembler::pn, Ldone);
       
  4453   // result is difference in lengths
       
  4454   if (ae == StrIntrinsicNode::UU) {
       
  4455     delayed()->sra(diff, 1, result);  // Divide by 2 to get number of chars
       
  4456   } else {
       
  4457     delayed()->mov(diff, result);
       
  4458   }
       
  4459 
       
  4460   // Load first characters
       
  4461   if (ae == StrIntrinsicNode::LL) {
       
  4462     stride1 = stride2 = sizeof(jbyte);
       
  4463     ldub(str1, 0, chr1);
       
  4464     ldub(str2, 0, chr2);
       
  4465   } else if (ae == StrIntrinsicNode::UU) {
       
  4466     stride1 = stride2 = sizeof(jchar);
       
  4467     lduh(str1, 0, chr1);
       
  4468     lduh(str2, 0, chr2);
       
  4469   } else {
       
  4470     stride1 = sizeof(jbyte);
       
  4471     stride2 = sizeof(jchar);
       
  4472     ldub(str1, 0, chr1);
       
  4473     lduh(str2, 0, chr2);
       
  4474   }
       
  4475 
       
  4476   // Compare first characters
       
  4477   subcc(chr1, chr2, chr1);
       
  4478   br(Assembler::notZero, false, Assembler::pt, Ldone);
       
  4479   assert(chr1 == result, "result must be pre-placed");
       
  4480   delayed()->nop();
       
  4481 
       
  4482   // Check if the strings start at same location
       
  4483   cmp(str1, str2);
       
  4484   brx(Assembler::equal, true, Assembler::pn, Ldone);
       
  4485   delayed()->mov(G0, result);  // result is zero
       
  4486 
       
  4487   // We have no guarantee that on 64 bit the higher half of limit is 0
       
  4488   signx(limit1);
       
  4489 
       
  4490   // Get limit
       
  4491   if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
       
  4492     sll(limit1, 1, limit2);
       
  4493     subcc(limit2, stride2, chr2);
       
  4494   }
       
  4495   subcc(limit1, stride1, chr1);
       
  4496   br(Assembler::zero, true, Assembler::pn, Ldone);
       
  4497   // result is difference in lengths
       
  4498   if (ae == StrIntrinsicNode::UU) {
       
  4499     delayed()->sra(diff, 1, result);  // Divide by 2 to get number of chars
       
  4500   } else {
       
  4501     delayed()->mov(diff, result);
       
  4502   }
       
  4503 
       
  4504   // Shift str1 and str2 to the end of the arrays, negate limit
       
  4505   add(str1, limit1, str1);
       
  4506   add(str2, limit2, str2);
       
  4507   neg(chr1, limit1);  // limit1 = -(limit1-stride1)
       
  4508   if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
       
  4509     neg(chr2, limit2);  // limit2 = -(limit2-stride2)
       
  4510   }
       
  4511 
       
  4512   // Compare the rest of the characters
       
  4513   if (ae == StrIntrinsicNode::UU) {
       
  4514     lduh(str1, limit1, chr1);
       
  4515   } else {
       
  4516     ldub(str1, limit1, chr1);
       
  4517   }
       
  4518 
       
  4519   bind(Lloop);
       
  4520   if (ae == StrIntrinsicNode::LL) {
       
  4521     ldub(str2, limit2, chr2);
       
  4522   } else {
       
  4523     lduh(str2, limit2, chr2);
       
  4524   }
       
  4525 
       
  4526   subcc(chr1, chr2, chr1);
       
  4527   br(Assembler::notZero, false, Assembler::pt, Ldone);
       
  4528   assert(chr1 == result, "result must be pre-placed");
       
  4529   delayed()->inccc(limit1, stride1);
       
  4530   if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
       
  4531     inccc(limit2, stride2);
       
  4532   }
       
  4533 
       
  4534   // annul LDUB/LDUH if branch is not taken to prevent access past end of string
       
  4535   br(Assembler::notZero, true, Assembler::pt, Lloop);
       
  4536   if (ae == StrIntrinsicNode::UU) {
       
  4537     delayed()->lduh(str1, limit2, chr1);
       
  4538   } else {
       
  4539     delayed()->ldub(str1, limit1, chr1);
       
  4540   }
       
  4541 
       
  4542   // If strings are equal up to min length, return the length difference.
       
  4543   if (ae == StrIntrinsicNode::UU) {
       
  4544     // Divide by 2 to get number of chars
       
  4545     sra(diff, 1, result);
       
  4546   } else {
       
  4547     mov(diff, result);
       
  4548   }
       
  4549 
       
  4550   // Otherwise, return the difference between the first mismatched chars.
       
  4551   bind(Ldone);
       
  4552   if(ae == StrIntrinsicNode::UL) {
       
  4553     // Negate result (see note above)
       
  4554     neg(result);
       
  4555   }
       
  4556 }
       
  4557 
       
  4558 void MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
       
  4559                                   Register limit, Register tmp, Register result, bool is_byte) {
       
  4560   Label Ldone, Lvector, Lloop;
       
  4561   assert_different_registers(ary1, ary2, limit, tmp, result);
       
  4562 
       
  4563   int length_offset  = arrayOopDesc::length_offset_in_bytes();
       
  4564   int base_offset    = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
       
  4565 
       
  4566   if (is_array_equ) {
       
  4567     // return true if the same array
       
  4568     cmp(ary1, ary2);
       
  4569     brx(Assembler::equal, true, Assembler::pn, Ldone);
       
  4570     delayed()->add(G0, 1, result); // equal
       
  4571 
       
  4572     br_null(ary1, true, Assembler::pn, Ldone);
       
  4573     delayed()->mov(G0, result);    // not equal
       
  4574 
       
  4575     br_null(ary2, true, Assembler::pn, Ldone);
       
  4576     delayed()->mov(G0, result);    // not equal
       
  4577 
       
  4578     // load the lengths of arrays
       
  4579     ld(Address(ary1, length_offset), limit);
       
  4580     ld(Address(ary2, length_offset), tmp);
       
  4581 
       
  4582     // return false if the two arrays are not equal length
       
  4583     cmp(limit, tmp);
       
  4584     br(Assembler::notEqual, true, Assembler::pn, Ldone);
       
  4585     delayed()->mov(G0, result);    // not equal
       
  4586   }
       
  4587 
       
  4588   cmp_zero_and_br(Assembler::zero, limit, Ldone, true, Assembler::pn);
       
  4589   delayed()->add(G0, 1, result); // zero-length arrays are equal
       
  4590 
       
  4591   if (is_array_equ) {
       
  4592     // load array addresses
       
  4593     add(ary1, base_offset, ary1);
       
  4594     add(ary2, base_offset, ary2);
       
  4595   } else {
       
  4596     // We have no guarantee that on 64 bit the higher half of limit is 0
       
  4597     signx(limit);
       
  4598   }
       
  4599 
       
  4600   if (is_byte) {
       
  4601     Label Lskip;
       
  4602     // check for trailing byte
       
  4603     andcc(limit, 0x1, tmp);
       
  4604     br(Assembler::zero, false, Assembler::pt, Lskip);
       
  4605     delayed()->nop();
       
  4606 
       
  4607     // compare the trailing byte
       
  4608     sub(limit, sizeof(jbyte), limit);
       
  4609     ldub(ary1, limit, result);
       
  4610     ldub(ary2, limit, tmp);
       
  4611     cmp(result, tmp);
       
  4612     br(Assembler::notEqual, true, Assembler::pt, Ldone);
       
  4613     delayed()->mov(G0, result);    // not equal
       
  4614 
       
  4615     // only one byte?
       
  4616     cmp_zero_and_br(zero, limit, Ldone, true, Assembler::pn);
       
  4617     delayed()->add(G0, 1, result); // zero-length arrays are equal
       
  4618     bind(Lskip);
       
  4619   } else if (is_array_equ) {
       
  4620     // set byte count
       
  4621     sll(limit, exact_log2(sizeof(jchar)), limit);
       
  4622   }
       
  4623 
       
  4624   // check for trailing character
       
  4625   andcc(limit, 0x2, tmp);
  4265   br(Assembler::zero, false, Assembler::pt, Lvector);
  4626   br(Assembler::zero, false, Assembler::pt, Lvector);
  4266   delayed()->nop();
  4627   delayed()->nop();
  4267 
  4628 
  4268   // compare the trailing char
  4629   // compare the trailing char
  4269   sub(limit, sizeof(jchar), limit);
  4630   sub(limit, sizeof(jchar), limit);
  4270   lduh(ary1, limit, chr1);
  4631   lduh(ary1, limit, result);
  4271   lduh(ary2, limit, chr2);
  4632   lduh(ary2, limit, tmp);
  4272   cmp(chr1, chr2);
  4633   cmp(result, tmp);
  4273   br(Assembler::notEqual, true, Assembler::pt, Ldone);
  4634   br(Assembler::notEqual, true, Assembler::pt, Ldone);
  4274   delayed()->mov(G0, result);     // not equal
  4635   delayed()->mov(G0, result);     // not equal
  4275 
  4636 
  4276   // only one char ?
  4637   // only one char?
  4277   cmp_zero_and_br(zero, limit, Ldone, true, Assembler::pn);
  4638   cmp_zero_and_br(zero, limit, Ldone, true, Assembler::pn);
  4278   delayed()->add(G0, 1, result); // zero-length arrays are equal
  4639   delayed()->add(G0, 1, result); // zero-length arrays are equal
  4279 
  4640 
  4280   // word by word compare, don't need alignment check
  4641   // word by word compare, don't need alignment check
  4281   bind(Lvector);
  4642   bind(Lvector);
  4282   // Shift ary1 and ary2 to the end of the arrays, negate limit
  4643   // Shift ary1 and ary2 to the end of the arrays, negate limit
  4283   add(ary1, limit, ary1);
  4644   add(ary1, limit, ary1);
  4284   add(ary2, limit, ary2);
  4645   add(ary2, limit, ary2);
  4285   neg(limit, limit);
  4646   neg(limit, limit);
  4286 
  4647 
  4287   lduw(ary1, limit, chr1);
  4648   lduw(ary1, limit, result);
  4288   bind(Lloop);
  4649   bind(Lloop);
  4289   lduw(ary2, limit, chr2);
  4650   lduw(ary2, limit, tmp);
  4290   cmp(chr1, chr2);
  4651   cmp(result, tmp);
  4291   br(Assembler::notEqual, true, Assembler::pt, Ldone);
  4652   br(Assembler::notEqual, true, Assembler::pt, Ldone);
  4292   delayed()->mov(G0, result);     // not equal
  4653   delayed()->mov(G0, result);     // not equal
  4293   inccc(limit, 2*sizeof(jchar));
  4654   inccc(limit, 2*sizeof(jchar));
  4294   // annul LDUW if branch is not taken to prevent access past end of array
  4655   // annul LDUW if branch is not taken to prevent access past end of array
  4295   br(Assembler::notZero, true, Assembler::pt, Lloop);
  4656   br(Assembler::notZero, true, Assembler::pt, Lloop);
  4296   delayed()->lduw(ary1, limit, chr1); // hoisted
  4657   delayed()->lduw(ary1, limit, result); // hoisted
  4297 
  4658 
  4298   // Caller should set it:
  4659   add(G0, 1, result); // equals
  4299   // add(G0, 1, result); // equals
  4660   bind(Ldone);
  4300 }
  4661 }
       
  4662 
       
  4663 #endif
  4301 
  4664 
  4302 // Use BIS for zeroing (count is in bytes).
  4665 // Use BIS for zeroing (count is in bytes).
  4303 void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Label& Ldone) {
  4666 void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Label& Ldone) {
  4304   assert(UseBlockZeroing && VM_Version::has_block_zeroing(), "only works with BIS zeroing");
  4667   assert(UseBlockZeroing && VM_Version::has_block_zeroing(), "only works with BIS zeroing");
  4305   Register end = count;
  4668   Register end = count;