59 // offset := (7 - (end - start)) + 3 |
59 // offset := (7 - (end - start)) + 3 |
60 // 3 instructions from rdpc to DISPATCH |
60 // 3 instructions from rdpc to DISPATCH |
61 " sub %[offset], %[end], %[offset]\n\t" // offset := start - end |
61 " sub %[offset], %[end], %[offset]\n\t" // offset := start - end |
62 " sllx %[offset], 2, %[offset]\n\t" // scale offset for instruction size of 4 |
62 " sllx %[offset], 2, %[offset]\n\t" // scale offset for instruction size of 4 |
63 " add %[offset], 40, %[offset]\n\t" // offset += 10 * instruction size |
63 " add %[offset], 40, %[offset]\n\t" // offset += 10 * instruction size |
64 " rd %pc, %[pc]\n\t" // dispatch on scaled offset |
64 " rd %%pc, %[pc]\n\t" // dispatch on scaled offset |
65 " jmpl %[pc]+%[offset], %g0\n\t" |
65 " jmpl %[pc]+%[offset], %%g0\n\t" |
66 " nop\n\t" |
66 " nop\n\t" |
67 // DISPATCH: no direct reference, but without it the store block may be elided. |
67 // DISPATCH: no direct reference, but without it the store block may be elided. |
68 "1:\n\t" |
68 "1:\n\t" |
69 " stb %[value], [%[end]-7]\n\t" // end[-7] = value |
69 " stb %[value], [%[end]-7]\n\t" // end[-7] = value |
70 " stb %[value], [%[end]-6]\n\t" |
70 " stb %[value], [%[end]-6]\n\t" |
106 uintptr_t temp; |
106 uintptr_t temp; |
107 __asm__ volatile( |
107 __asm__ volatile( |
108 // Unroll loop x8. |
108 // Unroll loop x8. |
109 " sub %[aend], %[ato], %[temp]\n\t" |
109 " sub %[aend], %[ato], %[temp]\n\t" |
110 " cmp %[temp], 56\n\t" // cc := (aligned_end - aligned_to) > 7 words |
110 " cmp %[temp], 56\n\t" // cc := (aligned_end - aligned_to) > 7 words |
111 " ba %xcc, 2f\n\t" // goto TEST always |
111 " ba %%xcc, 2f\n\t" // goto TEST always |
112 " sub %[aend], 56, %[temp]\n\t" // limit := aligned_end - 7 words |
112 " sub %[aend], 56, %[temp]\n\t" // limit := aligned_end - 7 words |
113 // LOOP: |
113 // LOOP: |
114 "1:\n\t" // unrolled x8 store loop top |
114 "1:\n\t" // unrolled x8 store loop top |
115 " cmp %[temp], %[ato]\n\t" // cc := limit > (next) aligned_to |
115 " cmp %[temp], %[ato]\n\t" // cc := limit > (next) aligned_to |
116 " stx %[xvalue], [%[ato]-64]\n\t" // store 8 words, aligned_to pre-incremented |
116 " stx %[xvalue], [%[ato]-64]\n\t" // store 8 words, aligned_to pre-incremented |
121 " stx %[xvalue], [%[ato]-24]\n\t" |
121 " stx %[xvalue], [%[ato]-24]\n\t" |
122 " stx %[xvalue], [%[ato]-16]\n\t" |
122 " stx %[xvalue], [%[ato]-16]\n\t" |
123 " stx %[xvalue], [%[ato]-8]\n\t" |
123 " stx %[xvalue], [%[ato]-8]\n\t" |
124 // TEST: |
124 // TEST: |
125 "2:\n\t" |
125 "2:\n\t" |
126 " bgu,a %xcc, 1b\n\t" // goto LOOP if more than 7 words remaining |
126 " bgu,a %%xcc, 1b\n\t" // goto LOOP if more than 7 words remaining |
127 " add %[ato], 64, %[ato]\n\t" // aligned_to += 8, for next iteration |
127 " add %[ato], 64, %[ato]\n\t" // aligned_to += 8, for next iteration |
128 // Fill remaining < 8 full words. |
128 // Fill remaining < 8 full words. |
129 // Dispatch on (aligned_end - aligned_to). |
129 // Dispatch on (aligned_end - aligned_to). |
130 // offset := (7 - (aligned_end - aligned_to)) + 3 |
130 // offset := (7 - (aligned_end - aligned_to)) + 3 |
131 // 3 instructions from rdpc to DISPATCH |
131 // 3 instructions from rdpc to DISPATCH |
132 " sub %[ato], %[aend], %[ato]\n\t" // offset := aligned_to - aligned_end |
132 " sub %[ato], %[aend], %[ato]\n\t" // offset := aligned_to - aligned_end |
133 " srax %[ato], 1, %[ato]\n\t" // scale offset for instruction size of 4 |
133 " srax %[ato], 1, %[ato]\n\t" // scale offset for instruction size of 4 |
134 " add %[ato], 40, %[ato]\n\t" // offset += 10 * instruction size |
134 " add %[ato], 40, %[ato]\n\t" // offset += 10 * instruction size |
135 " rd %pc, %[temp]\n\t" // dispatch on scaled offset |
135 " rd %%pc, %[temp]\n\t" // dispatch on scaled offset |
136 " jmpl %[temp]+%[ato], %g0\n\t" |
136 " jmpl %[temp]+%[ato], %%g0\n\t" |
137 " nop\n\t" |
137 " nop\n\t" |
138 // DISPATCH: no direct reference, but without it the store block may be elided. |
138 // DISPATCH: no direct reference, but without it the store block may be elided. |
139 "3:\n\t" |
139 "3:\n\t" |
140 " stx %[xvalue], [%[aend]-56]\n\t" // aligned_end[-7] = xvalue |
140 " stx %[xvalue], [%[aend]-56]\n\t" // aligned_end[-7] = xvalue |
141 " stx %[xvalue], [%[aend]-48]\n\t" |
141 " stx %[xvalue], [%[aend]-48]\n\t" |