|
1 /* |
|
2 * Copyright 1998-2003 Sun Microsystems, Inc. All Rights Reserved. |
|
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
|
4 * |
|
5 * This code is free software; you can redistribute it and/or modify it |
|
6 * under the terms of the GNU General Public License version 2 only, as |
|
7 * published by the Free Software Foundation. Sun designates this |
|
8 * particular file as subject to the "Classpath" exception as provided |
|
9 * by Sun in the LICENSE file that accompanied this code. |
|
10 * |
|
11 * This code is distributed in the hope that it will be useful, but WITHOUT |
|
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 * version 2 for more details (a copy is included in the LICENSE file that |
|
15 * accompanied this code). |
|
16 * |
|
17 * You should have received a copy of the GNU General Public License version |
|
18 * 2 along with this work; if not, write to the Free Software Foundation, |
|
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
|
20 * |
|
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
|
22 * CA 95054 USA or visit www.sun.com if you need additional information or |
|
23 * have any questions. |
|
24 */ |
|
25 |
|
26 |
|
27 |
|
28 #include "vis_proto.h" |
|
29 #include "mlib_image.h" |
|
30 #include "mlib_v_ImageLookUpFunc.h" |
|
31 |
|
32 /***************************************************************/ |
|
33 static void mlib_v_ImageLookUpSI_U8_U8_2_SrcOff0_D1(const mlib_u8 *src, |
|
34 mlib_u8 *dst, |
|
35 mlib_s32 xsize, |
|
36 const mlib_u16 *table); |
|
37 |
|
38 static void mlib_v_ImageLookUpSI_U8_U8_2_SrcOff1_D1(const mlib_u8 *src, |
|
39 mlib_u8 *dst, |
|
40 mlib_s32 xsize, |
|
41 const mlib_u16 *table); |
|
42 |
|
43 static void mlib_v_ImageLookUpSI_U8_U8_2_SrcOff2_D1(const mlib_u8 *src, |
|
44 mlib_u8 *dst, |
|
45 mlib_s32 xsize, |
|
46 const mlib_u16 *table); |
|
47 |
|
48 static void mlib_v_ImageLookUpSI_U8_U8_2_SrcOff3_D1(const mlib_u8 *src, |
|
49 mlib_u8 *dst, |
|
50 mlib_s32 xsize, |
|
51 const mlib_u16 *table); |
|
52 |
|
53 static void mlib_v_ImageLookUpSI_U8_U8_2_DstNonAl_D1(const mlib_u8 *src, |
|
54 mlib_u8 *dst, |
|
55 mlib_s32 xsize, |
|
56 const mlib_u16 *table); |
|
57 |
|
58 static void mlib_v_ImageLookUpSI_U8_U8_2_DstA8D1_SMALL(const mlib_u8 *src, |
|
59 mlib_u8 *dst, |
|
60 mlib_s32 xsize, |
|
61 const mlib_u8 **table); |
|
62 |
|
63 static void mlib_v_ImageLookUpSI_U8_U8_2_D1_SMALL(const mlib_u8 *src, |
|
64 mlib_u8 *dst, |
|
65 mlib_s32 xsize, |
|
66 const mlib_u8 **table); |
|
67 |
|
68 static void mlib_v_ImageLookUpSI_U8_U8_3_SrcOff0_D1(const mlib_u8 *src, |
|
69 mlib_u8 *dst, |
|
70 mlib_s32 xsize, |
|
71 const mlib_d64 *table); |
|
72 |
|
73 static void mlib_v_ImageLookUpSI_U8_U8_3_SrcOff1_D1(const mlib_u8 *src, |
|
74 mlib_u8 *dst, |
|
75 mlib_s32 xsize, |
|
76 const mlib_d64 *table); |
|
77 |
|
78 static void mlib_v_ImageLookUpSI_U8_U8_3_SrcOff2_D1(const mlib_u8 *src, |
|
79 mlib_u8 *dst, |
|
80 mlib_s32 xsize, |
|
81 const mlib_d64 *table); |
|
82 |
|
83 static void mlib_v_ImageLookUpSI_U8_U8_3_SrcOff3_D1(const mlib_u8 *src, |
|
84 mlib_u8 *dst, |
|
85 mlib_s32 xsize, |
|
86 const mlib_d64 *table); |
|
87 |
|
88 static void mlib_v_ImageLookUpSI_U8_U8_3_D1_SMALL(const mlib_u8 *src, |
|
89 mlib_u8 *dst, |
|
90 mlib_s32 xsize, |
|
91 const mlib_u8 **table); |
|
92 |
|
93 static void mlib_v_ImageLookUpSI_U8_U8_4_SrcOff0_D1(const mlib_u8 *src, |
|
94 mlib_u8 *dst, |
|
95 mlib_s32 xsize, |
|
96 const mlib_f32 *table); |
|
97 |
|
98 static void mlib_v_ImageLookUpSI_U8_U8_4_DstNonAl_D1(const mlib_u8 *src, |
|
99 mlib_u8 *dst, |
|
100 mlib_s32 xsize, |
|
101 const mlib_f32 *table); |
|
102 |
|
103 static void mlib_v_ImageLookUpSI_U8_U8_4_DstOff0_D1_SMALL(const mlib_u8 *src, |
|
104 mlib_u8 *dst, |
|
105 mlib_s32 xsize, |
|
106 const mlib_u8 **table); |
|
107 |
|
108 static void mlib_v_ImageLookUpSI_U8_U8_4_DstOff1_D1_SMALL(const mlib_u8 *src, |
|
109 mlib_u8 *dst, |
|
110 mlib_s32 xsize, |
|
111 const mlib_u8 **table); |
|
112 |
|
113 static void mlib_v_ImageLookUpSI_U8_U8_4_DstOff2_D1_SMALL(const mlib_u8 *src, |
|
114 mlib_u8 *dst, |
|
115 mlib_s32 xsize, |
|
116 const mlib_u8 **table); |
|
117 |
|
118 static void mlib_v_ImageLookUpSI_U8_U8_4_DstOff3_D1_SMALL(const mlib_u8 *src, |
|
119 mlib_u8 *dst, |
|
120 mlib_s32 xsize, |
|
121 const mlib_u8 **table); |
|
122 |
|
123 /***************************************************************/ |
|
/*
 * Convenience wrappers around vis_ld_u8_i() / vis_ld_u16_i(): the base
 * pointer is cast to (void *) so that const-qualified tables can be passed
 * without a cast at every call site.  OFFSET is forwarded unchanged.
 */
#define VIS_LD_U8_I(BASE, OFFSET)   vis_ld_u8_i((void *)(BASE), (OFFSET))
#define VIS_LD_U16_I(BASE, OFFSET)  vis_ld_u16_i((void *)(BASE), (OFFSET))
|
126 |
|
127 /***************************************************************/ |
|
128 void mlib_v_ImageLookUpSI_U8_U8_2_SrcOff0_D1(const mlib_u8 *src, |
|
129 mlib_u8 *dst, |
|
130 mlib_s32 xsize, |
|
131 const mlib_u16 *table) |
|
132 { |
|
133 mlib_u32 *sa; /* aligned pointer to source data */ |
|
134 mlib_u8 *sp; /* pointer to source data */ |
|
135 mlib_u32 s0; /* source data */ |
|
136 mlib_u16 *dl; /* pointer to start of destination */ |
|
137 mlib_u16 *dend; /* pointer to end of destination */ |
|
138 mlib_d64 *dp; /* aligned pointer to destination */ |
|
139 mlib_d64 t0, t1, t2; /* destination data */ |
|
140 mlib_d64 t3, acc; /* destination data */ |
|
141 mlib_s32 emask; /* edge mask */ |
|
142 mlib_s32 i, num; /* loop variable */ |
|
143 |
|
144 sa = (mlib_u32*)src; |
|
145 dl = (mlib_u16*)dst; |
|
146 dp = (mlib_d64 *) dl; |
|
147 dend = dl + xsize - 1; |
|
148 |
|
149 vis_alignaddr((void *) 0, 6); |
|
150 |
|
151 if (xsize >= 4) { |
|
152 |
|
153 s0 = sa[0]; |
|
154 sa ++; |
|
155 |
|
156 #pragma pipeloop(0) |
|
157 for(i = 0; i <= xsize - 8; i+=4, sa++) { |
|
158 t3 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE); |
|
159 t2 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE); |
|
160 t1 = VIS_LD_U16_I(table, (s0 >> 15) & 0x1FE); |
|
161 t0 = VIS_LD_U16_I(table, (s0 >> 23) & 0x1FE); |
|
162 acc = vis_faligndata(t3, acc); |
|
163 acc = vis_faligndata(t2, acc); |
|
164 acc = vis_faligndata(t1, acc); |
|
165 acc = vis_faligndata(t0, acc); |
|
166 s0 = sa[0]; |
|
167 *dp++ = acc; |
|
168 } |
|
169 |
|
170 t3 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE); |
|
171 t2 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE); |
|
172 t1 = VIS_LD_U16_I(table, (s0 >> 15) & 0x1FE); |
|
173 t0 = VIS_LD_U16_I(table, (s0 >> 23) & 0x1FE); |
|
174 acc = vis_faligndata(t3, acc); |
|
175 acc = vis_faligndata(t2, acc); |
|
176 acc = vis_faligndata(t1, acc); |
|
177 acc = vis_faligndata(t0, acc); |
|
178 *dp++ = acc; |
|
179 } |
|
180 |
|
181 sp = (mlib_u8*)sa; |
|
182 |
|
183 if ((mlib_addr) dp <= (mlib_addr) dend) { |
|
184 |
|
185 num = (mlib_u16*) dend - (mlib_u16*) dp; |
|
186 sp += num; |
|
187 num ++; |
|
188 #pragma pipeloop(0) |
|
189 for (i = 0; i < num; i ++) { |
|
190 s0 = (mlib_s32) *sp; |
|
191 sp --; |
|
192 |
|
193 t0 = VIS_LD_U16_I(table, 2*s0); |
|
194 acc = vis_faligndata(t0, acc); |
|
195 } |
|
196 |
|
197 emask = vis_edge16(dp, dend); |
|
198 vis_pst_16(acc, dp, emask); |
|
199 } |
|
200 } |
|
201 |
|
202 /***************************************************************/ |
|
203 void mlib_v_ImageLookUpSI_U8_U8_2_SrcOff1_D1(const mlib_u8 *src, |
|
204 mlib_u8 *dst, |
|
205 mlib_s32 xsize, |
|
206 const mlib_u16 *table) |
|
207 { |
|
208 mlib_u32 *sa; /* aligned pointer to source data */ |
|
209 mlib_u8 *sp; /* pointer to source data */ |
|
210 mlib_u32 s0, s1; /* source data */ |
|
211 mlib_u16 *dl; /* pointer to start of destination */ |
|
212 mlib_u16 *dend; /* pointer to end of destination */ |
|
213 mlib_d64 *dp; /* aligned pointer to destination */ |
|
214 mlib_d64 t0, t1, t2; /* destination data */ |
|
215 mlib_d64 t3, acc; /* destination data */ |
|
216 mlib_s32 emask; /* edge mask */ |
|
217 mlib_s32 i, num; /* loop variable */ |
|
218 |
|
219 sa = (mlib_u32*)(src-1); |
|
220 dl = (mlib_u16*)dst; |
|
221 dp = (mlib_d64 *) dl; |
|
222 dend = dl + xsize - 1; |
|
223 |
|
224 vis_alignaddr((void *) 0, 6); |
|
225 |
|
226 s0 = *sa++; |
|
227 |
|
228 if (xsize >= 4) { |
|
229 |
|
230 s1 = sa[0]; |
|
231 sa ++; |
|
232 |
|
233 #pragma pipeloop(0) |
|
234 for(i = 0; i <= xsize - 8; i+=4, sa++) { |
|
235 t3 = VIS_LD_U16_I(table, (s1 >> 23) & 0x1FE); |
|
236 t2 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE); |
|
237 t1 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE); |
|
238 t0 = VIS_LD_U16_I(table, (s0 >> 15) & 0x1FE); |
|
239 acc = vis_faligndata(t3, acc); |
|
240 acc = vis_faligndata(t2, acc); |
|
241 acc = vis_faligndata(t1, acc); |
|
242 acc = vis_faligndata(t0, acc); |
|
243 s0 = s1; |
|
244 s1 = sa[0]; |
|
245 *dp++ = acc; |
|
246 } |
|
247 |
|
248 t3 = VIS_LD_U16_I(table, (s1 >> 23) & 0x1FE); |
|
249 t2 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE); |
|
250 t1 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE); |
|
251 t0 = VIS_LD_U16_I(table, (s0 >> 15) & 0x1FE); |
|
252 acc = vis_faligndata(t3, acc); |
|
253 acc = vis_faligndata(t2, acc); |
|
254 acc = vis_faligndata(t1, acc); |
|
255 acc = vis_faligndata(t0, acc); |
|
256 *dp++ = acc; |
|
257 } |
|
258 |
|
259 sp = (mlib_u8*)sa; |
|
260 sp -= 3; |
|
261 |
|
262 if ((mlib_addr) dp <= (mlib_addr) dend) { |
|
263 |
|
264 num = (mlib_u16*) dend - (mlib_u16*) dp; |
|
265 sp += num; |
|
266 num ++; |
|
267 #pragma pipeloop(0) |
|
268 for (i = 0; i < num; i ++) { |
|
269 s0 = (mlib_s32) *sp; |
|
270 sp --; |
|
271 |
|
272 t0 = VIS_LD_U16_I(table, 2*s0); |
|
273 acc = vis_faligndata(t0, acc); |
|
274 } |
|
275 |
|
276 emask = vis_edge16(dp, dend); |
|
277 vis_pst_16(acc, dp, emask); |
|
278 } |
|
279 } |
|
280 |
|
281 /***************************************************************/ |
|
282 void mlib_v_ImageLookUpSI_U8_U8_2_SrcOff2_D1(const mlib_u8 *src, |
|
283 mlib_u8 *dst, |
|
284 mlib_s32 xsize, |
|
285 const mlib_u16 *table) |
|
286 { |
|
287 mlib_u32 *sa; /* pointer to source data */ |
|
288 mlib_u8 *sp; /* pointer to source data */ |
|
289 mlib_u32 s0, s1; /* source data */ |
|
290 mlib_u16 *dl; /* pointer to start of destination */ |
|
291 mlib_u16 *dend; /* pointer to end of destination */ |
|
292 mlib_d64 *dp; /* aligned pointer to destination */ |
|
293 mlib_d64 t0, t1, t2; /* destination data */ |
|
294 mlib_d64 t3, acc; /* destination data */ |
|
295 mlib_s32 emask; /* edge mask */ |
|
296 mlib_s32 i, num; /* loop variable */ |
|
297 |
|
298 sa = (mlib_u32*)(src-2); |
|
299 dl = (mlib_u16*)dst; |
|
300 dp = (mlib_d64 *) dl; |
|
301 dend = dl + xsize - 1; |
|
302 |
|
303 vis_alignaddr((void *) 0, 6); |
|
304 |
|
305 s0 = *sa++; |
|
306 |
|
307 if (xsize >= 4) { |
|
308 |
|
309 s1 = sa[0]; |
|
310 sa ++; |
|
311 |
|
312 #pragma pipeloop(0) |
|
313 for(i = 0; i <= xsize - 8; i+=4, sa++) { |
|
314 t3 = VIS_LD_U16_I(table, (s1 >> 15) & 0x1FE); |
|
315 t2 = VIS_LD_U16_I(table, (s1 >> 23) & 0x1FE); |
|
316 t1 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE); |
|
317 t0 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE); |
|
318 acc = vis_faligndata(t3, acc); |
|
319 acc = vis_faligndata(t2, acc); |
|
320 acc = vis_faligndata(t1, acc); |
|
321 acc = vis_faligndata(t0, acc); |
|
322 s0 = s1; |
|
323 s1 = sa[0]; |
|
324 *dp++ = acc; |
|
325 } |
|
326 |
|
327 t3 = VIS_LD_U16_I(table, (s1 >> 15) & 0x1FE); |
|
328 t2 = VIS_LD_U16_I(table, (s1 >> 23) & 0x1FE); |
|
329 t1 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE); |
|
330 t0 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE); |
|
331 acc = vis_faligndata(t3, acc); |
|
332 acc = vis_faligndata(t2, acc); |
|
333 acc = vis_faligndata(t1, acc); |
|
334 acc = vis_faligndata(t0, acc); |
|
335 *dp++ = acc; |
|
336 } |
|
337 |
|
338 sp = (mlib_u8*)sa; |
|
339 sp -= 2; |
|
340 |
|
341 if ((mlib_addr) dp <= (mlib_addr) dend) { |
|
342 |
|
343 num = (mlib_u16*) dend - (mlib_u16*) dp; |
|
344 sp += num; |
|
345 num ++; |
|
346 #pragma pipeloop(0) |
|
347 for (i = 0; i < num; i ++) { |
|
348 s0 = (mlib_s32) *sp; |
|
349 sp --; |
|
350 |
|
351 t0 = VIS_LD_U16_I(table, 2*s0); |
|
352 acc = vis_faligndata(t0, acc); |
|
353 } |
|
354 |
|
355 emask = vis_edge16(dp, dend); |
|
356 vis_pst_16(acc, dp, emask); |
|
357 } |
|
358 } |
|
359 |
|
360 /***************************************************************/ |
|
361 void mlib_v_ImageLookUpSI_U8_U8_2_SrcOff3_D1(const mlib_u8 *src, |
|
362 mlib_u8 *dst, |
|
363 mlib_s32 xsize, |
|
364 const mlib_u16 *table) |
|
365 { |
|
366 mlib_u32 *sa; /* aligned pointer to source data */ |
|
367 mlib_u8 *sp; /* pointer to source data */ |
|
368 mlib_u32 s0, s1; /* source data */ |
|
369 mlib_u16 *dl; /* pointer to start of destination */ |
|
370 mlib_u16 *dend; /* pointer to end of destination */ |
|
371 mlib_d64 *dp; /* aligned pointer to destination */ |
|
372 mlib_d64 t0, t1, t2; /* destination data */ |
|
373 mlib_d64 t3, acc; /* destination data */ |
|
374 mlib_s32 emask; /* edge mask */ |
|
375 mlib_s32 i, num; /* loop variable */ |
|
376 |
|
377 sa = (mlib_u32*)(src-3); |
|
378 dl = (mlib_u16*)dst; |
|
379 dp = (mlib_d64 *) dl; |
|
380 dend = dl + xsize - 1; |
|
381 |
|
382 vis_alignaddr((void *) 0, 6); |
|
383 |
|
384 s0 = *sa++; |
|
385 |
|
386 if (xsize >= 4) { |
|
387 |
|
388 s1 = sa[0]; |
|
389 sa ++; |
|
390 |
|
391 #pragma pipeloop(0) |
|
392 for(i = 0; i <= xsize - 8; i+=4, sa++) { |
|
393 t3 = VIS_LD_U16_I(table, (s1 >> 7) & 0x1FE); |
|
394 t2 = VIS_LD_U16_I(table, (s1 >> 15) & 0x1FE); |
|
395 t1 = VIS_LD_U16_I(table, (s1 >> 23) & 0x1FE); |
|
396 t0 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE); |
|
397 acc = vis_faligndata(t3, acc); |
|
398 acc = vis_faligndata(t2, acc); |
|
399 acc = vis_faligndata(t1, acc); |
|
400 acc = vis_faligndata(t0, acc); |
|
401 s0 = s1; |
|
402 s1 = sa[0]; |
|
403 *dp++ = acc; |
|
404 } |
|
405 |
|
406 t3 = VIS_LD_U16_I(table, (s1 >> 7) & 0x1FE); |
|
407 t2 = VIS_LD_U16_I(table, (s1 >> 15) & 0x1FE); |
|
408 t1 = VIS_LD_U16_I(table, (s1 >> 23) & 0x1FE); |
|
409 t0 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE); |
|
410 acc = vis_faligndata(t3, acc); |
|
411 acc = vis_faligndata(t2, acc); |
|
412 acc = vis_faligndata(t1, acc); |
|
413 acc = vis_faligndata(t0, acc); |
|
414 *dp++ = acc; |
|
415 } |
|
416 |
|
417 sp = (mlib_u8*)sa; |
|
418 sp -= 1; |
|
419 |
|
420 if ((mlib_addr) dp <= (mlib_addr) dend) { |
|
421 |
|
422 num = (mlib_u16*) dend - (mlib_u16*) dp; |
|
423 sp += num; |
|
424 num ++; |
|
425 #pragma pipeloop(0) |
|
426 for (i = 0; i < num; i ++) { |
|
427 s0 = (mlib_s32) *sp; |
|
428 sp --; |
|
429 |
|
430 t0 = VIS_LD_U16_I(table, 2*s0); |
|
431 acc = vis_faligndata(t0, acc); |
|
432 } |
|
433 |
|
434 emask = vis_edge16(dp, dend); |
|
435 vis_pst_16(acc, dp, emask); |
|
436 } |
|
437 } |
|
438 |
|
439 /***************************************************************/ |
|
440 void mlib_v_ImageLookUpSI_U8_U8_2_DstNonAl_D1(const mlib_u8 *src, |
|
441 mlib_u8 *dst, |
|
442 mlib_s32 xsize, |
|
443 const mlib_u16 *table) |
|
444 { |
|
445 mlib_u32 *sa; /* aligned pointer to source data */ |
|
446 mlib_u8 *sp; /* pointer to source data */ |
|
447 mlib_u32 s0, s1, s2, s3; /* source data */ |
|
448 mlib_u8 *dl; /* pointer to start of destination */ |
|
449 mlib_u8 *dend; /* pointer to end of destination */ |
|
450 mlib_d64 *dp; /* aligned pointer to destination */ |
|
451 mlib_d64 t0, t1, t2; /* destination data */ |
|
452 mlib_d64 t3, t4, t5; /* destination data */ |
|
453 mlib_d64 t6, t7, acc0; /* destination data */ |
|
454 mlib_d64 acc1, acc2; /* destination data */ |
|
455 mlib_d64 acc3, acc4; /* destination data */ |
|
456 mlib_s32 emask; /* edge mask */ |
|
457 mlib_s32 i, num; /* loop variable */ |
|
458 mlib_s32 off; /* offset */ |
|
459 |
|
460 sa = (mlib_u32*)src; |
|
461 dl = dst; |
|
462 sp = (void *)src; |
|
463 dend = dl + 2*xsize - 1; |
|
464 dp = (mlib_d64 *) ((mlib_addr) dl & (~7)); |
|
465 off = (mlib_addr) dp - (mlib_addr) dl; |
|
466 |
|
467 emask = vis_edge8(dl, dend); |
|
468 num = (xsize < 4) ? xsize : 4; |
|
469 |
|
470 sp += (num-1); |
|
471 |
|
472 vis_alignaddr(dp, 6); |
|
473 |
|
474 for (i = 0; i < num; i ++) { |
|
475 s0 = (mlib_s32) *sp; |
|
476 sp --; |
|
477 |
|
478 t0 = VIS_LD_U16_I(table, 2*s0); |
|
479 acc0 = vis_faligndata(t0, acc0); |
|
480 } |
|
481 |
|
482 vis_alignaddr(dp, off); |
|
483 vis_pst_8(vis_faligndata(acc0, acc0), dp++, emask); |
|
484 |
|
485 sa++; |
|
486 |
|
487 xsize -= 4; |
|
488 |
|
489 i = 0; |
|
490 |
|
491 if (xsize >= 16) { |
|
492 |
|
493 s0 = sa[0]; |
|
494 s1 = sa[1]; |
|
495 s2 = sa[2]; |
|
496 s3 = sa[3]; |
|
497 sa += 4; |
|
498 |
|
499 #pragma pipeloop(0) |
|
500 for(i = 0; i <= xsize - 32; i+=16, sa+=4) { |
|
501 vis_alignaddr(dp, 6); |
|
502 t3 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE); |
|
503 t2 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE); |
|
504 t1 = VIS_LD_U16_I(table, (s0 >> 15) & 0x1FE); |
|
505 t0 = VIS_LD_U16_I(table, (s0 >> 23) & 0x1FE); |
|
506 acc1 = vis_faligndata(t3, acc1); |
|
507 acc1 = vis_faligndata(t2, acc1); |
|
508 acc1 = vis_faligndata(t1, acc1); |
|
509 acc1 = vis_faligndata(t0, acc1); |
|
510 t7 = VIS_LD_U16_I(table, (s1 << 1) & 0x1FE); |
|
511 t6 = VIS_LD_U16_I(table, (s1 >> 7) & 0x1FE); |
|
512 t5 = VIS_LD_U16_I(table, (s1 >> 15) & 0x1FE); |
|
513 t4 = VIS_LD_U16_I(table, (s1 >> 23) & 0x1FE); |
|
514 acc2 = vis_faligndata(t7, acc2); |
|
515 acc2 = vis_faligndata(t6, acc2); |
|
516 acc2 = vis_faligndata(t5, acc2); |
|
517 acc2 = vis_faligndata(t4, acc2); |
|
518 t3 = VIS_LD_U16_I(table, (s2 << 1) & 0x1FE); |
|
519 t2 = VIS_LD_U16_I(table, (s2 >> 7) & 0x1FE); |
|
520 t1 = VIS_LD_U16_I(table, (s2 >> 15) & 0x1FE); |
|
521 t0 = VIS_LD_U16_I(table, (s2 >> 23) & 0x1FE); |
|
522 acc3 = vis_faligndata(t3, acc3); |
|
523 acc3 = vis_faligndata(t2, acc3); |
|
524 acc3 = vis_faligndata(t1, acc3); |
|
525 acc3 = vis_faligndata(t0, acc3); |
|
526 t7 = VIS_LD_U16_I(table, (s3 << 1) & 0x1FE); |
|
527 t6 = VIS_LD_U16_I(table, (s3 >> 7) & 0x1FE); |
|
528 t5 = VIS_LD_U16_I(table, (s3 >> 15) & 0x1FE); |
|
529 t4 = VIS_LD_U16_I(table, (s3 >> 23) & 0x1FE); |
|
530 acc4 = vis_faligndata(t7, acc4); |
|
531 acc4 = vis_faligndata(t6, acc4); |
|
532 acc4 = vis_faligndata(t5, acc4); |
|
533 acc4 = vis_faligndata(t4, acc4); |
|
534 vis_alignaddr(dp, off); |
|
535 s0 = sa[0]; |
|
536 s1 = sa[1]; |
|
537 s2 = sa[2]; |
|
538 s3 = sa[3]; |
|
539 *dp++ = vis_faligndata(acc0, acc1); |
|
540 *dp++ = vis_faligndata(acc1, acc2); |
|
541 *dp++ = vis_faligndata(acc2, acc3); |
|
542 *dp++ = vis_faligndata(acc3, acc4); |
|
543 acc0 = acc4; |
|
544 } |
|
545 |
|
546 vis_alignaddr(dp, 6); |
|
547 t3 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE); |
|
548 t2 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE); |
|
549 t1 = VIS_LD_U16_I(table, (s0 >> 15) & 0x1FE); |
|
550 t0 = VIS_LD_U16_I(table, (s0 >> 23) & 0x1FE); |
|
551 acc1 = vis_faligndata(t3, acc1); |
|
552 acc1 = vis_faligndata(t2, acc1); |
|
553 acc1 = vis_faligndata(t1, acc1); |
|
554 acc1 = vis_faligndata(t0, acc1); |
|
555 t7 = VIS_LD_U16_I(table, (s1 << 1) & 0x1FE); |
|
556 t6 = VIS_LD_U16_I(table, (s1 >> 7) & 0x1FE); |
|
557 t5 = VIS_LD_U16_I(table, (s1 >> 15) & 0x1FE); |
|
558 t4 = VIS_LD_U16_I(table, (s1 >> 23) & 0x1FE); |
|
559 acc2 = vis_faligndata(t7, acc2); |
|
560 acc2 = vis_faligndata(t6, acc2); |
|
561 acc2 = vis_faligndata(t5, acc2); |
|
562 acc2 = vis_faligndata(t4, acc2); |
|
563 t3 = VIS_LD_U16_I(table, (s2 << 1) & 0x1FE); |
|
564 t2 = VIS_LD_U16_I(table, (s2 >> 7) & 0x1FE); |
|
565 t1 = VIS_LD_U16_I(table, (s2 >> 15) & 0x1FE); |
|
566 t0 = VIS_LD_U16_I(table, (s2 >> 23) & 0x1FE); |
|
567 acc3 = vis_faligndata(t3, acc3); |
|
568 acc3 = vis_faligndata(t2, acc3); |
|
569 acc3 = vis_faligndata(t1, acc3); |
|
570 acc3 = vis_faligndata(t0, acc3); |
|
571 t7 = VIS_LD_U16_I(table, (s3 << 1) & 0x1FE); |
|
572 t6 = VIS_LD_U16_I(table, (s3 >> 7) & 0x1FE); |
|
573 t5 = VIS_LD_U16_I(table, (s3 >> 15) & 0x1FE); |
|
574 t4 = VIS_LD_U16_I(table, (s3 >> 23) & 0x1FE); |
|
575 acc4 = vis_faligndata(t7, acc4); |
|
576 acc4 = vis_faligndata(t6, acc4); |
|
577 acc4 = vis_faligndata(t5, acc4); |
|
578 acc4 = vis_faligndata(t4, acc4); |
|
579 vis_alignaddr(dp, off); |
|
580 *dp++ = vis_faligndata(acc0, acc1); |
|
581 *dp++ = vis_faligndata(acc1, acc2); |
|
582 *dp++ = vis_faligndata(acc2, acc3); |
|
583 *dp++ = vis_faligndata(acc3, acc4); |
|
584 acc0 = acc4; i+=16; |
|
585 } |
|
586 |
|
587 if (i <= xsize - 8) { |
|
588 s0 = sa[0]; |
|
589 s1 = sa[1]; |
|
590 vis_alignaddr(dp, 6); |
|
591 t3 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE); |
|
592 t2 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE); |
|
593 t1 = VIS_LD_U16_I(table, (s0 >> 15) & 0x1FE); |
|
594 t0 = VIS_LD_U16_I(table, (s0 >> 23) & 0x1FE); |
|
595 acc1 = vis_faligndata(t3, acc1); |
|
596 acc1 = vis_faligndata(t2, acc1); |
|
597 acc1 = vis_faligndata(t1, acc1); |
|
598 acc1 = vis_faligndata(t0, acc1); |
|
599 t7 = VIS_LD_U16_I(table, (s1 << 1) & 0x1FE); |
|
600 t6 = VIS_LD_U16_I(table, (s1 >> 7) & 0x1FE); |
|
601 t5 = VIS_LD_U16_I(table, (s1 >> 15) & 0x1FE); |
|
602 t4 = VIS_LD_U16_I(table, (s1 >> 23) & 0x1FE); |
|
603 acc2 = vis_faligndata(t7, acc2); |
|
604 acc2 = vis_faligndata(t6, acc2); |
|
605 acc2 = vis_faligndata(t5, acc2); |
|
606 acc2 = vis_faligndata(t4, acc2); |
|
607 vis_alignaddr(dp, off); |
|
608 *dp++ = vis_faligndata(acc0, acc1); |
|
609 *dp++ = vis_faligndata(acc1, acc2); |
|
610 acc0 = acc2; i += 8; sa += 2; |
|
611 } |
|
612 |
|
613 if (i <= xsize - 4) { |
|
614 s0 = *sa++; |
|
615 vis_alignaddr(dp, 6); |
|
616 t3 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE); |
|
617 t2 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE); |
|
618 t1 = VIS_LD_U16_I(table, (s0 >> 15) & 0x1FE); |
|
619 t0 = VIS_LD_U16_I(table, (s0 >> 23) & 0x1FE); |
|
620 acc1 = vis_faligndata(t3, acc1); |
|
621 acc1 = vis_faligndata(t2, acc1); |
|
622 acc1 = vis_faligndata(t1, acc1); |
|
623 acc1 = vis_faligndata(t0, acc1); |
|
624 vis_alignaddr(dp, off); |
|
625 *dp++ = vis_faligndata(acc0, acc1); |
|
626 acc0 = acc1; |
|
627 } |
|
628 |
|
629 sp = (mlib_u8*)sa; |
|
630 |
|
631 if ((mlib_addr) dp <= (mlib_addr) dend) { |
|
632 |
|
633 num = (((mlib_u8*) dend - (mlib_u8*) dp) + off + 1) >> 1; |
|
634 sp += (num - 1); |
|
635 vis_alignaddr(dp, 6); |
|
636 #pragma pipeloop(0) |
|
637 for (i = 0; i < num; i ++) { |
|
638 s0 = (mlib_s32) *sp; |
|
639 sp --; |
|
640 |
|
641 t0 = VIS_LD_U16_I(table, 2*s0); |
|
642 acc1 = vis_faligndata(t0, acc1); |
|
643 } |
|
644 |
|
645 vis_alignaddr(dp, off); |
|
646 emask = vis_edge8(dp, dend); |
|
647 vis_pst_8(vis_faligndata(acc0, acc1), dp++, emask); |
|
648 } |
|
649 |
|
650 if ((mlib_addr) dp <= (mlib_addr) dend) { |
|
651 emask = vis_edge8(dp, dend); |
|
652 vis_pst_8(vis_faligndata(acc1, acc1), dp++, emask); |
|
653 } |
|
654 } |
|
655 |
|
656 /***************************************************************/ |
|
657 void mlib_v_ImageLookUpSI_U8_U8_2_DstA8D1_SMALL(const mlib_u8 *src, |
|
658 mlib_u8 *dst, |
|
659 mlib_s32 xsize, |
|
660 const mlib_u8 **table) |
|
661 { |
|
662 mlib_u8 *sp; /* pointer to source data */ |
|
663 mlib_u32 s0, s1, s2, s3; /* source data */ |
|
664 mlib_u16 *dl; /* pointer to start of destination */ |
|
665 mlib_u16 *dend; /* pointer to end of destination */ |
|
666 mlib_d64 *dp; /* aligned pointer to destination */ |
|
667 mlib_d64 t0, t1, t2; /* destination data */ |
|
668 mlib_d64 t3, t4, t5; /* destination data */ |
|
669 mlib_d64 t6, t7, acc; /* destination data */ |
|
670 mlib_s32 emask; /* edge mask */ |
|
671 mlib_s32 i, num; /* loop variable */ |
|
672 const mlib_u8 *tab0 = table[0]; |
|
673 const mlib_u8 *tab1 = table[1]; |
|
674 |
|
675 sp = (void *)src; |
|
676 dl = (mlib_u16*)dst; |
|
677 dp = (mlib_d64 *) dl; |
|
678 dend = dl + xsize - 1; |
|
679 |
|
680 vis_alignaddr((void *) 0, 7); |
|
681 |
|
682 if (xsize >= 4) { |
|
683 |
|
684 s0 = sp[0]; |
|
685 s1 = sp[1]; |
|
686 s2 = sp[2]; |
|
687 s3 = sp[3]; |
|
688 sp += 4; |
|
689 |
|
690 #pragma pipeloop(0) |
|
691 for(i = 0; i <= xsize - 8; i+=4, sp+=4) { |
|
692 t7 = VIS_LD_U8_I(tab1, s3); |
|
693 t6 = VIS_LD_U8_I(tab0, s3); |
|
694 t5 = VIS_LD_U8_I(tab1, s2); |
|
695 t4 = VIS_LD_U8_I(tab0, s2); |
|
696 t3 = VIS_LD_U8_I(tab1, s1); |
|
697 t2 = VIS_LD_U8_I(tab0, s1); |
|
698 t1 = VIS_LD_U8_I(tab1, s0); |
|
699 t0 = VIS_LD_U8_I(tab0, s0); |
|
700 acc = vis_faligndata(t7, acc); |
|
701 acc = vis_faligndata(t6, acc); |
|
702 acc = vis_faligndata(t5, acc); |
|
703 acc = vis_faligndata(t4, acc); |
|
704 acc = vis_faligndata(t3, acc); |
|
705 acc = vis_faligndata(t2, acc); |
|
706 acc = vis_faligndata(t1, acc); |
|
707 acc = vis_faligndata(t0, acc); |
|
708 s0 = sp[0]; |
|
709 s1 = sp[1]; |
|
710 s2 = sp[2]; |
|
711 s3 = sp[3]; |
|
712 *dp++ = acc; |
|
713 } |
|
714 |
|
715 t7 = VIS_LD_U8_I(tab1, s3); |
|
716 t6 = VIS_LD_U8_I(tab0, s3); |
|
717 t5 = VIS_LD_U8_I(tab1, s2); |
|
718 t4 = VIS_LD_U8_I(tab0, s2); |
|
719 t3 = VIS_LD_U8_I(tab1, s1); |
|
720 t2 = VIS_LD_U8_I(tab0, s1); |
|
721 t1 = VIS_LD_U8_I(tab1, s0); |
|
722 t0 = VIS_LD_U8_I(tab0, s0); |
|
723 acc = vis_faligndata(t7, acc); |
|
724 acc = vis_faligndata(t6, acc); |
|
725 acc = vis_faligndata(t5, acc); |
|
726 acc = vis_faligndata(t4, acc); |
|
727 acc = vis_faligndata(t3, acc); |
|
728 acc = vis_faligndata(t2, acc); |
|
729 acc = vis_faligndata(t1, acc); |
|
730 acc = vis_faligndata(t0, acc); |
|
731 *dp++ = acc; |
|
732 } |
|
733 |
|
734 if ((mlib_addr) dp <= (mlib_addr) dend) { |
|
735 |
|
736 num = (mlib_u16*) dend - (mlib_u16*) dp; |
|
737 sp += num; |
|
738 num ++; |
|
739 #pragma pipeloop(0) |
|
740 for (i = 0; i < num; i ++) { |
|
741 s0 = (mlib_s32) *sp; |
|
742 sp --; |
|
743 |
|
744 t0 = VIS_LD_U8_I(tab1, s0); |
|
745 acc = vis_faligndata(t0, acc); |
|
746 |
|
747 t0 = VIS_LD_U8_I(tab0, s0); |
|
748 acc = vis_faligndata(t0, acc); |
|
749 } |
|
750 |
|
751 emask = vis_edge16(dp, dend); |
|
752 vis_pst_16(acc, dp, emask); |
|
753 } |
|
754 } |
|
755 |
|
756 /***************************************************************/ |
|
757 void mlib_v_ImageLookUpSI_U8_U8_2_D1_SMALL(const mlib_u8 *src, |
|
758 mlib_u8 *dst, |
|
759 mlib_s32 xsize, |
|
760 const mlib_u8 **table) |
|
761 { |
|
762 mlib_u8 *sp; /* pointer to source data */ |
|
763 mlib_u32 s0, s1, s2, s3, s4; /* source data */ |
|
764 mlib_u8 *dl; /* pointer to start of destination */ |
|
765 mlib_u8 *dend; /* pointer to end of destination */ |
|
766 mlib_d64 *dp; /* aligned pointer to destination */ |
|
767 mlib_d64 t0, t1, t2; /* destination data */ |
|
768 mlib_d64 t3, t4, t5; /* destination data */ |
|
769 mlib_d64 t6, t7, acc; /* destination data */ |
|
770 mlib_s32 emask; /* edge mask */ |
|
771 mlib_s32 i, num; /* loop variable */ |
|
772 const mlib_u8 *tab0 = table[0]; |
|
773 const mlib_u8 *tab1 = table[1]; |
|
774 |
|
775 sp = (void *)src; |
|
776 dl = dst; |
|
777 |
|
778 dend = dl + 2 * xsize - 1; |
|
779 |
|
780 vis_alignaddr((void *) 0, 7); |
|
781 |
|
782 s0 = *sp++; |
|
783 *dl++ = tab0[s0]; |
|
784 dp = (mlib_d64 *) dl; |
|
785 xsize--; |
|
786 |
|
787 if (xsize >= 4) { |
|
788 |
|
789 s1 = sp[0]; |
|
790 s2 = sp[1]; |
|
791 s3 = sp[2]; |
|
792 s4 = sp[3]; |
|
793 sp += 4; |
|
794 |
|
795 #pragma pipeloop(0) |
|
796 for(i = 0; i <= xsize - 8; i+=4, sp+=4) { |
|
797 t7 = VIS_LD_U8_I(tab0, s4); |
|
798 t6 = VIS_LD_U8_I(tab1, s3); |
|
799 t5 = VIS_LD_U8_I(tab0, s3); |
|
800 t4 = VIS_LD_U8_I(tab1, s2); |
|
801 t3 = VIS_LD_U8_I(tab0, s2); |
|
802 t2 = VIS_LD_U8_I(tab1, s1); |
|
803 t1 = VIS_LD_U8_I(tab0, s1); |
|
804 t0 = VIS_LD_U8_I(tab1, s0); |
|
805 acc = vis_faligndata(t7, acc); |
|
806 acc = vis_faligndata(t6, acc); |
|
807 acc = vis_faligndata(t5, acc); |
|
808 acc = vis_faligndata(t4, acc); |
|
809 acc = vis_faligndata(t3, acc); |
|
810 acc = vis_faligndata(t2, acc); |
|
811 acc = vis_faligndata(t1, acc); |
|
812 acc = vis_faligndata(t0, acc); |
|
813 s0 = s4; |
|
814 s1 = sp[0]; |
|
815 s2 = sp[1]; |
|
816 s3 = sp[2]; |
|
817 s4 = sp[3]; |
|
818 *dp++ = acc; |
|
819 } |
|
820 |
|
821 t7 = VIS_LD_U8_I(tab0, s4); |
|
822 t6 = VIS_LD_U8_I(tab1, s3); |
|
823 t5 = VIS_LD_U8_I(tab0, s3); |
|
824 t4 = VIS_LD_U8_I(tab1, s2); |
|
825 t3 = VIS_LD_U8_I(tab0, s2); |
|
826 t2 = VIS_LD_U8_I(tab1, s1); |
|
827 t1 = VIS_LD_U8_I(tab0, s1); |
|
828 t0 = VIS_LD_U8_I(tab1, s0); |
|
829 acc = vis_faligndata(t7, acc); |
|
830 acc = vis_faligndata(t6, acc); |
|
831 acc = vis_faligndata(t5, acc); |
|
832 acc = vis_faligndata(t4, acc); |
|
833 acc = vis_faligndata(t3, acc); |
|
834 acc = vis_faligndata(t2, acc); |
|
835 acc = vis_faligndata(t1, acc); |
|
836 acc = vis_faligndata(t0, acc); |
|
837 s0 = s4; |
|
838 *dp++ = acc; |
|
839 } |
|
840 |
|
841 num = ((mlib_u8*) dend - (mlib_u8*) dp) >> 1; |
|
842 sp += num; |
|
843 num ++; |
|
844 |
|
845 #pragma pipeloop(0) |
|
846 for (i = 0; i < num; i ++) { |
|
847 s1 = (mlib_s32) *sp; |
|
848 sp --; |
|
849 |
|
850 t0 = VIS_LD_U8_I(tab1, s1); |
|
851 acc = vis_faligndata(t0, acc); |
|
852 |
|
853 t0 = VIS_LD_U8_I(tab0, s1); |
|
854 acc = vis_faligndata(t0, acc); |
|
855 } |
|
856 |
|
857 t0 = VIS_LD_U8_I(tab1, s0); |
|
858 acc = vis_faligndata(t0, acc); |
|
859 emask = vis_edge8(dp, dend); |
|
860 vis_pst_8(acc, dp, emask); |
|
861 } |
|
862 |
|
863 /***************************************************************/ |
|
864 void mlib_v_ImageLookUpSI_U8_U8_2(const mlib_u8 *src, |
|
865 mlib_s32 slb, |
|
866 mlib_u8 *dst, |
|
867 mlib_s32 dlb, |
|
868 mlib_s32 xsize, |
|
869 mlib_s32 ysize, |
|
870 const mlib_u8 **table) |
|
871 { |
|
872 if ((xsize * ysize) < 650) { |
|
873 mlib_u8 *sl; |
|
874 mlib_u8 *dl; |
|
875 mlib_s32 i, j; |
|
876 |
|
877 sl = (void *)src; |
|
878 dl = dst; |
|
879 |
|
880 /* row loop */ |
|
881 for (j = 0; j < ysize; j ++) { |
|
882 mlib_u8 *sp = sl; |
|
883 mlib_u8 *dp = dl; |
|
884 mlib_s32 off, s0, size = xsize; |
|
885 |
|
886 off = ((8 - ((mlib_addr)dp & 7)) & 7) >> 1; |
|
887 off = (off < size) ? off : size; |
|
888 |
|
889 for (i = 0; i < off; i++) { |
|
890 s0 = *sp++; |
|
891 *dp++ = table[0][s0]; |
|
892 *dp++ = table[1][s0]; |
|
893 size--; |
|
894 } |
|
895 |
|
896 if (size > 0) { |
|
897 |
|
898 if (((mlib_addr)dp & 1) == 0) { |
|
899 mlib_v_ImageLookUpSI_U8_U8_2_DstA8D1_SMALL(sp, dp, size, table); |
|
900 } else { |
|
901 mlib_v_ImageLookUpSI_U8_U8_2_D1_SMALL(sp, dp, size, table); |
|
902 } |
|
903 } |
|
904 |
|
905 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb); |
|
906 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb); |
|
907 } |
|
908 |
|
909 } else { |
|
910 mlib_u8 *sl; |
|
911 mlib_u8 *dl; |
|
912 mlib_u16 tab[256]; |
|
913 const mlib_u8 *tab0 = table[0]; |
|
914 const mlib_u8 *tab1 = table[1]; |
|
915 mlib_s32 i, j, s0, s1, s2; |
|
916 |
|
917 s0 = tab0[0]; |
|
918 s1 = tab1[0]; |
|
919 for (i = 1; i < 256; i++) { |
|
920 s2 = (s0 << 8) + s1; |
|
921 s0 = tab0[i]; |
|
922 s1 = tab1[i]; |
|
923 tab[i-1] = (mlib_u16)s2; |
|
924 } |
|
925 |
|
926 s2 = (s0 << 8) + s1; |
|
927 tab[255] = (mlib_u16)s2; |
|
928 |
|
929 sl = (void *)src; |
|
930 dl = dst; |
|
931 |
|
932 /* row loop */ |
|
933 for (j = 0; j < ysize; j ++) { |
|
934 mlib_u8 *sp = sl; |
|
935 mlib_u8 *dp = dl; |
|
936 mlib_s32 off, s0, size = xsize; |
|
937 |
|
938 if (((mlib_addr)dp & 1) == 0) { |
|
939 |
|
940 off = ((8 - ((mlib_addr)dp & 7)) & 7) >> 1; |
|
941 off = (off < size) ? off : size; |
|
942 |
|
943 for (i = 0; i < off; i++) { |
|
944 *(mlib_u16*)dp = tab[(*sp)]; |
|
945 dp += 2; |
|
946 size--; sp++; |
|
947 } |
|
948 |
|
949 if (size > 0) { |
|
950 |
|
951 off = (mlib_addr)sp & 3; |
|
952 |
|
953 if (off == 0) { |
|
954 mlib_v_ImageLookUpSI_U8_U8_2_SrcOff0_D1(sp, dp, size, tab); |
|
955 } else if (off == 1) { |
|
956 mlib_v_ImageLookUpSI_U8_U8_2_SrcOff1_D1(sp, dp, size, tab); |
|
957 } else if (off == 2) { |
|
958 mlib_v_ImageLookUpSI_U8_U8_2_SrcOff2_D1(sp, dp, size, tab); |
|
959 } else { |
|
960 mlib_v_ImageLookUpSI_U8_U8_2_SrcOff3_D1(sp, dp, size, tab); |
|
961 } |
|
962 } |
|
963 |
|
964 } else { |
|
965 |
|
966 off = ((4 - ((mlib_addr)sp & 3)) & 3); |
|
967 off = (off < size) ? off : size; |
|
968 |
|
969 for (i = 0; i < off; i++) { |
|
970 s0 = tab[(*sp)]; |
|
971 *dp++ = (s0 >> 8); |
|
972 *dp++ = (s0 & 0xFF); |
|
973 size--; sp++; |
|
974 } |
|
975 |
|
976 if (size > 0) { |
|
977 mlib_v_ImageLookUpSI_U8_U8_2_DstNonAl_D1(sp, dp, size, tab); |
|
978 } |
|
979 } |
|
980 |
|
981 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb); |
|
982 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb); |
|
983 } |
|
984 } |
|
985 } |
|
986 |
|
987 /***************************************************************/ |
|
988 void mlib_v_ImageLookUpSI_U8_U8_3_SrcOff0_D1(const mlib_u8 *src, |
|
989 mlib_u8 *dst, |
|
990 mlib_s32 xsize, |
|
991 const mlib_d64 *table) |
|
992 { |
|
993 mlib_u8 *sp; /* pointer to source data */ |
|
994 mlib_u32 *sa; /* aligned pointer to source data */ |
|
995 mlib_u32 s0; /* source data */ |
|
996 mlib_u8 *dl; /* pointer to start of destination */ |
|
997 mlib_f32 *dp; /* aligned pointer to destination */ |
|
998 mlib_d64 t0, t1, t2, t3; /* destination data */ |
|
999 mlib_d64 acc0, acc1; /* destination data */ |
|
1000 mlib_s32 i; /* loop variable */ |
|
1001 mlib_u8 *ptr; |
|
1002 |
|
1003 dl = dst; |
|
1004 dp = (mlib_f32 *) dl; |
|
1005 sp = (void *)src; |
|
1006 sa = (mlib_u32*)sp; |
|
1007 |
|
1008 vis_alignaddr((void *) 0, 3); |
|
1009 |
|
1010 i = 0; |
|
1011 |
|
1012 if (xsize >= 4) { |
|
1013 |
|
1014 s0 = *sa++; |
|
1015 |
|
1016 #pragma pipeloop(0) |
|
1017 for(i = 0; i <= xsize - 8; i+=4, dp+=3) { |
|
1018 t0 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 21) & 0x7F8 )); |
|
1019 t1 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 13) & 0x7F8 )); |
|
1020 t2 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 5) & 0x7F8 )); |
|
1021 t3 = *(mlib_d64*)((mlib_u8*)table + ((s0 << 3) & 0x7F8 )); |
|
1022 acc0 = vis_faligndata(t0, t0); |
|
1023 acc0 = vis_faligndata(acc0, t1); |
|
1024 acc1 = vis_faligndata(acc0, acc0); |
|
1025 acc0 = vis_faligndata(acc0, t2); |
|
1026 acc1 = vis_faligndata(acc1, acc0); |
|
1027 acc0 = vis_faligndata(acc0, t3); |
|
1028 s0 = *sa++; |
|
1029 dp[0] = vis_read_lo(acc1); |
|
1030 dp[1] = vis_read_hi(acc0); |
|
1031 dp[2] = vis_read_lo(acc0); |
|
1032 } |
|
1033 |
|
1034 t0 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 21) & 0x7F8 )); |
|
1035 t1 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 13) & 0x7F8 )); |
|
1036 t2 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 5) & 0x7F8 )); |
|
1037 t3 = *(mlib_d64*)((mlib_u8*)table + ((s0 << 3) & 0x7F8 )); |
|
1038 acc0 = vis_faligndata(t0, t0); |
|
1039 acc0 = vis_faligndata(acc0, t1); |
|
1040 acc1 = vis_faligndata(acc0, acc0); |
|
1041 acc0 = vis_faligndata(acc0, t2); |
|
1042 acc1 = vis_faligndata(acc1, acc0); |
|
1043 acc0 = vis_faligndata(acc0, t3); |
|
1044 dp[0] = vis_read_lo(acc1); |
|
1045 dp[1] = vis_read_hi(acc0); |
|
1046 dp[2] = vis_read_lo(acc0); |
|
1047 dp += 3; |
|
1048 i += 4; |
|
1049 } |
|
1050 |
|
1051 dl = (mlib_u8*)dp; |
|
1052 |
|
1053 #pragma pipeloop(0) |
|
1054 for (; i < xsize; i++) { |
|
1055 ptr = (mlib_u8*)(table + src[i]); |
|
1056 dl[0] = ptr[0]; |
|
1057 dl[1] = ptr[1]; |
|
1058 dl[2] = ptr[2]; |
|
1059 dl += 3; |
|
1060 } |
|
1061 } |
|
1062 |
|
1063 /***************************************************************/ |
|
1064 void mlib_v_ImageLookUpSI_U8_U8_3_SrcOff1_D1(const mlib_u8 *src, |
|
1065 mlib_u8 *dst, |
|
1066 mlib_s32 xsize, |
|
1067 const mlib_d64 *table) |
|
1068 { |
|
1069 mlib_u8 *sp; /* pointer to source data */ |
|
1070 mlib_u32 *sa; /* aligned pointer to source data */ |
|
1071 mlib_u32 s0, s1; /* source data */ |
|
1072 mlib_u8 *dl; /* pointer to start of destination */ |
|
1073 mlib_f32 *dp; /* aligned pointer to destination */ |
|
1074 mlib_d64 t0, t1, t2, t3; /* destination data */ |
|
1075 mlib_d64 acc0, acc1; /* destination data */ |
|
1076 mlib_s32 i; /* loop variable */ |
|
1077 mlib_u8 *ptr; |
|
1078 |
|
1079 dl = dst; |
|
1080 dp = (mlib_f32 *) dl; |
|
1081 sp = (void *)src; |
|
1082 sa = (mlib_u32*)(sp - 1); |
|
1083 |
|
1084 vis_alignaddr((void *) 0, 3); |
|
1085 |
|
1086 i = 0; |
|
1087 s0 = *sa++; |
|
1088 |
|
1089 if (xsize >= 4) { |
|
1090 |
|
1091 s1 = *sa++; |
|
1092 |
|
1093 #pragma pipeloop(0) |
|
1094 for(i = 0; i <= xsize - 8; i+=4, dp+=3) { |
|
1095 t0 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 13) & 0x7F8 )); |
|
1096 t1 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 5) & 0x7F8 )); |
|
1097 t2 = *(mlib_d64*)((mlib_u8*)table + ((s0 << 3) & 0x7F8 )); |
|
1098 t3 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 21) & 0x7F8 )); |
|
1099 acc0 = vis_faligndata(t0, t0); |
|
1100 acc0 = vis_faligndata(acc0, t1); |
|
1101 acc1 = vis_faligndata(acc0, acc0); |
|
1102 acc0 = vis_faligndata(acc0, t2); |
|
1103 acc1 = vis_faligndata(acc1, acc0); |
|
1104 acc0 = vis_faligndata(acc0, t3); |
|
1105 s0 = s1; |
|
1106 s1 = *sa++; |
|
1107 dp[0] = vis_read_lo(acc1); |
|
1108 dp[1] = vis_read_hi(acc0); |
|
1109 dp[2] = vis_read_lo(acc0); |
|
1110 } |
|
1111 |
|
1112 t0 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 13) & 0x7F8 )); |
|
1113 t1 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 5) & 0x7F8 )); |
|
1114 t2 = *(mlib_d64*)((mlib_u8*)table + ((s0 << 3) & 0x7F8 )); |
|
1115 t3 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 21) & 0x7F8 )); |
|
1116 acc0 = vis_faligndata(t0, t0); |
|
1117 acc0 = vis_faligndata(acc0, t1); |
|
1118 acc1 = vis_faligndata(acc0, acc0); |
|
1119 acc0 = vis_faligndata(acc0, t2); |
|
1120 acc1 = vis_faligndata(acc1, acc0); |
|
1121 acc0 = vis_faligndata(acc0, t3); |
|
1122 dp[0] = vis_read_lo(acc1); |
|
1123 dp[1] = vis_read_hi(acc0); |
|
1124 dp[2] = vis_read_lo(acc0); |
|
1125 dp += 3; |
|
1126 i += 4; |
|
1127 } |
|
1128 |
|
1129 dl = (mlib_u8*)dp; |
|
1130 |
|
1131 #pragma pipeloop(0) |
|
1132 for (; i < xsize; i++) { |
|
1133 ptr = (mlib_u8*)(table + src[i]); |
|
1134 dl[0] = ptr[0]; |
|
1135 dl[1] = ptr[1]; |
|
1136 dl[2] = ptr[2]; |
|
1137 dl += 3; |
|
1138 } |
|
1139 } |
|
1140 |
|
1141 /***************************************************************/ |
|
1142 void mlib_v_ImageLookUpSI_U8_U8_3_SrcOff2_D1(const mlib_u8 *src, |
|
1143 mlib_u8 *dst, |
|
1144 mlib_s32 xsize, |
|
1145 const mlib_d64 *table) |
|
1146 { |
|
1147 mlib_u8 *sp; /* pointer to source data */ |
|
1148 mlib_u32 *sa; /* aligned pointer to source data */ |
|
1149 mlib_u32 s0, s1; /* source data */ |
|
1150 mlib_u8 *dl; /* pointer to start of destination */ |
|
1151 mlib_f32 *dp; /* aligned pointer to destination */ |
|
1152 mlib_d64 t0, t1, t2, t3; /* destination data */ |
|
1153 mlib_d64 acc0, acc1; /* destination data */ |
|
1154 mlib_s32 i; /* loop variable */ |
|
1155 mlib_u8 *ptr; |
|
1156 |
|
1157 dl = dst; |
|
1158 dp = (mlib_f32 *) dl; |
|
1159 sp = (void *)src; |
|
1160 sa = (mlib_u32*)(sp - 2); |
|
1161 |
|
1162 vis_alignaddr((void *) 0, 3); |
|
1163 |
|
1164 i = 0; |
|
1165 s0 = *sa++; |
|
1166 |
|
1167 if (xsize >= 4) { |
|
1168 |
|
1169 s1 = *sa++; |
|
1170 |
|
1171 #pragma pipeloop(0) |
|
1172 for(i = 0; i <= xsize - 8; i+=4, dp+=3) { |
|
1173 t0 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 5) & 0x7F8 )); |
|
1174 t1 = *(mlib_d64*)((mlib_u8*)table + ((s0 << 3) & 0x7F8 )); |
|
1175 t2 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 21) & 0x7F8 )); |
|
1176 t3 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 13) & 0x7F8 )); |
|
1177 acc0 = vis_faligndata(t0, t0); |
|
1178 acc0 = vis_faligndata(acc0, t1); |
|
1179 acc1 = vis_faligndata(acc0, acc0); |
|
1180 acc0 = vis_faligndata(acc0, t2); |
|
1181 acc1 = vis_faligndata(acc1, acc0); |
|
1182 acc0 = vis_faligndata(acc0, t3); |
|
1183 s0 = s1; |
|
1184 s1 = *sa++; |
|
1185 dp[0] = vis_read_lo(acc1); |
|
1186 dp[1] = vis_read_hi(acc0); |
|
1187 dp[2] = vis_read_lo(acc0); |
|
1188 } |
|
1189 |
|
1190 t0 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 5) & 0x7F8 )); |
|
1191 t1 = *(mlib_d64*)((mlib_u8*)table + ((s0 << 3) & 0x7F8 )); |
|
1192 t2 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 21) & 0x7F8 )); |
|
1193 t3 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 13) & 0x7F8 )); |
|
1194 acc0 = vis_faligndata(t0, t0); |
|
1195 acc0 = vis_faligndata(acc0, t1); |
|
1196 acc1 = vis_faligndata(acc0, acc0); |
|
1197 acc0 = vis_faligndata(acc0, t2); |
|
1198 acc1 = vis_faligndata(acc1, acc0); |
|
1199 acc0 = vis_faligndata(acc0, t3); |
|
1200 dp[0] = vis_read_lo(acc1); |
|
1201 dp[1] = vis_read_hi(acc0); |
|
1202 dp[2] = vis_read_lo(acc0); |
|
1203 dp += 3; |
|
1204 i += 4; |
|
1205 } |
|
1206 |
|
1207 dl = (mlib_u8*)dp; |
|
1208 |
|
1209 #pragma pipeloop(0) |
|
1210 for (; i < xsize; i++) { |
|
1211 ptr = (mlib_u8*)(table + src[i]); |
|
1212 dl[0] = ptr[0]; |
|
1213 dl[1] = ptr[1]; |
|
1214 dl[2] = ptr[2]; |
|
1215 dl += 3; |
|
1216 } |
|
1217 } |
|
1218 |
|
1219 /***************************************************************/ |
|
1220 void mlib_v_ImageLookUpSI_U8_U8_3_SrcOff3_D1(const mlib_u8 *src, |
|
1221 mlib_u8 *dst, |
|
1222 mlib_s32 xsize, |
|
1223 const mlib_d64 *table) |
|
1224 { |
|
1225 mlib_u8 *sp; /* pointer to source data */ |
|
1226 mlib_u32 *sa; /* aligned pointer to source data */ |
|
1227 mlib_u32 s0, s1; /* source data */ |
|
1228 mlib_u8 *dl; /* pointer to start of destination */ |
|
1229 mlib_f32 *dp; /* aligned pointer to destination */ |
|
1230 mlib_d64 t0, t1, t2, t3; /* destination data */ |
|
1231 mlib_d64 acc0, acc1; /* destination data */ |
|
1232 mlib_s32 i; /* loop variable */ |
|
1233 mlib_u8 *ptr; |
|
1234 |
|
1235 dl = dst; |
|
1236 dp = (mlib_f32 *) dl; |
|
1237 sp = (void *)src; |
|
1238 sa = (mlib_u32*)(sp - 3); |
|
1239 |
|
1240 vis_alignaddr((void *) 0, 3); |
|
1241 |
|
1242 i = 0; |
|
1243 s0 = *sa++; |
|
1244 |
|
1245 if (xsize >= 4) { |
|
1246 |
|
1247 s1 = *sa++; |
|
1248 |
|
1249 #pragma pipeloop(0) |
|
1250 for(i = 0; i <= xsize - 8; i+=4, dp+=3) { |
|
1251 t0 = *(mlib_d64*)((mlib_u8*)table + ((s0 << 3) & 0x7F8 )); |
|
1252 t1 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 21) & 0x7F8 )); |
|
1253 t2 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 13) & 0x7F8 )); |
|
1254 t3 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 5) & 0x7F8 )); |
|
1255 acc0 = vis_faligndata(t0, t0); |
|
1256 acc0 = vis_faligndata(acc0, t1); |
|
1257 acc1 = vis_faligndata(acc0, acc0); |
|
1258 acc0 = vis_faligndata(acc0, t2); |
|
1259 acc1 = vis_faligndata(acc1, acc0); |
|
1260 acc0 = vis_faligndata(acc0, t3); |
|
1261 s0 = s1; |
|
1262 s1 = *sa++; |
|
1263 dp[0] = vis_read_lo(acc1); |
|
1264 dp[1] = vis_read_hi(acc0); |
|
1265 dp[2] = vis_read_lo(acc0); |
|
1266 } |
|
1267 |
|
1268 t0 = *(mlib_d64*)((mlib_u8*)table + ((s0 << 3) & 0x7F8 )); |
|
1269 t1 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 21) & 0x7F8 )); |
|
1270 t2 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 13) & 0x7F8 )); |
|
1271 t3 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 5) & 0x7F8 )); |
|
1272 acc0 = vis_faligndata(t0, t0); |
|
1273 acc0 = vis_faligndata(acc0, t1); |
|
1274 acc1 = vis_faligndata(acc0, acc0); |
|
1275 acc0 = vis_faligndata(acc0, t2); |
|
1276 acc1 = vis_faligndata(acc1, acc0); |
|
1277 acc0 = vis_faligndata(acc0, t3); |
|
1278 dp[0] = vis_read_lo(acc1); |
|
1279 dp[1] = vis_read_hi(acc0); |
|
1280 dp[2] = vis_read_lo(acc0); |
|
1281 dp += 3; |
|
1282 i += 4; |
|
1283 } |
|
1284 |
|
1285 dl = (mlib_u8*)dp; |
|
1286 |
|
1287 #pragma pipeloop(0) |
|
1288 for (; i < xsize; i++) { |
|
1289 ptr = (mlib_u8*)(table + src[i]); |
|
1290 dl[0] = ptr[0]; |
|
1291 dl[1] = ptr[1]; |
|
1292 dl[2] = ptr[2]; |
|
1293 dl += 3; |
|
1294 } |
|
1295 } |
|
1296 |
|
1297 /***************************************************************/ |
|
1298 void mlib_v_ImageLookUpSI_U8_U8_3_D1_SMALL(const mlib_u8 *src, |
|
1299 mlib_u8 *dst, |
|
1300 mlib_s32 xsize, |
|
1301 const mlib_u8 **table) |
|
1302 { |
|
1303 mlib_u8 *sp; /* pointer to source data */ |
|
1304 mlib_u8 *dl; /* pointer to start of destination */ |
|
1305 mlib_d64 *dp; /* aligned pointer to destination */ |
|
1306 mlib_d64 t0, t1, t2; /* destination data */ |
|
1307 mlib_d64 t3, t4, t5; /* destination data */ |
|
1308 mlib_d64 t6, t7; /* destination data */ |
|
1309 mlib_d64 acc0, acc1, acc2; /* destination data */ |
|
1310 mlib_s32 i; /* loop variable */ |
|
1311 const mlib_u8 *tab0 = table[0]; |
|
1312 const mlib_u8 *tab1 = table[1]; |
|
1313 const mlib_u8 *tab2 = table[2]; |
|
1314 mlib_u32 s00, s01, s02, s03; |
|
1315 mlib_u32 s10, s11, s12, s13; |
|
1316 |
|
1317 sp = (void *)src; |
|
1318 dl = dst; |
|
1319 dp = (mlib_d64 *) dl; |
|
1320 |
|
1321 vis_alignaddr((void *) 0, 7); |
|
1322 |
|
1323 i = 0; |
|
1324 |
|
1325 if (xsize >= 8) { |
|
1326 |
|
1327 s00 = sp[0]; |
|
1328 s01 = sp[1]; |
|
1329 s02 = sp[2]; |
|
1330 s03 = sp[3]; |
|
1331 s10 = sp[4]; |
|
1332 s11 = sp[5]; |
|
1333 s12 = sp[6]; |
|
1334 s13 = sp[7]; |
|
1335 sp += 8; |
|
1336 |
|
1337 #pragma pipeloop(0) |
|
1338 for(i = 0; i <= xsize - 16; i+=8, sp+=8) { |
|
1339 t7 = VIS_LD_U8_I(tab1, s02); |
|
1340 t6 = VIS_LD_U8_I(tab0, s02); |
|
1341 t5 = VIS_LD_U8_I(tab2, s01); |
|
1342 t4 = VIS_LD_U8_I(tab1, s01); |
|
1343 t3 = VIS_LD_U8_I(tab0, s01); |
|
1344 t2 = VIS_LD_U8_I(tab2, s00); |
|
1345 t1 = VIS_LD_U8_I(tab1, s00); |
|
1346 t0 = VIS_LD_U8_I(tab0, s00); |
|
1347 acc0 = vis_faligndata(t7, acc0); |
|
1348 acc0 = vis_faligndata(t6, acc0); |
|
1349 acc0 = vis_faligndata(t5, acc0); |
|
1350 acc0 = vis_faligndata(t4, acc0); |
|
1351 acc0 = vis_faligndata(t3, acc0); |
|
1352 acc0 = vis_faligndata(t2, acc0); |
|
1353 acc0 = vis_faligndata(t1, acc0); |
|
1354 acc0 = vis_faligndata(t0, acc0); |
|
1355 t7 = VIS_LD_U8_I(tab0, s11); |
|
1356 t6 = VIS_LD_U8_I(tab2, s10); |
|
1357 t5 = VIS_LD_U8_I(tab1, s10); |
|
1358 t4 = VIS_LD_U8_I(tab0, s10); |
|
1359 t3 = VIS_LD_U8_I(tab2, s03); |
|
1360 t2 = VIS_LD_U8_I(tab1, s03); |
|
1361 t1 = VIS_LD_U8_I(tab0, s03); |
|
1362 t0 = VIS_LD_U8_I(tab2, s02); |
|
1363 acc1 = vis_faligndata(t7, acc1); |
|
1364 acc1 = vis_faligndata(t6, acc1); |
|
1365 acc1 = vis_faligndata(t5, acc1); |
|
1366 acc1 = vis_faligndata(t4, acc1); |
|
1367 acc1 = vis_faligndata(t3, acc1); |
|
1368 acc1 = vis_faligndata(t2, acc1); |
|
1369 acc1 = vis_faligndata(t1, acc1); |
|
1370 acc1 = vis_faligndata(t0, acc1); |
|
1371 t7 = VIS_LD_U8_I(tab2, s13); |
|
1372 t6 = VIS_LD_U8_I(tab1, s13); |
|
1373 t5 = VIS_LD_U8_I(tab0, s13); |
|
1374 t4 = VIS_LD_U8_I(tab2, s12); |
|
1375 t3 = VIS_LD_U8_I(tab1, s12); |
|
1376 t2 = VIS_LD_U8_I(tab0, s12); |
|
1377 t1 = VIS_LD_U8_I(tab2, s11); |
|
1378 t0 = VIS_LD_U8_I(tab1, s11); |
|
1379 acc2 = vis_faligndata(t7, acc2); |
|
1380 acc2 = vis_faligndata(t6, acc2); |
|
1381 acc2 = vis_faligndata(t5, acc2); |
|
1382 acc2 = vis_faligndata(t4, acc2); |
|
1383 acc2 = vis_faligndata(t3, acc2); |
|
1384 acc2 = vis_faligndata(t2, acc2); |
|
1385 acc2 = vis_faligndata(t1, acc2); |
|
1386 acc2 = vis_faligndata(t0, acc2); |
|
1387 s00 = sp[0]; |
|
1388 s01 = sp[1]; |
|
1389 s02 = sp[2]; |
|
1390 s03 = sp[3]; |
|
1391 s10 = sp[4]; |
|
1392 s11 = sp[5]; |
|
1393 s12 = sp[6]; |
|
1394 s13 = sp[7]; |
|
1395 *dp++ = acc0; |
|
1396 *dp++ = acc1; |
|
1397 *dp++ = acc2; |
|
1398 } |
|
1399 |
|
1400 t7 = VIS_LD_U8_I(tab1, s02); |
|
1401 t6 = VIS_LD_U8_I(tab0, s02); |
|
1402 t5 = VIS_LD_U8_I(tab2, s01); |
|
1403 t4 = VIS_LD_U8_I(tab1, s01); |
|
1404 t3 = VIS_LD_U8_I(tab0, s01); |
|
1405 t2 = VIS_LD_U8_I(tab2, s00); |
|
1406 t1 = VIS_LD_U8_I(tab1, s00); |
|
1407 t0 = VIS_LD_U8_I(tab0, s00); |
|
1408 acc0 = vis_faligndata(t7, acc0); |
|
1409 acc0 = vis_faligndata(t6, acc0); |
|
1410 acc0 = vis_faligndata(t5, acc0); |
|
1411 acc0 = vis_faligndata(t4, acc0); |
|
1412 acc0 = vis_faligndata(t3, acc0); |
|
1413 acc0 = vis_faligndata(t2, acc0); |
|
1414 acc0 = vis_faligndata(t1, acc0); |
|
1415 acc0 = vis_faligndata(t0, acc0); |
|
1416 t7 = VIS_LD_U8_I(tab0, s11); |
|
1417 t6 = VIS_LD_U8_I(tab2, s10); |
|
1418 t5 = VIS_LD_U8_I(tab1, s10); |
|
1419 t4 = VIS_LD_U8_I(tab0, s10); |
|
1420 t3 = VIS_LD_U8_I(tab2, s03); |
|
1421 t2 = VIS_LD_U8_I(tab1, s03); |
|
1422 t1 = VIS_LD_U8_I(tab0, s03); |
|
1423 t0 = VIS_LD_U8_I(tab2, s02); |
|
1424 acc1 = vis_faligndata(t7, acc1); |
|
1425 acc1 = vis_faligndata(t6, acc1); |
|
1426 acc1 = vis_faligndata(t5, acc1); |
|
1427 acc1 = vis_faligndata(t4, acc1); |
|
1428 acc1 = vis_faligndata(t3, acc1); |
|
1429 acc1 = vis_faligndata(t2, acc1); |
|
1430 acc1 = vis_faligndata(t1, acc1); |
|
1431 acc1 = vis_faligndata(t0, acc1); |
|
1432 t7 = VIS_LD_U8_I(tab2, s13); |
|
1433 t6 = VIS_LD_U8_I(tab1, s13); |
|
1434 t5 = VIS_LD_U8_I(tab0, s13); |
|
1435 t4 = VIS_LD_U8_I(tab2, s12); |
|
1436 t3 = VIS_LD_U8_I(tab1, s12); |
|
1437 t2 = VIS_LD_U8_I(tab0, s12); |
|
1438 t1 = VIS_LD_U8_I(tab2, s11); |
|
1439 t0 = VIS_LD_U8_I(tab1, s11); |
|
1440 acc2 = vis_faligndata(t7, acc2); |
|
1441 acc2 = vis_faligndata(t6, acc2); |
|
1442 acc2 = vis_faligndata(t5, acc2); |
|
1443 acc2 = vis_faligndata(t4, acc2); |
|
1444 acc2 = vis_faligndata(t3, acc2); |
|
1445 acc2 = vis_faligndata(t2, acc2); |
|
1446 acc2 = vis_faligndata(t1, acc2); |
|
1447 acc2 = vis_faligndata(t0, acc2); |
|
1448 *dp++ = acc0; |
|
1449 *dp++ = acc1; |
|
1450 *dp++ = acc2; |
|
1451 i += 8; |
|
1452 } |
|
1453 |
|
1454 dl = (mlib_u8*)dp; |
|
1455 |
|
1456 #pragma pipeloop(0) |
|
1457 for (; i < xsize; i++) { |
|
1458 s00 = sp[0]; |
|
1459 dl[0] = tab0[s00]; |
|
1460 dl[1] = tab1[s00]; |
|
1461 dl[2] = tab2[s00]; |
|
1462 dl += 3; sp ++; |
|
1463 } |
|
1464 } |
|
1465 |
|
1466 /***************************************************************/ |
|
1467 void mlib_v_ImageLookUpSI_U8_U8_3(const mlib_u8 *src, |
|
1468 mlib_s32 slb, |
|
1469 mlib_u8 *dst, |
|
1470 mlib_s32 dlb, |
|
1471 mlib_s32 xsize, |
|
1472 mlib_s32 ysize, |
|
1473 const mlib_u8 **table) |
|
1474 { |
|
1475 if ((xsize * ysize) < 650) { |
|
1476 mlib_u8 *sl; |
|
1477 mlib_u8 *dl; |
|
1478 mlib_s32 i, j; |
|
1479 const mlib_u8 *tab0 = table[0]; |
|
1480 const mlib_u8 *tab1 = table[1]; |
|
1481 const mlib_u8 *tab2 = table[2]; |
|
1482 |
|
1483 sl = (void *)src; |
|
1484 dl = dst; |
|
1485 |
|
1486 /* row loop */ |
|
1487 for (j = 0; j < ysize; j ++) { |
|
1488 mlib_u8 *sp = sl; |
|
1489 mlib_u8 *dp = dl; |
|
1490 mlib_s32 off, s0, size = xsize; |
|
1491 |
|
1492 off = (mlib_addr)dp & 7; |
|
1493 off = (off * 5) & 7; |
|
1494 off = (off < size) ? off : size; |
|
1495 |
|
1496 for (i = 0; i < off; i++) { |
|
1497 s0 = *sp++; |
|
1498 *dp++ = tab0[s0]; |
|
1499 *dp++ = tab1[s0]; |
|
1500 *dp++ = tab2[s0]; |
|
1501 size--; |
|
1502 } |
|
1503 |
|
1504 if (size > 0) { |
|
1505 mlib_v_ImageLookUpSI_U8_U8_3_D1_SMALL(sp, dp, size, table); |
|
1506 } |
|
1507 |
|
1508 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb); |
|
1509 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb); |
|
1510 } |
|
1511 |
|
1512 } else { |
|
1513 mlib_u8 *sl; |
|
1514 mlib_u8 *dl; |
|
1515 mlib_u32 tab[512]; |
|
1516 const mlib_u8 *tab0 = table[0]; |
|
1517 const mlib_u8 *tab1 = table[1]; |
|
1518 const mlib_u8 *tab2 = table[2]; |
|
1519 mlib_s32 i, j; |
|
1520 mlib_u32 s0, s1, s2, s3; |
|
1521 |
|
1522 s0 = tab0[0]; |
|
1523 s1 = tab1[0]; |
|
1524 s2 = tab2[0]; |
|
1525 for (i = 1; i < 256; i++) { |
|
1526 s3 = (s0 << 24) + (s1 << 16) + (s2 << 8); |
|
1527 s0 = tab0[i]; |
|
1528 s1 = tab1[i]; |
|
1529 s2 = tab2[i]; |
|
1530 tab[2*i-2] = s3; |
|
1531 } |
|
1532 |
|
1533 s3 = (s0 << 24) + (s1 << 16) + (s2 << 8); |
|
1534 tab[510] = s3; |
|
1535 |
|
1536 sl = (void *)src; |
|
1537 dl = dst; |
|
1538 |
|
1539 /* row loop */ |
|
1540 for (j = 0; j < ysize; j ++) { |
|
1541 mlib_u8 *sp = sl; |
|
1542 mlib_u8 *dp = dl; |
|
1543 mlib_s32 off, size = xsize; |
|
1544 mlib_u8 *ptr; |
|
1545 |
|
1546 off = ((mlib_addr)dp & 3); |
|
1547 off = (off < size) ? off : size; |
|
1548 |
|
1549 #pragma pipeloop(0) |
|
1550 for (i = 0; i < off; i++) { |
|
1551 ptr = (mlib_u8*)(tab + 2*sp[i]); |
|
1552 dp[0] = ptr[0]; |
|
1553 dp[1] = ptr[1]; |
|
1554 dp[2] = ptr[2]; |
|
1555 dp += 3; |
|
1556 } |
|
1557 |
|
1558 size -= off; |
|
1559 sp += off; |
|
1560 |
|
1561 if (size > 0) { |
|
1562 off = (mlib_addr)sp & 3; |
|
1563 |
|
1564 if (off == 0) { |
|
1565 mlib_v_ImageLookUpSI_U8_U8_3_SrcOff0_D1(sp, dp, size, (mlib_d64*)tab); |
|
1566 } else if (off == 1) { |
|
1567 mlib_v_ImageLookUpSI_U8_U8_3_SrcOff1_D1(sp, dp, size, (mlib_d64*)tab); |
|
1568 } else if (off == 2) { |
|
1569 mlib_v_ImageLookUpSI_U8_U8_3_SrcOff2_D1(sp, dp, size, (mlib_d64*)tab); |
|
1570 } else if (off == 3) { |
|
1571 mlib_v_ImageLookUpSI_U8_U8_3_SrcOff3_D1(sp, dp, size, (mlib_d64*)tab); |
|
1572 } |
|
1573 } |
|
1574 |
|
1575 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb); |
|
1576 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb); |
|
1577 } |
|
1578 } |
|
1579 } |
|
1580 |
|
1581 /***************************************************************/ |
|
1582 void mlib_v_ImageLookUpSI_U8_U8_4_SrcOff0_D1(const mlib_u8 *src, |
|
1583 mlib_u8 *dst, |
|
1584 mlib_s32 xsize, |
|
1585 const mlib_f32 *table) |
|
1586 { |
|
1587 mlib_u32 *sa; /* aligned pointer to source data */ |
|
1588 mlib_u8 *sp; /* pointer to source data */ |
|
1589 mlib_u32 s0; /* source data */ |
|
1590 mlib_f32 *dp; /* aligned pointer to destination */ |
|
1591 mlib_f32 acc0, acc1; /* destination data */ |
|
1592 mlib_f32 acc2, acc3; /* destination data */ |
|
1593 mlib_s32 i; /* loop variable */ |
|
1594 mlib_u32 s00, s01, s02, s03; |
|
1595 |
|
1596 sa = (mlib_u32*)src; |
|
1597 dp = (mlib_f32 *) dst; |
|
1598 |
|
1599 i = 0; |
|
1600 |
|
1601 if (xsize >= 4) { |
|
1602 |
|
1603 s0 = *sa++; |
|
1604 s00 = (s0 >> 22) & 0x3FC; |
|
1605 s01 = (s0 >> 14) & 0x3FC; |
|
1606 |
|
1607 #pragma pipeloop(0) |
|
1608 for(i = 0; i <= xsize - 8; i+=4, dp += 4) { |
|
1609 s02 = (s0 >> 6) & 0x3FC; |
|
1610 s03 = (s0 << 2) & 0x3FC; |
|
1611 acc0 = *(mlib_f32*)((mlib_u8*)table + s00); |
|
1612 acc1 = *(mlib_f32*)((mlib_u8*)table + s01); |
|
1613 acc2 = *(mlib_f32*)((mlib_u8*)table + s02); |
|
1614 acc3 = *(mlib_f32*)((mlib_u8*)table + s03); |
|
1615 s0 = *sa++; |
|
1616 s00 = (s0 >> 22) & 0x3FC; |
|
1617 s01 = (s0 >> 14) & 0x3FC; |
|
1618 dp[0] = acc0; |
|
1619 dp[1] = acc1; |
|
1620 dp[2] = acc2; |
|
1621 dp[3] = acc3; |
|
1622 } |
|
1623 |
|
1624 s02 = (s0 >> 6) & 0x3FC; |
|
1625 s03 = (s0 << 2) & 0x3FC; |
|
1626 acc0 = *(mlib_f32*)((mlib_u8*)table + s00); |
|
1627 acc1 = *(mlib_f32*)((mlib_u8*)table + s01); |
|
1628 acc2 = *(mlib_f32*)((mlib_u8*)table + s02); |
|
1629 acc3 = *(mlib_f32*)((mlib_u8*)table + s03); |
|
1630 dp[0] = acc0; |
|
1631 dp[1] = acc1; |
|
1632 dp[2] = acc2; |
|
1633 dp[3] = acc3; |
|
1634 dp += 4; |
|
1635 i += 4; |
|
1636 } |
|
1637 |
|
1638 sp = (mlib_u8*)sa; |
|
1639 |
|
1640 if ( i <= xsize - 2) { |
|
1641 *dp++ = table[sp[0]]; |
|
1642 *dp++ = table[sp[1]]; |
|
1643 i+=2; sp += 2; |
|
1644 } |
|
1645 |
|
1646 if ( i < xsize) *dp = table[sp[0]]; |
|
1647 } |
|
1648 |
|
1649 /***************************************************************/ |
|
1650 void mlib_v_ImageLookUpSI_U8_U8_4_DstNonAl_D1(const mlib_u8 *src, |
|
1651 mlib_u8 *dst, |
|
1652 mlib_s32 xsize, |
|
1653 const mlib_f32 *table) |
|
1654 { |
|
1655 mlib_u32 *sa; /* aligned pointer to source data */ |
|
1656 mlib_u8 *sp; /* pointer to source data */ |
|
1657 mlib_u32 s0; /* source data */ |
|
1658 mlib_u8 *dl; /* pointer to start of destination */ |
|
1659 mlib_d64 *dp; /* aligned pointer to destination */ |
|
1660 mlib_d64 acc0, acc1, acc2; /* destination data */ |
|
1661 mlib_s32 i; /* loop variable */ |
|
1662 mlib_u8 *dend; /* pointer to end of destination */ |
|
1663 mlib_s32 emask; /* edge mask */ |
|
1664 mlib_s32 off; |
|
1665 mlib_u32 s00, s01, s02, s03; |
|
1666 |
|
1667 sa = (mlib_u32*)src; |
|
1668 sp = (void *)src; |
|
1669 dl = dst; |
|
1670 dend = dl + (xsize << 2) - 1; |
|
1671 dp = (mlib_d64 *) ((mlib_addr) dl & (~7)); |
|
1672 off = (mlib_addr) dp - (mlib_addr) dl; |
|
1673 vis_alignaddr(dp, off); |
|
1674 |
|
1675 emask = vis_edge8(dl, dend); |
|
1676 acc0 = vis_freg_pair(table[sp[0]], table[sp[1]]); |
|
1677 vis_pst_8(vis_faligndata(acc0, acc0), dp++, emask); |
|
1678 sp += 2; |
|
1679 |
|
1680 xsize -= 2; |
|
1681 |
|
1682 if (xsize >= 2) { |
|
1683 acc1 = vis_freg_pair(table[sp[0]], table[sp[1]]); |
|
1684 *dp++ = vis_faligndata(acc0, acc1); |
|
1685 acc0 = acc1; |
|
1686 sp += 2; xsize -= 2; |
|
1687 } |
|
1688 |
|
1689 sa++; |
|
1690 |
|
1691 i = 0; |
|
1692 |
|
1693 if (xsize >= 4) { |
|
1694 |
|
1695 s0 = *sa++; |
|
1696 s00 = (s0 >> 22) & 0x3FC; |
|
1697 s01 = (s0 >> 14) & 0x3FC; |
|
1698 |
|
1699 #pragma pipeloop(0) |
|
1700 for(i = 0; i <= xsize - 8; i+=4, dp += 2) { |
|
1701 s02 = (s0 >> 6) & 0x3FC; |
|
1702 s03 = (s0 << 2) & 0x3FC; |
|
1703 acc1 = vis_freg_pair(*(mlib_f32*)((mlib_u8*)table + s00), |
|
1704 *(mlib_f32*)((mlib_u8*)table + s01)); |
|
1705 acc2 = vis_freg_pair(*(mlib_f32*)((mlib_u8*)table + s02), |
|
1706 *(mlib_f32*)((mlib_u8*)table + s03)); |
|
1707 s0 = *sa++; |
|
1708 s00 = (s0 >> 22) & 0x3FC; |
|
1709 s01 = (s0 >> 14) & 0x3FC; |
|
1710 dp[0] = vis_faligndata(acc0, acc1); |
|
1711 dp[1] = vis_faligndata(acc1, acc2); |
|
1712 acc0 = acc2; |
|
1713 } |
|
1714 |
|
1715 s02 = (s0 >> 6) & 0x3FC; |
|
1716 s03 = (s0 << 2) & 0x3FC; |
|
1717 acc1 = vis_freg_pair(*(mlib_f32*)((mlib_u8*)table + s00), |
|
1718 *(mlib_f32*)((mlib_u8*)table + s01)); |
|
1719 acc2 = vis_freg_pair(*(mlib_f32*)((mlib_u8*)table + s02), |
|
1720 *(mlib_f32*)((mlib_u8*)table + s03)); |
|
1721 dp[0] = vis_faligndata(acc0, acc1); |
|
1722 dp[1] = vis_faligndata(acc1, acc2); |
|
1723 acc0 = acc2; |
|
1724 sp = (mlib_u8*)sa; |
|
1725 dp += 2; |
|
1726 i += 4; |
|
1727 } |
|
1728 |
|
1729 if ( i <= xsize - 2) { |
|
1730 acc1 = vis_freg_pair(table[sp[0]], table[sp[1]]); |
|
1731 *dp++ = vis_faligndata(acc0, acc1); |
|
1732 acc0 = acc1; |
|
1733 i+=2; sp += 2; |
|
1734 } |
|
1735 |
|
1736 if ((mlib_addr) dp <= (mlib_addr) dend) { |
|
1737 emask = vis_edge8(dp, dend); |
|
1738 acc1 = vis_freg_pair(table[sp[0]], table[sp[1]]); |
|
1739 vis_pst_8(vis_faligndata(acc0, acc1), dp++, emask); |
|
1740 } |
|
1741 |
|
1742 if ((mlib_addr) dp <= (mlib_addr) dend) { |
|
1743 emask = vis_edge8(dp, dend); |
|
1744 vis_pst_8(vis_faligndata(acc1, acc1), dp++, emask); |
|
1745 } |
|
1746 } |
|
1747 |
|
1748 /***************************************************************/ |
|
1749 void mlib_v_ImageLookUpSI_U8_U8_4_DstOff0_D1_SMALL(const mlib_u8 *src, |
|
1750 mlib_u8 *dst, |
|
1751 mlib_s32 xsize, |
|
1752 const mlib_u8 **table) |
|
1753 { |
|
1754 mlib_u8 *sp; /* pointer to source data */ |
|
1755 mlib_u32 s0, s1; /* source data */ |
|
1756 mlib_u8 *dl; /* pointer to start of destination */ |
|
1757 mlib_d64 *dp; /* aligned pointer to destination */ |
|
1758 mlib_d64 t0, t1, t2; /* destination data */ |
|
1759 mlib_d64 t3, t4, t5; /* destination data */ |
|
1760 mlib_d64 t6, t7, acc; /* destination data */ |
|
1761 mlib_s32 i; /* loop variable */ |
|
1762 const mlib_u8 *tab0 = table[0]; |
|
1763 const mlib_u8 *tab1 = table[1]; |
|
1764 const mlib_u8 *tab2 = table[2]; |
|
1765 const mlib_u8 *tab3 = table[3]; |
|
1766 |
|
1767 sp = (void *)src; |
|
1768 dl = dst; |
|
1769 dp = (mlib_d64 *) dl; |
|
1770 |
|
1771 vis_alignaddr((void *) 0, 7); |
|
1772 |
|
1773 if (xsize >= 2) { |
|
1774 |
|
1775 s0 = sp[0]; |
|
1776 s1 = sp[1]; |
|
1777 sp += 2; |
|
1778 |
|
1779 #pragma pipeloop(0) |
|
1780 for(i = 0; i <= xsize - 4; i+=2, sp+=2) { |
|
1781 t7 = VIS_LD_U8_I(tab3, s1); |
|
1782 t6 = VIS_LD_U8_I(tab2, s1); |
|
1783 t5 = VIS_LD_U8_I(tab1, s1); |
|
1784 t4 = VIS_LD_U8_I(tab0, s1); |
|
1785 t3 = VIS_LD_U8_I(tab3, s0); |
|
1786 t2 = VIS_LD_U8_I(tab2, s0); |
|
1787 t1 = VIS_LD_U8_I(tab1, s0); |
|
1788 t0 = VIS_LD_U8_I(tab0, s0); |
|
1789 acc = vis_faligndata(t7, acc); |
|
1790 acc = vis_faligndata(t6, acc); |
|
1791 acc = vis_faligndata(t5, acc); |
|
1792 acc = vis_faligndata(t4, acc); |
|
1793 acc = vis_faligndata(t3, acc); |
|
1794 acc = vis_faligndata(t2, acc); |
|
1795 acc = vis_faligndata(t1, acc); |
|
1796 acc = vis_faligndata(t0, acc); |
|
1797 s0 = sp[0]; |
|
1798 s1 = sp[1]; |
|
1799 *dp++ = acc; |
|
1800 } |
|
1801 |
|
1802 t7 = VIS_LD_U8_I(tab3, s1); |
|
1803 t6 = VIS_LD_U8_I(tab2, s1); |
|
1804 t5 = VIS_LD_U8_I(tab1, s1); |
|
1805 t4 = VIS_LD_U8_I(tab0, s1); |
|
1806 t3 = VIS_LD_U8_I(tab3, s0); |
|
1807 t2 = VIS_LD_U8_I(tab2, s0); |
|
1808 t1 = VIS_LD_U8_I(tab1, s0); |
|
1809 t0 = VIS_LD_U8_I(tab0, s0); |
|
1810 acc = vis_faligndata(t7, acc); |
|
1811 acc = vis_faligndata(t6, acc); |
|
1812 acc = vis_faligndata(t5, acc); |
|
1813 acc = vis_faligndata(t4, acc); |
|
1814 acc = vis_faligndata(t3, acc); |
|
1815 acc = vis_faligndata(t2, acc); |
|
1816 acc = vis_faligndata(t1, acc); |
|
1817 acc = vis_faligndata(t0, acc); |
|
1818 *dp++ = acc; |
|
1819 } |
|
1820 |
|
1821 if ((xsize & 1) != 0) { |
|
1822 s0 = sp[0]; |
|
1823 t7 = VIS_LD_U8_I(tab3, s0); |
|
1824 t6 = VIS_LD_U8_I(tab2, s0); |
|
1825 t5 = VIS_LD_U8_I(tab1, s0); |
|
1826 t4 = VIS_LD_U8_I(tab0, s0); |
|
1827 acc = vis_faligndata(t7, acc); |
|
1828 acc = vis_faligndata(t6, acc); |
|
1829 acc = vis_faligndata(t5, acc); |
|
1830 acc = vis_faligndata(t4, acc); |
|
1831 *(mlib_f32*)dp = vis_read_hi(acc); |
|
1832 } |
|
1833 } |
|
1834 |
|
1835 /***************************************************************/ |
|
1836 void mlib_v_ImageLookUpSI_U8_U8_4_DstOff1_D1_SMALL(const mlib_u8 *src, |
|
1837 mlib_u8 *dst, |
|
1838 mlib_s32 xsize, |
|
1839 const mlib_u8 **table) |
|
1840 { |
|
1841 mlib_u8 *sp; /* pointer to source data */ |
|
1842 mlib_u32 s0, s1, s2; /* source data */ |
|
1843 mlib_u8 *dl; /* pointer to start of destination */ |
|
1844 mlib_d64 *dp; /* aligned pointer to destination */ |
|
1845 mlib_d64 t0, t1, t2; /* destination data */ |
|
1846 mlib_d64 t3, t4, t5; /* destination data */ |
|
1847 mlib_d64 t6, t7, acc; /* destination data */ |
|
1848 mlib_s32 i; /* loop variable */ |
|
1849 const mlib_u8 *tab0 = table[0]; |
|
1850 const mlib_u8 *tab1 = table[1]; |
|
1851 const mlib_u8 *tab2 = table[2]; |
|
1852 const mlib_u8 *tab3 = table[3]; |
|
1853 |
|
1854 sp = (void *)src; |
|
1855 dl = dst; |
|
1856 dp = (mlib_d64 *) dl; |
|
1857 |
|
1858 vis_alignaddr((void *) 0, 7); |
|
1859 |
|
1860 s0 = *sp++; |
|
1861 |
|
1862 if (xsize >= 2) { |
|
1863 |
|
1864 s1 = sp[0]; |
|
1865 s2 = sp[1]; |
|
1866 sp += 2; |
|
1867 |
|
1868 #pragma pipeloop(0) |
|
1869 for(i = 0; i <= xsize - 4; i+=2, sp+=2) { |
|
1870 t7 = VIS_LD_U8_I(tab0, s2); |
|
1871 t6 = VIS_LD_U8_I(tab3, s1); |
|
1872 t5 = VIS_LD_U8_I(tab2, s1); |
|
1873 t4 = VIS_LD_U8_I(tab1, s1); |
|
1874 t3 = VIS_LD_U8_I(tab0, s1); |
|
1875 t2 = VIS_LD_U8_I(tab3, s0); |
|
1876 t1 = VIS_LD_U8_I(tab2, s0); |
|
1877 t0 = VIS_LD_U8_I(tab1, s0); |
|
1878 acc = vis_faligndata(t7, acc); |
|
1879 acc = vis_faligndata(t6, acc); |
|
1880 acc = vis_faligndata(t5, acc); |
|
1881 acc = vis_faligndata(t4, acc); |
|
1882 acc = vis_faligndata(t3, acc); |
|
1883 acc = vis_faligndata(t2, acc); |
|
1884 acc = vis_faligndata(t1, acc); |
|
1885 acc = vis_faligndata(t0, acc); |
|
1886 s0 = s2; |
|
1887 s1 = sp[0]; |
|
1888 s2 = sp[1]; |
|
1889 *dp++ = acc; |
|
1890 } |
|
1891 |
|
1892 t7 = VIS_LD_U8_I(tab0, s2); |
|
1893 t6 = VIS_LD_U8_I(tab3, s1); |
|
1894 t5 = VIS_LD_U8_I(tab2, s1); |
|
1895 t4 = VIS_LD_U8_I(tab1, s1); |
|
1896 t3 = VIS_LD_U8_I(tab0, s1); |
|
1897 t2 = VIS_LD_U8_I(tab3, s0); |
|
1898 t1 = VIS_LD_U8_I(tab2, s0); |
|
1899 t0 = VIS_LD_U8_I(tab1, s0); |
|
1900 acc = vis_faligndata(t7, acc); |
|
1901 acc = vis_faligndata(t6, acc); |
|
1902 acc = vis_faligndata(t5, acc); |
|
1903 acc = vis_faligndata(t4, acc); |
|
1904 acc = vis_faligndata(t3, acc); |
|
1905 acc = vis_faligndata(t2, acc); |
|
1906 acc = vis_faligndata(t1, acc); |
|
1907 acc = vis_faligndata(t0, acc); |
|
1908 s0 = s2; |
|
1909 *dp++ = acc; |
|
1910 } |
|
1911 |
|
1912 dl = (mlib_u8*)dp; |
|
1913 |
|
1914 if ((xsize & 1) != 0) { |
|
1915 s1 = sp[0]; |
|
1916 t7 = VIS_LD_U8_I(tab0, s1); |
|
1917 t6 = VIS_LD_U8_I(tab3, s0); |
|
1918 t5 = VIS_LD_U8_I(tab2, s0); |
|
1919 t4 = VIS_LD_U8_I(tab1, s0); |
|
1920 acc = vis_faligndata(t7, acc); |
|
1921 acc = vis_faligndata(t6, acc); |
|
1922 acc = vis_faligndata(t5, acc); |
|
1923 acc = vis_faligndata(t4, acc); |
|
1924 *(mlib_f32*)dl = vis_read_hi(acc); |
|
1925 dl += 4; |
|
1926 s0 = s1; |
|
1927 } |
|
1928 |
|
1929 dl[0] = tab1[s0]; |
|
1930 dl[1] = tab2[s0]; |
|
1931 dl[2] = tab3[s0]; |
|
1932 } |
|
1933 |
|
1934 /***************************************************************/ |
|
1935 void mlib_v_ImageLookUpSI_U8_U8_4_DstOff2_D1_SMALL(const mlib_u8 *src, |
|
1936 mlib_u8 *dst, |
|
1937 mlib_s32 xsize, |
|
1938 const mlib_u8 **table) |
|
1939 { |
|
1940 mlib_u8 *sp; /* pointer to source data */ |
|
1941 mlib_u32 s0, s1, s2; /* source data */ |
|
1942 mlib_u8 *dl; /* pointer to start of destination */ |
|
1943 mlib_d64 *dp; /* aligned pointer to destination */ |
|
1944 mlib_d64 t0, t1, t2; /* destination data */ |
|
1945 mlib_d64 t3, t4, t5; /* destination data */ |
|
1946 mlib_d64 t6, t7, acc; /* destination data */ |
|
1947 mlib_s32 i; /* loop variable */ |
|
1948 const mlib_u8 *tab0 = table[0]; |
|
1949 const mlib_u8 *tab1 = table[1]; |
|
1950 const mlib_u8 *tab2 = table[2]; |
|
1951 const mlib_u8 *tab3 = table[3]; |
|
1952 |
|
1953 sp = (void *)src; |
|
1954 dl = dst; |
|
1955 dp = (mlib_d64 *) dl; |
|
1956 |
|
1957 vis_alignaddr((void *) 0, 7); |
|
1958 |
|
1959 s0 = *sp++; |
|
1960 |
|
1961 if (xsize >= 2) { |
|
1962 |
|
1963 s1 = sp[0]; |
|
1964 s2 = sp[1]; |
|
1965 sp += 2; |
|
1966 |
|
1967 #pragma pipeloop(0) |
|
1968 for(i = 0; i <= xsize - 4; i+=2, sp+=2) { |
|
1969 t7 = VIS_LD_U8_I(tab1, s2); |
|
1970 t6 = VIS_LD_U8_I(tab0, s2); |
|
1971 t5 = VIS_LD_U8_I(tab3, s1); |
|
1972 t4 = VIS_LD_U8_I(tab2, s1); |
|
1973 t3 = VIS_LD_U8_I(tab1, s1); |
|
1974 t2 = VIS_LD_U8_I(tab0, s1); |
|
1975 t1 = VIS_LD_U8_I(tab3, s0); |
|
1976 t0 = VIS_LD_U8_I(tab2, s0); |
|
1977 acc = vis_faligndata(t7, acc); |
|
1978 acc = vis_faligndata(t6, acc); |
|
1979 acc = vis_faligndata(t5, acc); |
|
1980 acc = vis_faligndata(t4, acc); |
|
1981 acc = vis_faligndata(t3, acc); |
|
1982 acc = vis_faligndata(t2, acc); |
|
1983 acc = vis_faligndata(t1, acc); |
|
1984 acc = vis_faligndata(t0, acc); |
|
1985 s0 = s2; |
|
1986 s1 = sp[0]; |
|
1987 s2 = sp[1]; |
|
1988 *dp++ = acc; |
|
1989 } |
|
1990 |
|
1991 t7 = VIS_LD_U8_I(tab1, s2); |
|
1992 t6 = VIS_LD_U8_I(tab0, s2); |
|
1993 t5 = VIS_LD_U8_I(tab3, s1); |
|
1994 t4 = VIS_LD_U8_I(tab2, s1); |
|
1995 t3 = VIS_LD_U8_I(tab1, s1); |
|
1996 t2 = VIS_LD_U8_I(tab0, s1); |
|
1997 t1 = VIS_LD_U8_I(tab3, s0); |
|
1998 t0 = VIS_LD_U8_I(tab2, s0); |
|
1999 acc = vis_faligndata(t7, acc); |
|
2000 acc = vis_faligndata(t6, acc); |
|
2001 acc = vis_faligndata(t5, acc); |
|
2002 acc = vis_faligndata(t4, acc); |
|
2003 acc = vis_faligndata(t3, acc); |
|
2004 acc = vis_faligndata(t2, acc); |
|
2005 acc = vis_faligndata(t1, acc); |
|
2006 acc = vis_faligndata(t0, acc); |
|
2007 s0 = s2; |
|
2008 *dp++ = acc; |
|
2009 } |
|
2010 |
|
2011 dl = (mlib_u8*)dp; |
|
2012 |
|
2013 if ((xsize & 1) != 0) { |
|
2014 s1 = sp[0]; |
|
2015 t7 = VIS_LD_U8_I(tab1, s1); |
|
2016 t6 = VIS_LD_U8_I(tab0, s1); |
|
2017 t5 = VIS_LD_U8_I(tab3, s0); |
|
2018 t4 = VIS_LD_U8_I(tab2, s0); |
|
2019 acc = vis_faligndata(t7, acc); |
|
2020 acc = vis_faligndata(t6, acc); |
|
2021 acc = vis_faligndata(t5, acc); |
|
2022 acc = vis_faligndata(t4, acc); |
|
2023 *(mlib_f32*)dl = vis_read_hi(acc); |
|
2024 dl += 4; |
|
2025 s0 = s1; |
|
2026 } |
|
2027 |
|
2028 dl[0] = tab2[s0]; |
|
2029 dl[1] = tab3[s0]; |
|
2030 } |
|
2031 |
|
2032 /***************************************************************/ |
|
2033 void mlib_v_ImageLookUpSI_U8_U8_4_DstOff3_D1_SMALL(const mlib_u8 *src, |
|
2034 mlib_u8 *dst, |
|
2035 mlib_s32 xsize, |
|
2036 const mlib_u8 **table) |
|
2037 { |
|
2038 mlib_u8 *sp; /* pointer to source data */ |
|
2039 mlib_u32 s0, s1, s2; /* source data */ |
|
2040 mlib_u8 *dl; /* pointer to start of destination */ |
|
2041 mlib_d64 *dp; /* aligned pointer to destination */ |
|
2042 mlib_d64 t0, t1, t2; /* destination data */ |
|
2043 mlib_d64 t3, t4, t5; /* destination data */ |
|
2044 mlib_d64 t6, t7, acc; /* destination data */ |
|
2045 mlib_s32 i; /* loop variable */ |
|
2046 const mlib_u8 *tab0 = table[0]; |
|
2047 const mlib_u8 *tab1 = table[1]; |
|
2048 const mlib_u8 *tab2 = table[2]; |
|
2049 const mlib_u8 *tab3 = table[3]; |
|
2050 |
|
2051 sp = (void *)src; |
|
2052 dl = dst; |
|
2053 dp = (mlib_d64 *) dl; |
|
2054 |
|
2055 vis_alignaddr((void *) 0, 7); |
|
2056 |
|
2057 s0 = *sp++; |
|
2058 |
|
2059 if (xsize >= 2) { |
|
2060 |
|
2061 s1 = sp[0]; |
|
2062 s2 = sp[1]; |
|
2063 sp += 2; |
|
2064 |
|
2065 #pragma pipeloop(0) |
|
2066 for(i = 0; i <= xsize - 4; i+=2, sp+=2) { |
|
2067 t7 = VIS_LD_U8_I(tab2, s2); |
|
2068 t6 = VIS_LD_U8_I(tab1, s2); |
|
2069 t5 = VIS_LD_U8_I(tab0, s2); |
|
2070 t4 = VIS_LD_U8_I(tab3, s1); |
|
2071 t3 = VIS_LD_U8_I(tab2, s1); |
|
2072 t2 = VIS_LD_U8_I(tab1, s1); |
|
2073 t1 = VIS_LD_U8_I(tab0, s1); |
|
2074 t0 = VIS_LD_U8_I(tab3, s0); |
|
2075 acc = vis_faligndata(t7, acc); |
|
2076 acc = vis_faligndata(t6, acc); |
|
2077 acc = vis_faligndata(t5, acc); |
|
2078 acc = vis_faligndata(t4, acc); |
|
2079 acc = vis_faligndata(t3, acc); |
|
2080 acc = vis_faligndata(t2, acc); |
|
2081 acc = vis_faligndata(t1, acc); |
|
2082 acc = vis_faligndata(t0, acc); |
|
2083 s0 = s2; |
|
2084 s1 = sp[0]; |
|
2085 s2 = sp[1]; |
|
2086 *dp++ = acc; |
|
2087 } |
|
2088 |
|
2089 t7 = VIS_LD_U8_I(tab2, s2); |
|
2090 t6 = VIS_LD_U8_I(tab1, s2); |
|
2091 t5 = VIS_LD_U8_I(tab0, s2); |
|
2092 t4 = VIS_LD_U8_I(tab3, s1); |
|
2093 t3 = VIS_LD_U8_I(tab2, s1); |
|
2094 t2 = VIS_LD_U8_I(tab1, s1); |
|
2095 t1 = VIS_LD_U8_I(tab0, s1); |
|
2096 t0 = VIS_LD_U8_I(tab3, s0); |
|
2097 acc = vis_faligndata(t7, acc); |
|
2098 acc = vis_faligndata(t6, acc); |
|
2099 acc = vis_faligndata(t5, acc); |
|
2100 acc = vis_faligndata(t4, acc); |
|
2101 acc = vis_faligndata(t3, acc); |
|
2102 acc = vis_faligndata(t2, acc); |
|
2103 acc = vis_faligndata(t1, acc); |
|
2104 acc = vis_faligndata(t0, acc); |
|
2105 s0 = s2; |
|
2106 *dp++ = acc; |
|
2107 } |
|
2108 |
|
2109 dl = (mlib_u8*)dp; |
|
2110 |
|
2111 if ((xsize & 1) != 0) { |
|
2112 s1 = sp[0]; |
|
2113 t7 = VIS_LD_U8_I(tab2, s1); |
|
2114 t6 = VIS_LD_U8_I(tab1, s1); |
|
2115 t5 = VIS_LD_U8_I(tab0, s1); |
|
2116 t4 = VIS_LD_U8_I(tab3, s0); |
|
2117 acc = vis_faligndata(t7, acc); |
|
2118 acc = vis_faligndata(t6, acc); |
|
2119 acc = vis_faligndata(t5, acc); |
|
2120 acc = vis_faligndata(t4, acc); |
|
2121 *(mlib_f32*)dl = vis_read_hi(acc); |
|
2122 dl += 4; |
|
2123 s0 = s1; |
|
2124 } |
|
2125 |
|
2126 dl[0] = tab3[s0]; |
|
2127 } |
|
2128 |
|
2129 /***************************************************************/ |
|
2130 void mlib_v_ImageLookUpSI_U8_U8_4(const mlib_u8 *src, |
|
2131 mlib_s32 slb, |
|
2132 mlib_u8 *dst, |
|
2133 mlib_s32 dlb, |
|
2134 mlib_s32 xsize, |
|
2135 mlib_s32 ysize, |
|
2136 const mlib_u8 **table) |
|
2137 { |
|
2138 if ((xsize * ysize) < 500) { |
|
2139 mlib_u8 *sl; |
|
2140 mlib_u8 *dl; |
|
2141 mlib_s32 j; |
|
2142 const mlib_u8 *tab0 = table[0]; |
|
2143 const mlib_u8 *tab1 = table[1]; |
|
2144 const mlib_u8 *tab2 = table[2]; |
|
2145 const mlib_u8 *tab3 = table[3]; |
|
2146 |
|
2147 sl = (void *)src; |
|
2148 dl = dst; |
|
2149 |
|
2150 /* row loop */ |
|
2151 for (j = 0; j < ysize; j ++) { |
|
2152 mlib_u8 *sp = sl; |
|
2153 mlib_u8 *dp = dl; |
|
2154 mlib_s32 off, s0, size = xsize; |
|
2155 |
|
2156 off = (8 - ((mlib_addr)dp & 7)) & 7; |
|
2157 |
|
2158 if ((off >= 4) && (size > 0)) { |
|
2159 s0 = *sp++; |
|
2160 *dp++ = tab0[s0]; |
|
2161 *dp++ = tab1[s0]; |
|
2162 *dp++ = tab2[s0]; |
|
2163 *dp++ = tab3[s0]; |
|
2164 size--; |
|
2165 } |
|
2166 |
|
2167 if (size > 0) { |
|
2168 off = (4 - ((mlib_addr)dp & 3)) & 3; |
|
2169 |
|
2170 if (off == 0) { |
|
2171 mlib_v_ImageLookUpSI_U8_U8_4_DstOff0_D1_SMALL(sp, dp, size, table); |
|
2172 } else if (off == 1) { |
|
2173 s0 = *sp; |
|
2174 *dp++ = tab0[s0]; |
|
2175 size--; |
|
2176 mlib_v_ImageLookUpSI_U8_U8_4_DstOff1_D1_SMALL(sp, dp, size, table); |
|
2177 } else if (off == 2) { |
|
2178 s0 = *sp; |
|
2179 *dp++ = tab0[s0]; |
|
2180 *dp++ = tab1[s0]; |
|
2181 size--; |
|
2182 mlib_v_ImageLookUpSI_U8_U8_4_DstOff2_D1_SMALL(sp, dp, size, table); |
|
2183 } else if (off == 3) { |
|
2184 s0 = *sp; |
|
2185 *dp++ = tab0[s0]; |
|
2186 *dp++ = tab1[s0]; |
|
2187 *dp++ = tab2[s0]; |
|
2188 size--; |
|
2189 mlib_v_ImageLookUpSI_U8_U8_4_DstOff3_D1_SMALL(sp, dp, size, table); |
|
2190 } |
|
2191 } |
|
2192 |
|
2193 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb); |
|
2194 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb); |
|
2195 } |
|
2196 |
|
2197 } else { |
|
2198 mlib_u8 *sl; |
|
2199 mlib_u8 *dl; |
|
2200 mlib_u32 tab[256]; |
|
2201 const mlib_u8 *tab0 = table[0]; |
|
2202 const mlib_u8 *tab1 = table[1]; |
|
2203 const mlib_u8 *tab2 = table[2]; |
|
2204 const mlib_u8 *tab3 = table[3]; |
|
2205 mlib_s32 i, j; |
|
2206 mlib_u32 s0, s1, s2, s3, s4; |
|
2207 |
|
2208 s0 = tab0[0]; |
|
2209 s1 = tab1[0]; |
|
2210 s2 = tab2[0]; |
|
2211 s3 = tab3[0]; |
|
2212 for (i = 1; i < 256; i++) { |
|
2213 s4 = (s0 << 24) + (s1 << 16) + (s2 << 8) + s3; |
|
2214 s0 = tab0[i]; |
|
2215 s1 = tab1[i]; |
|
2216 s2 = tab2[i]; |
|
2217 s3 = tab3[i]; |
|
2218 tab[i-1] = s4; |
|
2219 } |
|
2220 |
|
2221 s4 = (s0 << 24) + (s1 << 16) + (s2 << 8) + s3; |
|
2222 tab[255] = s4; |
|
2223 |
|
2224 sl = (void *)src; |
|
2225 dl = dst; |
|
2226 |
|
2227 /* row loop */ |
|
2228 for (j = 0; j < ysize; j ++) { |
|
2229 mlib_u8 *sp = sl; |
|
2230 mlib_u8 *dp = dl; |
|
2231 mlib_s32 off, size = xsize; |
|
2232 |
|
2233 if (((mlib_addr)dp & 3) == 0) { |
|
2234 off = (4 - (mlib_addr)sp & 3) & 3; |
|
2235 |
|
2236 off = (off < size) ? off : size; |
|
2237 |
|
2238 #pragma pipeloop(0) |
|
2239 for (i = 0; i < off; i++) { |
|
2240 *(mlib_u32*)dp = tab[(*sp)]; |
|
2241 dp += 4; sp++; |
|
2242 } |
|
2243 |
|
2244 size -= off; |
|
2245 |
|
2246 if (size > 0) { |
|
2247 mlib_v_ImageLookUpSI_U8_U8_4_SrcOff0_D1(sp, dp, size, (mlib_f32*)tab); |
|
2248 } |
|
2249 |
|
2250 } else { |
|
2251 |
|
2252 off = ((4 - ((mlib_addr)sp & 3)) & 3); |
|
2253 off = (off < size) ? off : size; |
|
2254 |
|
2255 for (i = 0; i < off; i++) { |
|
2256 s0 = tab[(*sp)]; |
|
2257 *dp++ = (s0 >> 24); |
|
2258 *dp++ = (s0 >> 16); |
|
2259 *dp++ = (s0 >> 8); |
|
2260 *dp++ = s0; |
|
2261 size--; sp++; |
|
2262 } |
|
2263 |
|
2264 if (size > 0) { |
|
2265 mlib_v_ImageLookUpSI_U8_U8_4_DstNonAl_D1(sp, dp, size, (mlib_f32*)tab); |
|
2266 } |
|
2267 } |
|
2268 |
|
2269 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb); |
|
2270 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb); |
|
2271 } |
|
2272 } |
|
2273 } |
|
2274 |
|
2275 /***************************************************************/ |