jdk/src/solaris/native/sun/awt/medialib/mlib_v_ImageLookUpSIS32U8Func.c
changeset 2 90ce3da70b43
child 5506 202f599c92aa
equal deleted inserted replaced
0:fd16c54261b3 2:90ce3da70b43
       
     1 /*
       
     2  * Copyright 1998-2003 Sun Microsystems, Inc.  All Rights Reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Sun designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Sun in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
       
    22  * CA 95054 USA or visit www.sun.com if you need additional information or
       
    23  * have any questions.
       
    24  */
       
    25 
       
    26 
       
    27 
       
    28 #include "vis_proto.h"
       
    29 #include "mlib_image.h"
       
    30 #include "mlib_v_ImageLookUpFunc.h"
       
    31 
       
    32 /***************************************************************/
       
       /*
        * Forward declarations of the single-row lookup kernels used by the
        * row drivers in this file.  Each one takes a source row (src), a
        * destination row (dst), the number of source values (xsize) and the
        * array of per-channel lookup tables (table).
        * They are declared static here; the definitions further down omit
        * the keyword and therefore keep the internal linkage given by these
        * prototypes.
        */
    33 static void mlib_v_ImageLookUpSI_S32_U8_2_DstA8D1(const mlib_s32 *src,
       
    34                                                   mlib_u8        *dst,
       
    35                                                   mlib_s32       xsize,
       
    36                                                   const mlib_u8  **table);
       
    37 
       
    38 static void mlib_v_ImageLookUpSI_S32_U8_2_D1(const mlib_s32 *src,
       
    39                                              mlib_u8        *dst,
       
    40                                              mlib_s32       xsize,
       
    41                                              const mlib_u8  **table);
       
    42 
       
    43 static void mlib_v_ImageLookUpSI_S32_U8_3_D1(const mlib_s32 *src,
       
    44                                              mlib_u8        *dst,
       
    45                                              mlib_s32       xsize,
       
    46                                              const mlib_u8  **table);
       
    47 
       
    48 static void mlib_v_ImageLookUpSI_S32_U8_4_DstOff0_D1(const mlib_s32 *src,
       
    49                                                      mlib_u8        *dst,
       
    50                                                      mlib_s32       xsize,
       
    51                                                      const mlib_u8  **table);
       
    52 
       
    53 static void mlib_v_ImageLookUpSI_S32_U8_4_DstOff1_D1(const mlib_s32 *src,
       
    54                                                      mlib_u8        *dst,
       
    55                                                      mlib_s32       xsize,
       
    56                                                      const mlib_u8  **table);
       
    57 
       
    58 static void mlib_v_ImageLookUpSI_S32_U8_4_DstOff2_D1(const mlib_s32 *src,
       
    59                                                      mlib_u8        *dst,
       
    60                                                      mlib_s32       xsize,
       
    61                                                      const mlib_u8  **table);
       
    62 
       
    63 static void mlib_v_ImageLookUpSI_S32_U8_4_DstOff3_D1(const mlib_s32 *src,
       
    64                                                      mlib_u8        *dst,
       
    65                                                      mlib_s32       xsize,
       
    66                                                      const mlib_u8  **table);
       
    67 
       
    68 /***************************************************************/
       
       /*
        * Thin wrapper around vis_ld_u8_i(): casts the base X to void * and
        * passes the index Y through.  In this file X is always a lookup-table
        * pointer and Y a signed 32-bit source value; the result is kept in an
        * mlib_d64 and handed to vis_faligndata().
        */
    69 #define VIS_LD_U8_I(X, Y)       vis_ld_u8_i((void *)(X), (Y))
       
    70 
       
    71 /***************************************************************/
       
    72 void mlib_v_ImageLookUpSI_S32_U8_2_DstA8D1(const mlib_s32 *src,
       
    73                                            mlib_u8        *dst,
       
    74                                            mlib_s32       xsize,
       
    75                                            const mlib_u8  **table)
       
    76 {
       
         /*
          * Row kernel producing two output bytes per source value: one looked
          * up through table[0] and one through table[1].
          *   src   - row of xsize 32-bit source values
          *   dst   - destination row; it is cast to mlib_d64 * and written with
          *           64-bit stores, so it is presumably 8-byte aligned (the
          *           caller in this file only selects this kernel for an even
          *           address reached after its alignment prologue) - confirm
          *           for any other caller
          *   xsize - number of source values in the row
          *   table - per-channel lookup tables; entries 0 and 1 are used
          * Four source values (eight bytes) are assembled per 64-bit store;
          * up to three leftover values are written with a masked store.
          */
    77   mlib_s32 *sp;                        /* pointer to source data */
       
    78   mlib_s32 s0, s1, s2, s3;             /* source data */
       
    79   mlib_u16 *dl;                        /* pointer to start of destination */
       
    80   mlib_u16 *dend;                      /* pointer to end of destination */
       
    81   mlib_d64 *dp;                        /* aligned pointer to destination */
       
    82   mlib_d64 t0, t1, t2;                 /* destination data */
       
    83   mlib_d64 t3, t4, t5;                 /* destination data */
       
    84   mlib_d64 t6, t7, acc;                /* destination data */
       
    85   mlib_s32 emask;                      /* edge mask */
       
    86   mlib_s32 i, num;                     /* loop variable */
       
         /* Table pointers are moved forward by 2^31 entries and then indexed */
         /* with the signed source value; presumably the caller passes bases */
         /* biased the opposite way - TODO confirm against the caller. */
    87   const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u];
       
    88   const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u];
       
    89 
       
    90   sp = (void *)src;
       
    91   dl = (mlib_u16 *) dst;
       
    92   dp = (mlib_d64 *) dl;
       
         /* dl is a 16-bit pointer (one element per source value), so dend */
         /* addresses the last two-byte output pair of the row. */
    93   dend = dl + xsize - 1;
       
    94 
       
         /* NOTE(review): with an align offset of 7 every vis_faligndata(t, acc) */
         /* below is expected to push the byte loaded into t in at the front of */
         /* acc and move the older bytes back by one - confirm in the VIS manual. */
    95   vis_alignaddr((void *)0, 7);
       
    96 
       
         /* Pipelined main part: the next four source values are fetched while */
         /* the current group is assembled; the group fetched last is finished */
         /* by the copy of the loop body that follows the loop. */
    97   if (xsize >= 4) {
       
    98 
       
    99     s0 = sp[0];
       
   100     s1 = sp[1];
       
   101     s2 = sp[2];
       
   102     s3 = sp[3];
       
   103     sp += 4;
       
   104 
       
   105 #pragma pipeloop(0)
       
   106     for (i = 0; i <= xsize - 8; i += 4, sp += 4) {
       
   107       t7 = VIS_LD_U8_I(tab1, s3);
       
   108       t6 = VIS_LD_U8_I(tab0, s3);
       
   109       t5 = VIS_LD_U8_I(tab1, s2);
       
   110       t4 = VIS_LD_U8_I(tab0, s2);
       
   111       t3 = VIS_LD_U8_I(tab1, s1);
       
   112       t2 = VIS_LD_U8_I(tab0, s1);
       
   113       t1 = VIS_LD_U8_I(tab1, s0);
       
   114       t0 = VIS_LD_U8_I(tab0, s0);
       
           /* acc is read before it is ever assigned; the eight insertions are */
           /* expected to replace all eight of its bytes before the store. */
           /* Insertion runs from the last output byte (t7) to the first (t0). */
   115       acc = vis_faligndata(t7, acc);
       
   116       acc = vis_faligndata(t6, acc);
       
   117       acc = vis_faligndata(t5, acc);
       
   118       acc = vis_faligndata(t4, acc);
       
   119       acc = vis_faligndata(t3, acc);
       
   120       acc = vis_faligndata(t2, acc);
       
   121       acc = vis_faligndata(t1, acc);
       
   122       acc = vis_faligndata(t0, acc);
       
   123       s0 = sp[0];
       
   124       s1 = sp[1];
       
   125       s2 = sp[2];
       
   126       s3 = sp[3];
       
   127       *dp++ = acc;
       
   128     }
       
   129 
       
   130     t7 = VIS_LD_U8_I(tab1, s3);
       
   131     t6 = VIS_LD_U8_I(tab0, s3);
       
   132     t5 = VIS_LD_U8_I(tab1, s2);
       
   133     t4 = VIS_LD_U8_I(tab0, s2);
       
   134     t3 = VIS_LD_U8_I(tab1, s1);
       
   135     t2 = VIS_LD_U8_I(tab0, s1);
       
   136     t1 = VIS_LD_U8_I(tab1, s0);
       
   137     t0 = VIS_LD_U8_I(tab0, s0);
       
   138     acc = vis_faligndata(t7, acc);
       
   139     acc = vis_faligndata(t6, acc);
       
   140     acc = vis_faligndata(t5, acc);
       
   141     acc = vis_faligndata(t4, acc);
       
   142     acc = vis_faligndata(t3, acc);
       
   143     acc = vis_faligndata(t2, acc);
       
   144     acc = vis_faligndata(t1, acc);
       
   145     acc = vis_faligndata(t0, acc);
       
   146     *dp++ = acc;
       
   147   }
       
   148 
       
         /* Tail: entered only while dp has not passed the last output pair. */
   149   if ((mlib_addr) dp <= (mlib_addr) dend) {
       
   150 
       
           /* num becomes the count of leftover source values; sp is moved to */
           /* the last of them because they are inserted back to front. */
   151     num = (mlib_s32) ((mlib_u16 *) dend - (mlib_u16 *) dp);
       
   152     sp += num;
       
   153     num++;
       
   154 #pragma pipeloop(0)
       
   155     for (i = 0; i < num; i++) {
       
   156       s0 = *sp;
       
   157       sp--;
       
   158 
       
   159       t0 = VIS_LD_U8_I(tab1, s0);
       
   160       acc = vis_faligndata(t0, acc);
       
   161 
       
   162       t0 = VIS_LD_U8_I(tab0, s0);
       
   163       acc = vis_faligndata(t0, acc);
       
   164     }
       
   165 
       
           /* Partial store: only the 16-bit lanes selected by the edge mask */
           /* computed for dp..dend are written. */
   166     emask = vis_edge16(dp, dend);
       
   167     vis_pst_16(acc, dp, emask);
       
   168   }
       
   169 }
       
   170 
       
   171 /***************************************************************/
       
   172 void mlib_v_ImageLookUpSI_S32_U8_2_D1(const mlib_s32 *src,
       
   173                                       mlib_u8        *dst,
       
   174                                       mlib_s32       xsize,
       
   175                                       const mlib_u8  **table)
       
   176 {
       
         /*
          * Row kernel producing two output bytes per source value (table[0]
          * byte, then table[1] byte) for a destination whose first byte is
          * written on its own before 64-bit stores start at dst + 1.
          *   src   - row of xsize 32-bit source values; src[0] is read
          *           unconditionally, so xsize must be at least 1
          *   dst   - destination row; dst + 1 is cast to mlib_d64 * and is
          *           therefore presumably 8-byte aligned - confirm with caller
          *   xsize - number of source values in the row
          *   table - per-channel lookup tables; entries 0 and 1 are used
          * Because of the one-byte skew every 8-byte group starts with the
          * table[1] byte of the value carried in s0 and ends with the table[0]
          * byte of the value that becomes the next s0.
          */
   177   mlib_s32 *sp;                        /* pointer to source data */
       
   178   mlib_s32 s0, s1, s2, s3, s4;         /* source data */
       
   179   mlib_u8 *dl;                         /* pointer to start of destination */
       
   180   mlib_u8 *dend;                       /* pointer to end of destination */
       
   181   mlib_d64 *dp;                        /* aligned pointer to destination */
       
   182   mlib_d64 t0, t1, t2;                 /* destination data */
       
   183   mlib_d64 t3, t4, t5;                 /* destination data */
       
   184   mlib_d64 t6, t7, acc;                /* destination data */
       
   185   mlib_s32 emask;                      /* edge mask */
       
   186   mlib_s32 i, num;                     /* loop variable */
       
         /* Table pointers are moved forward by 2^31 entries and then indexed */
         /* with the signed source value; presumably the caller passes bases */
         /* biased the opposite way - TODO confirm against the caller. */
   187   const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u];
       
   188   const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u];
       
   189 
       
   190   sp = (void *)src;
       
   191   dl = dst;
       
   192 
       
         /* last output byte of the row (two bytes per source value) */
   193   dend = dl + 2 * xsize - 1;
       
   194 
       
         /* NOTE(review): with an align offset of 7 every vis_faligndata(t, acc) */
         /* below is expected to push the byte loaded into t in at the front of */
         /* acc and move the older bytes back by one - confirm in the VIS manual. */
   195   vis_alignaddr((void *)0, 7);
       
   196 
       
         /* Write the first table[0] byte directly; its table[1] partner is */
         /* emitted later as the leading byte of the next 64-bit group. */
   197   s0 = *sp++;
       
   198   *dl++ = tab0[s0];
       
   199   dp = (mlib_d64 *) dl;
       
   200   xsize--;
       
   201 
       
         /* Pipelined main part: four further source values per 64-bit store. */
   202   if (xsize >= 4) {
       
   203 
       
   204     s1 = sp[0];
       
   205     s2 = sp[1];
       
   206     s3 = sp[2];
       
   207     s4 = sp[3];
       
   208     sp += 4;
       
   209 
       
   210 #pragma pipeloop(0)
       
   211     for (i = 0; i <= xsize - 8; i += 4, sp += 4) {
       
   212       t7 = VIS_LD_U8_I(tab0, s4);
       
   213       t6 = VIS_LD_U8_I(tab1, s3);
       
   214       t5 = VIS_LD_U8_I(tab0, s3);
       
   215       t4 = VIS_LD_U8_I(tab1, s2);
       
   216       t3 = VIS_LD_U8_I(tab0, s2);
       
   217       t2 = VIS_LD_U8_I(tab1, s1);
       
   218       t1 = VIS_LD_U8_I(tab0, s1);
       
   219       t0 = VIS_LD_U8_I(tab1, s0);
       
           /* acc is read before it is ever assigned; the eight insertions are */
           /* expected to replace all eight of its bytes before the store. */
   220       acc = vis_faligndata(t7, acc);
       
   221       acc = vis_faligndata(t6, acc);
       
   222       acc = vis_faligndata(t5, acc);
       
   223       acc = vis_faligndata(t4, acc);
       
   224       acc = vis_faligndata(t3, acc);
       
   225       acc = vis_faligndata(t2, acc);
       
   226       acc = vis_faligndata(t1, acc);
       
   227       acc = vis_faligndata(t0, acc);
       
           /* s4 has only had its table[0] byte emitted; carry it over as s0 */
   228       s0 = s4;
       
   229       s1 = sp[0];
       
   230       s2 = sp[1];
       
   231       s3 = sp[2];
       
   232       s4 = sp[3];
       
   233       *dp++ = acc;
       
   234     }
       
   235 
       
   236     t7 = VIS_LD_U8_I(tab0, s4);
       
   237     t6 = VIS_LD_U8_I(tab1, s3);
       
   238     t5 = VIS_LD_U8_I(tab0, s3);
       
   239     t4 = VIS_LD_U8_I(tab1, s2);
       
   240     t3 = VIS_LD_U8_I(tab0, s2);
       
   241     t2 = VIS_LD_U8_I(tab1, s1);
       
   242     t1 = VIS_LD_U8_I(tab0, s1);
       
   243     t0 = VIS_LD_U8_I(tab1, s0);
       
   244     acc = vis_faligndata(t7, acc);
       
   245     acc = vis_faligndata(t6, acc);
       
   246     acc = vis_faligndata(t5, acc);
       
   247     acc = vis_faligndata(t4, acc);
       
   248     acc = vis_faligndata(t3, acc);
       
   249     acc = vis_faligndata(t2, acc);
       
   250     acc = vis_faligndata(t1, acc);
       
   251     acc = vis_faligndata(t0, acc);
       
   252     s0 = s4;
       
   253     *dp++ = acc;
       
   254   }
       
   255 
       
         /* Tail: num is the number of complete two-byte pairs still to be */
         /* written after the pending table[1] byte of s0; sp is moved to the */
         /* last remaining source value because insertion runs back to front. */
   256   num = (mlib_s32) (((mlib_u8 *) dend - (mlib_u8 *) dp) >> 1);
       
   257   sp += num - 1;
       
   258 
       
   259 #pragma pipeloop(0)
       
   260   for (i = 0; i < num; i++) {
       
   261     s1 = *sp;
       
   262     sp--;
       
   263 
       
   264     t0 = VIS_LD_U8_I(tab1, s1);
       
   265     acc = vis_faligndata(t0, acc);
       
   266 
       
   267     t0 = VIS_LD_U8_I(tab0, s1);
       
   268     acc = vis_faligndata(t0, acc);
       
   269   }
       
   270 
       
         /* The pending table[1] byte of the carried value goes in front, then */
         /* only the bytes selected by the edge mask for dp..dend are stored. */
   271   t0 = VIS_LD_U8_I(tab1, s0);
       
   272   acc = vis_faligndata(t0, acc);
       
   273   emask = vis_edge8(dp, dend);
       
   274   vis_pst_8(acc, dp, emask);
       
   275 }
       
   276 
       
   277 /***************************************************************/
       
   278 void mlib_v_ImageLookUpSI_S32_U8_2(const mlib_s32 *src,
       
   279                                    mlib_s32       slb,
       
   280                                    mlib_u8        *dst,
       
   281                                    mlib_s32       dlb,
       
   282                                    mlib_s32       xsize,
       
   283                                    mlib_s32       ysize,
       
   284                                    const mlib_u8  **table)
       
   285 {
       
   286   /* Row driver: aligns each destination row, then calls a VIS row kernel. */
       
   287   const mlib_u8 *lut0 = &table[0][(mlib_u32) 2147483648u];
       
   288   const mlib_u8 *lut1 = &table[1][(mlib_u32) 2147483648u];
       
   289   mlib_s32 *src_row = (void *)src;
       
   290   mlib_u8 *dst_row = dst;
       
   291   mlib_s32 row, k;
       
   292 
       
   293   for (row = 0; row < ysize; row++) {
       
   294     mlib_s32 *in = src_row;
       
   295     mlib_u8 *out = dst_row;
       
   296     /* whole two-byte pixels that fit before the next 8-byte boundary */
       
   297     mlib_s32 lead = (mlib_s32) (((8 - ((mlib_addr) out & 7)) & 7) >> 1);
       
   298     mlib_s32 left;
       
   299 
       
   300     if (lead > xsize) {
       
   301       lead = xsize;
       
   302     }
       
   303     left = xsize - lead;
       
   304 
       
   305     /* scalar prologue: one table[0] byte and one table[1] byte per value */
       
   306     for (k = 0; k < lead; k++) {
       
   307       mlib_s32 v = *in++;
       
   308       out[0] = lut0[v];
       
   309       out[1] = lut1[v];
       
   310       out += 2;
       
   311     }
       
   312 
       
   313     /* rest of the row: kernel chosen by the parity of the output address */
       
   314     if (left > 0 && ((mlib_addr) out & 1) != 0) {
       
   315       mlib_v_ImageLookUpSI_S32_U8_2_D1(in, out, left, table);
       
   316     }
       
   317     else if (left > 0) {
       
   318       mlib_v_ImageLookUpSI_S32_U8_2_DstA8D1(in, out, left, table);
       
   319     }
       
   320 
       
   321     /* slb and dlb are byte strides between consecutive rows */
       
   322     src_row = (mlib_s32 *) ((mlib_u8 *) src_row + slb);
       
   323     dst_row = (mlib_u8 *) ((mlib_u8 *) dst_row + dlb);
       
   324   }
       
   325 
       
   326 /***************************************************************/
       
   327 void mlib_v_ImageLookUpSI_S32_U8_3_D1(const mlib_s32 *src,
       
   328                                       mlib_u8        *dst,
       
   329                                       mlib_s32       xsize,
       
   330                                       const mlib_u8  **table)
       
   331 {
       
         /*
          * Row kernel producing three output bytes per source value, looked up
          * through table[0], table[1] and table[2] in that order.
          *   src   - row of xsize 32-bit source values
          *   dst   - destination row; it is cast to mlib_d64 * and written with
          *           64-bit stores, so it is presumably 8-byte aligned - confirm
          *           with the caller
          *   xsize - number of source values in the row
          *   table - per-channel lookup tables; entries 0..2 are used
          * Eight source values give 24 output bytes, assembled as three
          * 64-bit words (acc0, acc1, acc2); values left over after the last
          * full group of eight are handled by the scalar loop at the end.
          */
   332   mlib_s32 *sp;                        /* pointer to source data */
       
   333   mlib_u8 *dl;                         /* pointer to start of destination */
       
   334   mlib_d64 *dp;                        /* aligned pointer to destination */
       
   335   mlib_d64 t0, t1, t2;                 /* destination data */
       
   336   mlib_d64 t3, t4, t5;                 /* destination data */
       
   337   mlib_d64 t6, t7;                     /* destination data */
       
   338   mlib_d64 acc0, acc1, acc2;           /* destination data */
       
   339   mlib_s32 i;                          /* loop variable */
       
         /* Table pointers are moved forward by 2^31 entries and then indexed */
         /* with the signed source value; presumably the caller passes bases */
         /* biased the opposite way - TODO confirm against the caller. */
   340   const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u];
       
   341   const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u];
       
   342   const mlib_u8 *tab2 = &table[2][(mlib_u32) 2147483648u];
       
   343   mlib_s32 s00, s01, s02, s03;
       
   344   mlib_s32 s10, s11, s12, s13;
       
   345 
       
   346   sp = (void *)src;
       
   347   dl = dst;
       
   348   dp = (mlib_d64 *) dl;
       
   349 
       
         /* NOTE(review): with an align offset of 7 every vis_faligndata(t, acc) */
         /* below is expected to push the byte loaded into t in at the front of */
         /* acc and move the older bytes back by one - confirm in the VIS manual. */
   350   vis_alignaddr((void *)0, 7);
       
   351 
       
         /* i counts the source values already converted by the VIS part */
   352   i = 0;
       
   353 
       
         /* Pipelined main part: the next eight source values are fetched while */
         /* the current group is assembled; the group fetched last is finished */
         /* by the copy of the loop body that follows the loop. */
   354   if (xsize >= 8) {
       
   355 
       
   356     s00 = sp[0];
       
   357     s01 = sp[1];
       
   358     s02 = sp[2];
       
   359     s03 = sp[3];
       
   360     s10 = sp[4];
       
   361     s11 = sp[5];
       
   362     s12 = sp[6];
       
   363     s13 = sp[7];
       
   364     sp += 8;
       
   365 
       
   366 #pragma pipeloop(0)
       
   367     for (i = 0; i <= xsize - 16; i += 8, sp += 8) {
       
           /* acc0: all three bytes of s00 and s01, first two bytes of s02. */
           /* The accumulators are read before they are ever assigned; eight */
           /* insertions are expected to replace every byte before the store. */
   368       t7 = VIS_LD_U8_I(tab1, s02);
       
   369       t6 = VIS_LD_U8_I(tab0, s02);
       
   370       t5 = VIS_LD_U8_I(tab2, s01);
       
   371       t4 = VIS_LD_U8_I(tab1, s01);
       
   372       t3 = VIS_LD_U8_I(tab0, s01);
       
   373       t2 = VIS_LD_U8_I(tab2, s00);
       
   374       t1 = VIS_LD_U8_I(tab1, s00);
       
   375       t0 = VIS_LD_U8_I(tab0, s00);
       
   376       acc0 = vis_faligndata(t7, acc0);
       
   377       acc0 = vis_faligndata(t6, acc0);
       
   378       acc0 = vis_faligndata(t5, acc0);
       
   379       acc0 = vis_faligndata(t4, acc0);
       
   380       acc0 = vis_faligndata(t3, acc0);
       
   381       acc0 = vis_faligndata(t2, acc0);
       
   382       acc0 = vis_faligndata(t1, acc0);
       
   383       acc0 = vis_faligndata(t0, acc0);
       
           /* acc1: last byte of s02, all of s03 and s10, first byte of s11 */
   384       t7 = VIS_LD_U8_I(tab0, s11);
       
   385       t6 = VIS_LD_U8_I(tab2, s10);
       
   386       t5 = VIS_LD_U8_I(tab1, s10);
       
   387       t4 = VIS_LD_U8_I(tab0, s10);
       
   388       t3 = VIS_LD_U8_I(tab2, s03);
       
   389       t2 = VIS_LD_U8_I(tab1, s03);
       
   390       t1 = VIS_LD_U8_I(tab0, s03);
       
   391       t0 = VIS_LD_U8_I(tab2, s02);
       
   392       acc1 = vis_faligndata(t7, acc1);
       
   393       acc1 = vis_faligndata(t6, acc1);
       
   394       acc1 = vis_faligndata(t5, acc1);
       
   395       acc1 = vis_faligndata(t4, acc1);
       
   396       acc1 = vis_faligndata(t3, acc1);
       
   397       acc1 = vis_faligndata(t2, acc1);
       
   398       acc1 = vis_faligndata(t1, acc1);
       
   399       acc1 = vis_faligndata(t0, acc1);
       
           /* acc2: last two bytes of s11, all of s12 and s13 */
   400       t7 = VIS_LD_U8_I(tab2, s13);
       
   401       t6 = VIS_LD_U8_I(tab1, s13);
       
   402       t5 = VIS_LD_U8_I(tab0, s13);
       
   403       t4 = VIS_LD_U8_I(tab2, s12);
       
   404       t3 = VIS_LD_U8_I(tab1, s12);
       
   405       t2 = VIS_LD_U8_I(tab0, s12);
       
   406       t1 = VIS_LD_U8_I(tab2, s11);
       
   407       t0 = VIS_LD_U8_I(tab1, s11);
       
   408       acc2 = vis_faligndata(t7, acc2);
       
   409       acc2 = vis_faligndata(t6, acc2);
       
   410       acc2 = vis_faligndata(t5, acc2);
       
   411       acc2 = vis_faligndata(t4, acc2);
       
   412       acc2 = vis_faligndata(t3, acc2);
       
   413       acc2 = vis_faligndata(t2, acc2);
       
   414       acc2 = vis_faligndata(t1, acc2);
       
   415       acc2 = vis_faligndata(t0, acc2);
       
   416       s00 = sp[0];
       
   417       s01 = sp[1];
       
   418       s02 = sp[2];
       
   419       s03 = sp[3];
       
   420       s10 = sp[4];
       
   421       s11 = sp[5];
       
   422       s12 = sp[6];
       
   423       s13 = sp[7];
       
   424       *dp++ = acc0;
       
   425       *dp++ = acc1;
       
   426       *dp++ = acc2;
       
   427     }
       
   428 
       
   429     t7 = VIS_LD_U8_I(tab1, s02);
       
   430     t6 = VIS_LD_U8_I(tab0, s02);
       
   431     t5 = VIS_LD_U8_I(tab2, s01);
       
   432     t4 = VIS_LD_U8_I(tab1, s01);
       
   433     t3 = VIS_LD_U8_I(tab0, s01);
       
   434     t2 = VIS_LD_U8_I(tab2, s00);
       
   435     t1 = VIS_LD_U8_I(tab1, s00);
       
   436     t0 = VIS_LD_U8_I(tab0, s00);
       
   437     acc0 = vis_faligndata(t7, acc0);
       
   438     acc0 = vis_faligndata(t6, acc0);
       
   439     acc0 = vis_faligndata(t5, acc0);
       
   440     acc0 = vis_faligndata(t4, acc0);
       
   441     acc0 = vis_faligndata(t3, acc0);
       
   442     acc0 = vis_faligndata(t2, acc0);
       
   443     acc0 = vis_faligndata(t1, acc0);
       
   444     acc0 = vis_faligndata(t0, acc0);
       
   445     t7 = VIS_LD_U8_I(tab0, s11);
       
   446     t6 = VIS_LD_U8_I(tab2, s10);
       
   447     t5 = VIS_LD_U8_I(tab1, s10);
       
   448     t4 = VIS_LD_U8_I(tab0, s10);
       
   449     t3 = VIS_LD_U8_I(tab2, s03);
       
   450     t2 = VIS_LD_U8_I(tab1, s03);
       
   451     t1 = VIS_LD_U8_I(tab0, s03);
       
   452     t0 = VIS_LD_U8_I(tab2, s02);
       
   453     acc1 = vis_faligndata(t7, acc1);
       
   454     acc1 = vis_faligndata(t6, acc1);
       
   455     acc1 = vis_faligndata(t5, acc1);
       
   456     acc1 = vis_faligndata(t4, acc1);
       
   457     acc1 = vis_faligndata(t3, acc1);
       
   458     acc1 = vis_faligndata(t2, acc1);
       
   459     acc1 = vis_faligndata(t1, acc1);
       
   460     acc1 = vis_faligndata(t0, acc1);
       
   461     t7 = VIS_LD_U8_I(tab2, s13);
       
   462     t6 = VIS_LD_U8_I(tab1, s13);
       
   463     t5 = VIS_LD_U8_I(tab0, s13);
       
   464     t4 = VIS_LD_U8_I(tab2, s12);
       
   465     t3 = VIS_LD_U8_I(tab1, s12);
       
   466     t2 = VIS_LD_U8_I(tab0, s12);
       
   467     t1 = VIS_LD_U8_I(tab2, s11);
       
   468     t0 = VIS_LD_U8_I(tab1, s11);
       
   469     acc2 = vis_faligndata(t7, acc2);
       
   470     acc2 = vis_faligndata(t6, acc2);
       
   471     acc2 = vis_faligndata(t5, acc2);
       
   472     acc2 = vis_faligndata(t4, acc2);
       
   473     acc2 = vis_faligndata(t3, acc2);
       
   474     acc2 = vis_faligndata(t2, acc2);
       
   475     acc2 = vis_faligndata(t1, acc2);
       
   476     acc2 = vis_faligndata(t0, acc2);
       
   477     *dp++ = acc0;
       
   478     *dp++ = acc1;
       
   479     *dp++ = acc2;
       
           /* account for the group that was finished outside the loop */
   480     i += 8;
       
   481   }
       
   482 
       
   483   dl = (mlib_u8 *) dp;
       
   484 
       
         /* scalar tail: remaining source values, three bytes each */
   485 #pragma pipeloop(0)
       
   486   for (; i < xsize; i++) {
       
   487     s00 = sp[0];
       
   488     dl[0] = tab0[s00];
       
   489     dl[1] = tab1[s00];
       
   490     dl[2] = tab2[s00];
       
   491     dl += 3;
       
   492     sp++;
       
   493   }
       
   494 }
       
   495 
       
   496 /***************************************************************/
       
   497 void mlib_v_ImageLookUpSI_S32_U8_3(const mlib_s32 *src,
       
   498                                    mlib_s32       slb,
       
   499                                    mlib_u8        *dst,
       
   500                                    mlib_s32       dlb,
       
   501                                    mlib_s32       xsize,
       
   502                                    mlib_s32       ysize,
       
   503                                    const mlib_u8  **table)
       
   504 {
       
   505   /* Row driver: aligns each destination row, then calls the VIS row kernel. */
       
   506   const mlib_u8 *lut0 = &table[0][(mlib_u32) 2147483648u];
       
   507   const mlib_u8 *lut1 = &table[1][(mlib_u32) 2147483648u];
       
   508   const mlib_u8 *lut2 = &table[2][(mlib_u32) 2147483648u];
       
   509   mlib_s32 *src_row = (void *)src;
       
   510   mlib_u8 *dst_row = dst;
       
   511   mlib_s32 row, k;
       
   512 
       
   513   for (row = 0; row < ysize; row++) {
       
   514     mlib_s32 *in = src_row;
       
   515     mlib_u8 *out = dst_row;
       
   516     /* 3 * 5 == -1 (mod 8): this many 3-byte pixels reach an 8-byte boundary */
       
   517     mlib_s32 lead = (((mlib_s32) ((mlib_addr) out & 7)) * 5) & 7;
       
   518     mlib_s32 left;
       
   519 
       
   520     if (lead > xsize) {
       
   521       lead = xsize;
       
   522     }
       
   523     left = xsize - lead;
       
   524 
       
   525     for (k = 0; k < lead; k++) {
       
   526       mlib_s32 v = *in++;
       
   527       out[0] = lut0[v];
       
   528       out[1] = lut1[v];
       
   529       out[2] = lut2[v];
       
   530       out += 3;
       
   531     }
       
   532 
       
   533     if (left > 0) {
       
   534       mlib_v_ImageLookUpSI_S32_U8_3_D1(in, out, left, table);
       
   535     }
       
   536 
       
   537     src_row = (mlib_s32 *) ((mlib_u8 *) src_row + slb);
       
   538     dst_row = (mlib_u8 *) ((mlib_u8 *) dst_row + dlb);
       
   539   }
       
   540 }
       
   541 
       
   542 /***************************************************************/
       
   543 void mlib_v_ImageLookUpSI_S32_U8_4_DstOff0_D1(const mlib_s32 *src,
       
   544                                               mlib_u8        *dst,
       
   545                                               mlib_s32       xsize,
       
   546                                               const mlib_u8  **table)
       
   547 {
       
         /*
          * Row kernel producing four output bytes per source value, looked up
          * through table[0], table[1], table[2] and table[3] in that order.
          *   src   - row of xsize 32-bit source values
          *   dst   - destination row; it is cast to mlib_d64 * and written with
          *           64-bit stores, so it is presumably 8-byte aligned - confirm
          *           with the caller
          *   xsize - number of source values in the row
          *   table - per-channel lookup tables; entries 0..3 are used
          * Two source values fill one 64-bit store; an odd trailing value is
          * written as a single 32-bit store.
          */
   548   mlib_s32 *sp;                        /* pointer to source data */
       
   549   mlib_s32 s0, s1;                     /* source data */
       
   550   mlib_u8 *dl;                         /* pointer to start of destination */
       
   551   mlib_d64 *dp;                        /* aligned pointer to destination */
       
   552   mlib_d64 t0, t1, t2;                 /* destination data */
       
   553   mlib_d64 t3, t4, t5;                 /* destination data */
       
   554   mlib_d64 t6, t7, acc;                /* destination data */
       
   555   mlib_s32 i;                          /* loop variable */
       
         /* Table pointers are moved forward by 2^31 entries and then indexed */
         /* with the signed source value; presumably the caller passes bases */
         /* biased the opposite way - TODO confirm against the caller. */
   556   const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u];
       
   557   const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u];
       
   558   const mlib_u8 *tab2 = &table[2][(mlib_u32) 2147483648u];
       
   559   const mlib_u8 *tab3 = &table[3][(mlib_u32) 2147483648u];
       
   560 
       
   561   sp = (void *)src;
       
   562   dl = dst;
       
   563   dp = (mlib_d64 *) dl;
       
   564 
       
         /* NOTE(review): with an align offset of 7 every vis_faligndata(t, acc) */
         /* below is expected to push the byte loaded into t in at the front of */
         /* acc and move the older bytes back by one - confirm in the VIS manual. */
   565   vis_alignaddr((void *)0, 7);
       
   566 
       
         /* Pipelined main part: the next two source values are fetched while */
         /* the current pair is assembled; the pair fetched last is finished */
         /* by the copy of the loop body that follows the loop. */
   567   if (xsize >= 2) {
       
   568 
       
   569     s0 = sp[0];
       
   570     s1 = sp[1];
       
   571     sp += 2;
       
   572 
       
   573 #pragma pipeloop(0)
       
   574     for (i = 0; i <= xsize - 4; i += 2, sp += 2) {
       
   575       t7 = VIS_LD_U8_I(tab3, s1);
       
   576       t6 = VIS_LD_U8_I(tab2, s1);
       
   577       t5 = VIS_LD_U8_I(tab1, s1);
       
   578       t4 = VIS_LD_U8_I(tab0, s1);
       
   579       t3 = VIS_LD_U8_I(tab3, s0);
       
   580       t2 = VIS_LD_U8_I(tab2, s0);
       
   581       t1 = VIS_LD_U8_I(tab1, s0);
       
   582       t0 = VIS_LD_U8_I(tab0, s0);
       
           /* acc is read before it is ever assigned; the eight insertions are */
           /* expected to replace all eight of its bytes before the store. */
   583       acc = vis_faligndata(t7, acc);
       
   584       acc = vis_faligndata(t6, acc);
       
   585       acc = vis_faligndata(t5, acc);
       
   586       acc = vis_faligndata(t4, acc);
       
   587       acc = vis_faligndata(t3, acc);
       
   588       acc = vis_faligndata(t2, acc);
       
   589       acc = vis_faligndata(t1, acc);
       
   590       acc = vis_faligndata(t0, acc);
       
   591       s0 = sp[0];
       
   592       s1 = sp[1];
       
   593       *dp++ = acc;
       
   594     }
       
   595 
       
   596     t7 = VIS_LD_U8_I(tab3, s1);
       
   597     t6 = VIS_LD_U8_I(tab2, s1);
       
   598     t5 = VIS_LD_U8_I(tab1, s1);
       
   599     t4 = VIS_LD_U8_I(tab0, s1);
       
   600     t3 = VIS_LD_U8_I(tab3, s0);
       
   601     t2 = VIS_LD_U8_I(tab2, s0);
       
   602     t1 = VIS_LD_U8_I(tab1, s0);
       
   603     t0 = VIS_LD_U8_I(tab0, s0);
       
   604     acc = vis_faligndata(t7, acc);
       
   605     acc = vis_faligndata(t6, acc);
       
   606     acc = vis_faligndata(t5, acc);
       
   607     acc = vis_faligndata(t4, acc);
       
   608     acc = vis_faligndata(t3, acc);
       
   609     acc = vis_faligndata(t2, acc);
       
   610     acc = vis_faligndata(t1, acc);
       
   611     acc = vis_faligndata(t0, acc);
       
   612     *dp++ = acc;
       
   613   }
       
   614 
       
         /* Odd trailing value: only four bytes are inserted, and the half of */
         /* acc returned by vis_read_hi() is written as one 32-bit store. */
   615   if ((xsize & 1) != 0) {
       
   616     s0 = sp[0];
       
   617     t7 = VIS_LD_U8_I(tab3, s0);
       
   618     t6 = VIS_LD_U8_I(tab2, s0);
       
   619     t5 = VIS_LD_U8_I(tab1, s0);
       
   620     t4 = VIS_LD_U8_I(tab0, s0);
       
   621     acc = vis_faligndata(t7, acc);
       
   622     acc = vis_faligndata(t6, acc);
       
   623     acc = vis_faligndata(t5, acc);
       
   624     acc = vis_faligndata(t4, acc);
       
   625     *(mlib_f32 *) dp = vis_read_hi(acc);
       
   626   }
       
   627 }
       
   628 
       
   629 /***************************************************************/
       
   630 void mlib_v_ImageLookUpSI_S32_U8_4_DstOff1_D1(const mlib_s32 *src,
       
   631                                               mlib_u8        *dst,
       
   632                                               mlib_s32       xsize,
       
   633                                               const mlib_u8  **table)
       
   634 {
       
         /*
          * Row kernel of the 4-channel S32 -> U8 single-input lookup for the
          * case where channel 0 of the first pixel has already been stored by
          * the caller (see mlib_v_ImageLookUpSI_S32_U8_4 in this file).
          *
          *   src   - source samples; src[0] is the partially emitted pixel and
          *           src[0] .. src[xsize] are read
          *   dst   - first output byte; it is stored through as mlib_d64 *, so
          *           it is presumably required to be 8-byte aligned (the
          *           dispatcher in this file advances dst to such a boundary
          *           before calling)
          *   xsize - number of source pixels that follow src[0]
          *   table - four per-channel lookup tables
          *
          * Writes 4 * xsize + 3 bytes: channels 1..3 of src[0], then all four
          * channels of every following pixel.
          */
   635   mlib_s32 *sp;                        /* pointer to source data */
       
   636   mlib_s32 s0, s1, s2;                 /* source data */
       
   637   mlib_u8 *dl;                         /* pointer to start of destination */
       
   638   mlib_d64 *dp;                        /* aligned pointer to destination */
       
   639   mlib_d64 t0, t1, t2;                 /* destination data */
       
   640   mlib_d64 t3, t4, t5;                 /* destination data */
       
   641   mlib_d64 t6, t7, acc;                /* destination data */
       
   642   mlib_s32 i;                          /* loop variable */
       
         /* Table pointers advanced by 2147483648 (2^31) elements; they are
            indexed below with the raw signed source sample. */
   643   const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u];
       
   644   const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u];
       
   645   const mlib_u8 *tab2 = &table[2][(mlib_u32) 2147483648u];
       
   646   const mlib_u8 *tab3 = &table[3][(mlib_u32) 2147483648u];
       
   647 
       
   648   sp = (void *)src;
       
   649   dl = dst;
       
   650   dp = (mlib_d64 *) dl;
       
   651 
       
         /* NOTE(review): presumably sets the VIS align offset to 7 so that each
            vis_faligndata(t, acc) below pushes the byte loaded into t onto the
            front of acc and shifts the rest down by one byte -- confirm
            against vis_proto.h. */
   652   vis_alignaddr((void *)0, 7);
       
   653 
       
         /* First pixel; the dispatcher stores its channel 0 before calling. */
   654   s0 = *sp++;
       
   655 
       
         /* 8-byte groups built from tab1..tab3 of s0, tab0..tab3 of s1 and
            tab0 of s2. */
   656   if (xsize >= 2) {
       
   657 
       
   658     s1 = sp[0];
       
   659     s2 = sp[1];
       
   660     sp += 2;
       
   661 
       
           /* All groups except the last.  Values are loaded as t7..t0 and fed
              to vis_faligndata in that same order, so t0 is inserted last.
              acc is not initialized before its first use; NOTE(review): this
              presumably relies on eight insertions replacing every byte. */
   662 #pragma pipeloop(0)
       
   663     for (i = 0; i <= xsize - 4; i += 2, sp += 2) {
       
   664       t7 = VIS_LD_U8_I(tab0, s2);
       
   665       t6 = VIS_LD_U8_I(tab3, s1);
       
   666       t5 = VIS_LD_U8_I(tab2, s1);
       
   667       t4 = VIS_LD_U8_I(tab1, s1);
       
   668       t3 = VIS_LD_U8_I(tab0, s1);
       
   669       t2 = VIS_LD_U8_I(tab3, s0);
       
   670       t1 = VIS_LD_U8_I(tab2, s0);
       
   671       t0 = VIS_LD_U8_I(tab1, s0);
       
   672       acc = vis_faligndata(t7, acc);
       
   673       acc = vis_faligndata(t6, acc);
       
   674       acc = vis_faligndata(t5, acc);
       
   675       acc = vis_faligndata(t4, acc);
       
   676       acc = vis_faligndata(t3, acc);
       
   677       acc = vis_faligndata(t2, acc);
       
   678       acc = vis_faligndata(t1, acc);
       
   679       acc = vis_faligndata(t0, acc);
       
             /* s2 is only partly emitted: it becomes the next group's s0. */
   680       s0 = s2;
       
   681       s1 = sp[0];
       
   682       s2 = sp[1];
       
   683       *dp++ = acc;
       
   684     }
       
   685 
       
           /* Last 8-byte group, peeled out of the loop: unlike the loop body it
              does not fetch sp[0]/sp[1] for a following group. */
   686     t7 = VIS_LD_U8_I(tab0, s2);
       
   687     t6 = VIS_LD_U8_I(tab3, s1);
       
   688     t5 = VIS_LD_U8_I(tab2, s1);
       
   689     t4 = VIS_LD_U8_I(tab1, s1);
       
   690     t3 = VIS_LD_U8_I(tab0, s1);
       
   691     t2 = VIS_LD_U8_I(tab3, s0);
       
   692     t1 = VIS_LD_U8_I(tab2, s0);
       
   693     t0 = VIS_LD_U8_I(tab1, s0);
       
   694     acc = vis_faligndata(t7, acc);
       
   695     acc = vis_faligndata(t6, acc);
       
   696     acc = vis_faligndata(t5, acc);
       
   697     acc = vis_faligndata(t4, acc);
       
   698     acc = vis_faligndata(t3, acc);
       
   699     acc = vis_faligndata(t2, acc);
       
   700     acc = vis_faligndata(t1, acc);
       
   701     acc = vis_faligndata(t0, acc);
       
   702     s0 = s2;
       
   703     *dp++ = acc;
       
   704   }
       
   705 
       
         /* Continue with a byte pointer just past the last 8-byte store. */
   706   dl = (mlib_u8 *) dp;
       
   707 
       
         /* Odd xsize: emit a 4-byte half group (tab1..tab3 of s0, tab0 of s1)
            taken from the high half of acc. */
   708   if ((xsize & 1) != 0) {
       
   709     s1 = sp[0];
       
   710     t7 = VIS_LD_U8_I(tab0, s1);
       
   711     t6 = VIS_LD_U8_I(tab3, s0);
       
   712     t5 = VIS_LD_U8_I(tab2, s0);
       
   713     t4 = VIS_LD_U8_I(tab1, s0);
       
   714     acc = vis_faligndata(t7, acc);
       
   715     acc = vis_faligndata(t6, acc);
       
   716     acc = vis_faligndata(t5, acc);
       
   717     acc = vis_faligndata(t4, acc);
       
   718     *(mlib_f32 *) dl = vis_read_hi(acc);
       
   719     dl += 4;
       
   720     s0 = s1;
       
   721   }
       
   722 
       
         /* Channels 1..3 of the final pixel are stored byte by byte. */
   723   dl[0] = tab1[s0];
       
   724   dl[1] = tab2[s0];
       
   725   dl[2] = tab3[s0];
       
   726 }
       
   727 
       
   728 /***************************************************************/
       
   729 void mlib_v_ImageLookUpSI_S32_U8_4_DstOff2_D1(const mlib_s32 *src,
       
   730                                               mlib_u8        *dst,
       
   731                                               mlib_s32       xsize,
       
   732                                               const mlib_u8  **table)
       
   733 {
       
         /*
          * Row kernel of the 4-channel S32 -> U8 single-input lookup for the
          * case where channels 0 and 1 of the first pixel have already been
          * stored by the caller (see mlib_v_ImageLookUpSI_S32_U8_4 in this
          * file).
          *
          *   src   - source samples; src[0] is the partially emitted pixel and
          *           src[0] .. src[xsize] are read
          *   dst   - first output byte; it is stored through as mlib_d64 *, so
          *           it is presumably required to be 8-byte aligned (the
          *           dispatcher in this file advances dst to such a boundary
          *           before calling)
          *   xsize - number of source pixels that follow src[0]
          *   table - four per-channel lookup tables
          *
          * Writes 4 * xsize + 2 bytes: channels 2..3 of src[0], then all four
          * channels of every following pixel.
          */
   734   mlib_s32 *sp;                        /* pointer to source data */
       
   735   mlib_s32 s0, s1, s2;                 /* source data */
       
   736   mlib_u8 *dl;                         /* pointer to start of destination */
       
   737   mlib_d64 *dp;                        /* aligned pointer to destination */
       
   738   mlib_d64 t0, t1, t2;                 /* destination data */
       
   739   mlib_d64 t3, t4, t5;                 /* destination data */
       
   740   mlib_d64 t6, t7, acc;                /* destination data */
       
   741   mlib_s32 i;                          /* loop variable */
       
         /* Table pointers advanced by 2147483648 (2^31) elements; they are
            indexed below with the raw signed source sample. */
   742   const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u];
       
   743   const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u];
       
   744   const mlib_u8 *tab2 = &table[2][(mlib_u32) 2147483648u];
       
   745   const mlib_u8 *tab3 = &table[3][(mlib_u32) 2147483648u];
       
   746 
       
   747   sp = (void *)src;
       
   748   dl = dst;
       
   749   dp = (mlib_d64 *) dl;
       
   750 
       
         /* NOTE(review): presumably sets the VIS align offset to 7 so that each
            vis_faligndata(t, acc) below pushes the byte loaded into t onto the
            front of acc and shifts the rest down by one byte -- confirm
            against vis_proto.h. */
   751   vis_alignaddr((void *)0, 7);
       
   752 
       
         /* First pixel; the dispatcher stores its channels 0 and 1 before
            calling. */
   753   s0 = *sp++;
       
   754 
       
         /* 8-byte groups built from tab2..tab3 of s0, tab0..tab3 of s1 and
            tab0..tab1 of s2. */
   755   if (xsize >= 2) {
       
   756 
       
   757     s1 = sp[0];
       
   758     s2 = sp[1];
       
   759     sp += 2;
       
   760 
       
           /* All groups except the last.  Values are loaded as t7..t0 and fed
              to vis_faligndata in that same order, so t0 is inserted last.
              acc is not initialized before its first use; NOTE(review): this
              presumably relies on eight insertions replacing every byte. */
   761 #pragma pipeloop(0)
       
   762     for (i = 0; i <= xsize - 4; i += 2, sp += 2) {
       
   763       t7 = VIS_LD_U8_I(tab1, s2);
       
   764       t6 = VIS_LD_U8_I(tab0, s2);
       
   765       t5 = VIS_LD_U8_I(tab3, s1);
       
   766       t4 = VIS_LD_U8_I(tab2, s1);
       
   767       t3 = VIS_LD_U8_I(tab1, s1);
       
   768       t2 = VIS_LD_U8_I(tab0, s1);
       
   769       t1 = VIS_LD_U8_I(tab3, s0);
       
   770       t0 = VIS_LD_U8_I(tab2, s0);
       
   771       acc = vis_faligndata(t7, acc);
       
   772       acc = vis_faligndata(t6, acc);
       
   773       acc = vis_faligndata(t5, acc);
       
   774       acc = vis_faligndata(t4, acc);
       
   775       acc = vis_faligndata(t3, acc);
       
   776       acc = vis_faligndata(t2, acc);
       
   777       acc = vis_faligndata(t1, acc);
       
   778       acc = vis_faligndata(t0, acc);
       
             /* s2 is only partly emitted: it becomes the next group's s0. */
   779       s0 = s2;
       
   780       s1 = sp[0];
       
   781       s2 = sp[1];
       
   782       *dp++ = acc;
       
   783     }
       
   784 
       
           /* Last 8-byte group, peeled out of the loop: unlike the loop body it
              does not fetch sp[0]/sp[1] for a following group. */
   785     t7 = VIS_LD_U8_I(tab1, s2);
       
   786     t6 = VIS_LD_U8_I(tab0, s2);
       
   787     t5 = VIS_LD_U8_I(tab3, s1);
       
   788     t4 = VIS_LD_U8_I(tab2, s1);
       
   789     t3 = VIS_LD_U8_I(tab1, s1);
       
   790     t2 = VIS_LD_U8_I(tab0, s1);
       
   791     t1 = VIS_LD_U8_I(tab3, s0);
       
   792     t0 = VIS_LD_U8_I(tab2, s0);
       
   793     acc = vis_faligndata(t7, acc);
       
   794     acc = vis_faligndata(t6, acc);
       
   795     acc = vis_faligndata(t5, acc);
       
   796     acc = vis_faligndata(t4, acc);
       
   797     acc = vis_faligndata(t3, acc);
       
   798     acc = vis_faligndata(t2, acc);
       
   799     acc = vis_faligndata(t1, acc);
       
   800     acc = vis_faligndata(t0, acc);
       
   801     s0 = s2;
       
   802     *dp++ = acc;
       
   803   }
       
   804 
       
         /* Continue with a byte pointer just past the last 8-byte store. */
   805   dl = (mlib_u8 *) dp;
       
   806 
       
         /* Odd xsize: emit a 4-byte half group (tab2..tab3 of s0, tab0..tab1
            of s1) taken from the high half of acc. */
   807   if ((xsize & 1) != 0) {
       
   808     s1 = sp[0];
       
   809     t7 = VIS_LD_U8_I(tab1, s1);
       
   810     t6 = VIS_LD_U8_I(tab0, s1);
       
   811     t5 = VIS_LD_U8_I(tab3, s0);
       
   812     t4 = VIS_LD_U8_I(tab2, s0);
       
   813     acc = vis_faligndata(t7, acc);
       
   814     acc = vis_faligndata(t6, acc);
       
   815     acc = vis_faligndata(t5, acc);
       
   816     acc = vis_faligndata(t4, acc);
       
   817     *(mlib_f32 *) dl = vis_read_hi(acc);
       
   818     dl += 4;
       
   819     s0 = s1;
       
   820   }
       
   821 
       
         /* Channels 2..3 of the final pixel are stored byte by byte. */
   822   dl[0] = tab2[s0];
       
   823   dl[1] = tab3[s0];
       
   824 }
       
   825 
       
   826 /***************************************************************/
       
   827 void mlib_v_ImageLookUpSI_S32_U8_4_DstOff3_D1(const mlib_s32 *src,
       
   828                                               mlib_u8        *dst,
       
   829                                               mlib_s32       xsize,
       
   830                                               const mlib_u8  **table)
       
   831 {
       
         /*
          * Row kernel of the 4-channel S32 -> U8 single-input lookup for the
          * case where channels 0..2 of the first pixel have already been
          * stored by the caller (see mlib_v_ImageLookUpSI_S32_U8_4 in this
          * file).
          *
          *   src   - source samples; src[0] is the partially emitted pixel and
          *           src[0] .. src[xsize] are read
          *   dst   - first output byte; it is stored through as mlib_d64 *, so
          *           it is presumably required to be 8-byte aligned (the
          *           dispatcher in this file advances dst to such a boundary
          *           before calling)
          *   xsize - number of source pixels that follow src[0]
          *   table - four per-channel lookup tables
          *
          * Writes 4 * xsize + 1 bytes: channel 3 of src[0], then all four
          * channels of every following pixel.
          */
   832   mlib_s32 *sp;                        /* pointer to source data */
       
   833   mlib_s32 s0, s1, s2;                 /* source data */
       
   834   mlib_u8 *dl;                         /* pointer to start of destination */
       
   835   mlib_d64 *dp;                        /* aligned pointer to destination */
       
   836   mlib_d64 t0, t1, t2;                 /* destination data */
       
   837   mlib_d64 t3, t4, t5;                 /* destination data */
       
   838   mlib_d64 t6, t7, acc;                /* destination data */
       
   839   mlib_s32 i;                          /* loop variable */
       
         /* Table pointers advanced by 2147483648 (2^31) elements; they are
            indexed below with the raw signed source sample. */
   840   const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u];
       
   841   const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u];
       
   842   const mlib_u8 *tab2 = &table[2][(mlib_u32) 2147483648u];
       
   843   const mlib_u8 *tab3 = &table[3][(mlib_u32) 2147483648u];
       
   844 
       
   845   sp = (void *)src;
       
   846   dl = dst;
       
   847   dp = (mlib_d64 *) dl;
       
   848 
       
         /* NOTE(review): presumably sets the VIS align offset to 7 so that each
            vis_faligndata(t, acc) below pushes the byte loaded into t onto the
            front of acc and shifts the rest down by one byte -- confirm
            against vis_proto.h. */
   849   vis_alignaddr((void *)0, 7);
       
   850 
       
         /* First pixel; the dispatcher stores its channels 0..2 before
            calling. */
   851   s0 = *sp++;
       
   852 
       
         /* 8-byte groups built from tab3 of s0, tab0..tab3 of s1 and
            tab0..tab2 of s2. */
   853   if (xsize >= 2) {
       
   854 
       
   855     s1 = sp[0];
       
   856     s2 = sp[1];
       
   857     sp += 2;
       
   858 
       
           /* All groups except the last.  Values are loaded as t7..t0 and fed
              to vis_faligndata in that same order, so t0 is inserted last.
              acc is not initialized before its first use; NOTE(review): this
              presumably relies on eight insertions replacing every byte. */
   859 #pragma pipeloop(0)
       
   860     for (i = 0; i <= xsize - 4; i += 2, sp += 2) {
       
   861       t7 = VIS_LD_U8_I(tab2, s2);
       
   862       t6 = VIS_LD_U8_I(tab1, s2);
       
   863       t5 = VIS_LD_U8_I(tab0, s2);
       
   864       t4 = VIS_LD_U8_I(tab3, s1);
       
   865       t3 = VIS_LD_U8_I(tab2, s1);
       
   866       t2 = VIS_LD_U8_I(tab1, s1);
       
   867       t1 = VIS_LD_U8_I(tab0, s1);
       
   868       t0 = VIS_LD_U8_I(tab3, s0);
       
   869       acc = vis_faligndata(t7, acc);
       
   870       acc = vis_faligndata(t6, acc);
       
   871       acc = vis_faligndata(t5, acc);
       
   872       acc = vis_faligndata(t4, acc);
       
   873       acc = vis_faligndata(t3, acc);
       
   874       acc = vis_faligndata(t2, acc);
       
   875       acc = vis_faligndata(t1, acc);
       
   876       acc = vis_faligndata(t0, acc);
       
             /* s2 is only partly emitted: it becomes the next group's s0. */
   877       s0 = s2;
       
   878       s1 = sp[0];
       
   879       s2 = sp[1];
       
   880       *dp++ = acc;
       
   881     }
       
   882 
       
           /* Last 8-byte group, peeled out of the loop: unlike the loop body it
              does not fetch sp[0]/sp[1] for a following group. */
   883     t7 = VIS_LD_U8_I(tab2, s2);
       
   884     t6 = VIS_LD_U8_I(tab1, s2);
       
   885     t5 = VIS_LD_U8_I(tab0, s2);
       
   886     t4 = VIS_LD_U8_I(tab3, s1);
       
   887     t3 = VIS_LD_U8_I(tab2, s1);
       
   888     t2 = VIS_LD_U8_I(tab1, s1);
       
   889     t1 = VIS_LD_U8_I(tab0, s1);
       
   890     t0 = VIS_LD_U8_I(tab3, s0);
       
   891     acc = vis_faligndata(t7, acc);
       
   892     acc = vis_faligndata(t6, acc);
       
   893     acc = vis_faligndata(t5, acc);
       
   894     acc = vis_faligndata(t4, acc);
       
   895     acc = vis_faligndata(t3, acc);
       
   896     acc = vis_faligndata(t2, acc);
       
   897     acc = vis_faligndata(t1, acc);
       
   898     acc = vis_faligndata(t0, acc);
       
   899     s0 = s2;
       
   900     *dp++ = acc;
       
   901   }
       
   902 
       
         /* Continue with a byte pointer just past the last 8-byte store. */
   903   dl = (mlib_u8 *) dp;
       
   904 
       
         /* Odd xsize: emit a 4-byte half group (tab3 of s0, tab0..tab2 of s1)
            taken from the high half of acc. */
   905   if ((xsize & 1) != 0) {
       
   906     s1 = sp[0];
       
   907     t7 = VIS_LD_U8_I(tab2, s1);
       
   908     t6 = VIS_LD_U8_I(tab1, s1);
       
   909     t5 = VIS_LD_U8_I(tab0, s1);
       
   910     t4 = VIS_LD_U8_I(tab3, s0);
       
   911     acc = vis_faligndata(t7, acc);
       
   912     acc = vis_faligndata(t6, acc);
       
   913     acc = vis_faligndata(t5, acc);
       
   914     acc = vis_faligndata(t4, acc);
       
   915     *(mlib_f32 *) dl = vis_read_hi(acc);
       
   916     dl += 4;
       
   917     s0 = s1;
       
   918   }
       
   919 
       
         /* Channel 3 of the final pixel is stored as a single byte. */
   920   dl[0] = tab3[s0];
       
   921 }
       
   922 
       
   923 /***************************************************************/
       
   924 void mlib_v_ImageLookUpSI_S32_U8_4(const mlib_s32 *src,
       
   925                                    mlib_s32       slb,
       
   926                                    mlib_u8        *dst,
       
   927                                    mlib_s32       dlb,
       
   928                                    mlib_s32       xsize,
       
   929                                    mlib_s32       ysize,
       
   930                                    const mlib_u8  **table)
       
   931 {
       
   932   mlib_s32 *sl;
       
   933   mlib_u8 *dl;
       
   934   mlib_s32 j;
       
   935   const mlib_u8 *tab0 = &table[0][(mlib_u32) 2147483648u];
       
   936   const mlib_u8 *tab1 = &table[1][(mlib_u32) 2147483648u];
       
   937   const mlib_u8 *tab2 = &table[2][(mlib_u32) 2147483648u];
       
   938   const mlib_u8 *tab3 = &table[3][(mlib_u32) 2147483648u];
       
   939 
       
   940   sl = (void *)src;
       
   941   dl = dst;
       
   942 
       
   943   /* row loop */
       
   944   for (j = 0; j < ysize; j++) {
       
   945     mlib_s32 *sp = sl;
       
   946     mlib_u8 *dp = dl;
       
   947     mlib_s32 off, s0, size = xsize;
       
   948 
       
   949     off = (mlib_s32) ((8 - ((mlib_addr) dp & 7)) & 7);
       
   950 
       
   951     if ((off >= 4) && (size > 0)) {
       
   952       s0 = *sp++;
       
   953       *dp++ = tab0[s0];
       
   954       *dp++ = tab1[s0];
       
   955       *dp++ = tab2[s0];
       
   956       *dp++ = tab3[s0];
       
   957       size--;
       
   958     }
       
   959 
       
   960     if (size > 0) {
       
   961       off = (mlib_s32) ((4 - ((mlib_addr) dp & 3)) & 3);
       
   962 
       
   963       if (off == 0) {
       
   964         mlib_v_ImageLookUpSI_S32_U8_4_DstOff0_D1(sp, dp, size, table);
       
   965       }
       
   966       else if (off == 1) {
       
   967         s0 = *sp;
       
   968         *dp++ = tab0[s0];
       
   969         size--;
       
   970         mlib_v_ImageLookUpSI_S32_U8_4_DstOff1_D1(sp, dp, size, table);
       
   971       }
       
   972       else if (off == 2) {
       
   973         s0 = *sp;
       
   974         *dp++ = tab0[s0];
       
   975         *dp++ = tab1[s0];
       
   976         size--;
       
   977         mlib_v_ImageLookUpSI_S32_U8_4_DstOff2_D1(sp, dp, size, table);
       
   978       }
       
   979       else if (off == 3) {
       
   980         s0 = *sp;
       
   981         *dp++ = tab0[s0];
       
   982         *dp++ = tab1[s0];
       
   983         *dp++ = tab2[s0];
       
   984         size--;
       
   985         mlib_v_ImageLookUpSI_S32_U8_4_DstOff3_D1(sp, dp, size, table);
       
   986       }
       
   987     }
       
   988 
       
   989     sl = (mlib_s32 *) ((mlib_u8 *) sl + slb);
       
   990     dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
       
   991   }
       
   992 }
       
   993 
       
   994 /***************************************************************/