jdk/src/solaris/native/sun/awt/medialib/mlib_v_ImageLookUpU8U16Func.c
changeset 2 90ce3da70b43
child 5506 202f599c92aa
equal deleted inserted replaced
0:fd16c54261b3 2:90ce3da70b43
       
     1 /*
       
     2  * Copyright 2003 Sun Microsystems, Inc.  All Rights Reserved.
       
     3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
       
     4  *
       
     5  * This code is free software; you can redistribute it and/or modify it
       
     6  * under the terms of the GNU General Public License version 2 only, as
       
     7  * published by the Free Software Foundation.  Sun designates this
       
     8  * particular file as subject to the "Classpath" exception as provided
       
     9  * by Sun in the LICENSE file that accompanied this code.
       
    10  *
       
    11  * This code is distributed in the hope that it will be useful, but WITHOUT
       
    12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       
    13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       
    14  * version 2 for more details (a copy is included in the LICENSE file that
       
    15  * accompanied this code).
       
    16  *
       
    17  * You should have received a copy of the GNU General Public License version
       
    18  * 2 along with this work; if not, write to the Free Software Foundation,
       
    19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       
    20  *
       
    21  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
       
    22  * CA 95054 USA or visit www.sun.com if you need additional information or
       
    23  * have any questions.
       
    24  */
       
    25 
       
    26 
       
    27 
       
    28 #include "vis_proto.h"
       
    29 #include "mlib_image.h"
       
    30 #include "mlib_v_ImageLookUpFunc.h"
       
    31 
       
    32 /***************************************************************/
       
    33 static void mlib_v_ImageLookUp_U8_U16_124_SrcOff0_D1(const mlib_u8  *src,
       
    34                                                      mlib_u16       *dst,
       
    35                                                      mlib_s32       xsize,
       
    36                                                      const mlib_u16 *table0,
       
    37                                                      const mlib_u16 *table1,
       
    38                                                      const mlib_u16 *table2,
       
    39                                                      const mlib_u16 *table3);
       
    40 
       
    41 static void mlib_v_ImageLookUp_U8_U16_124_SrcOff1_D1(const mlib_u8  *src,
       
    42                                                      mlib_u16       *dst,
       
    43                                                      mlib_s32       xsize,
       
    44                                                      const mlib_u16 *table0,
       
    45                                                      const mlib_u16 *table1,
       
    46                                                      const mlib_u16 *table2,
       
    47                                                      const mlib_u16 *table3);
       
    48 
       
    49 static void mlib_v_ImageLookUp_U8_U16_124_SrcOff2_D1(const mlib_u8  *src,
       
    50                                                      mlib_u16       *dst,
       
    51                                                      mlib_s32       xsize,
       
    52                                                      const mlib_u16 *table0,
       
    53                                                      const mlib_u16 *table1,
       
    54                                                      const mlib_u16 *table2,
       
    55                                                      const mlib_u16 *table3);
       
    56 
       
    57 static void mlib_v_ImageLookUp_U8_U16_124_SrcOff3_D1(const mlib_u8  *src,
       
    58                                                      mlib_u16       *dst,
       
    59                                                      mlib_s32       xsize,
       
    60                                                      const mlib_u16 *table0,
       
    61                                                      const mlib_u16 *table1,
       
    62                                                      const mlib_u16 *table2,
       
    63                                                      const mlib_u16 *table3);
       
    64 
       
    65 static void mlib_v_ImageLookUp_U8_U16_3_SrcOff0_D1(const mlib_u8  *src,
       
    66                                                    mlib_u16       *dst,
       
    67                                                    mlib_s32       xsize,
       
    68                                                    const mlib_u16 *table0,
       
    69                                                    const mlib_u16 *table1,
       
    70                                                    const mlib_u16 *table2);
       
    71 
       
    72 static void mlib_v_ImageLookUp_U8_U16_3_SrcOff1_D1(const mlib_u8  *src,
       
    73                                                    mlib_u16       *dst,
       
    74                                                    mlib_s32       xsize,
       
    75                                                    const mlib_u16 *table0,
       
    76                                                    const mlib_u16 *table1,
       
    77                                                    const mlib_u16 *table2);
       
    78 
       
    79 static void mlib_v_ImageLookUp_U8_U16_3_SrcOff2_D1(const mlib_u8  *src,
       
    80                                                    mlib_u16       *dst,
       
    81                                                    mlib_s32       xsize,
       
    82                                                    const mlib_u16 *table0,
       
    83                                                    const mlib_u16 *table1,
       
    84                                                    const mlib_u16 *table2);
       
    85 
       
    86 static void mlib_v_ImageLookUp_U8_U16_3_SrcOff3_D1(const mlib_u8  *src,
       
    87                                                    mlib_u16       *dst,
       
    88                                                    mlib_s32       xsize,
       
    89                                                    const mlib_u16 *table0,
       
    90                                                    const mlib_u16 *table1,
       
    91                                                    const mlib_u16 *table2);
       
    92 
       
    93 /***************************************************************/
       
    94 #define VIS_LD_U16_I(X, Y)      vis_ld_u16_i((void *)(X), (Y))
       
    95 
       
    96 /***************************************************************/
       
    97 void mlib_v_ImageLookUp_U8_U16_124_SrcOff0_D1(const mlib_u8  *src,
       
    98                                               mlib_u16       *dst,
       
    99                                               mlib_s32       xsize,
       
   100                                               const mlib_u16 *table0,
       
   101                                               const mlib_u16 *table1,
       
   102                                               const mlib_u16 *table2,
       
   103                                               const mlib_u16 *table3)
       
   104 {
       
   105   mlib_u32 *sa;          /* aligned pointer to source data */
       
   106   mlib_u8  *sp;          /* pointer to source data */
       
   107   mlib_u32 s0;           /* source data */
       
   108   mlib_u16 *dl;          /* pointer to start of destination */
       
   109   mlib_u16 *dend;        /* pointer to end of destination */
       
   110   mlib_d64 *dp;          /* aligned pointer to destination */
       
   111   mlib_d64 t0, t1, t2;   /* destination data */
       
   112   mlib_d64 t3, acc0;     /* destination data */
       
   113   mlib_s32 emask;        /* edge mask */
       
   114   mlib_s32 i, num;       /* loop variable */
       
   115 
       
   116   sa   = (mlib_u32*)src;
       
   117   dl   = dst;
       
   118   dp   = (mlib_d64 *) dl;
       
   119   dend = dl + xsize - 1;
       
   120 
       
   121   vis_alignaddr((void *) 0, 6);
       
   122 
       
   123   i = 0;
       
   124 
       
   125   if (xsize >= 4) {
       
   126 
       
   127     s0 = *sa++;
       
   128 
       
   129 #pragma pipeloop(0)
       
   130     for(i = 0; i <= xsize - 8; i+=4) {
       
   131       t3 = VIS_LD_U16_I(table3, (s0 << 1) & 0x1FE);
       
   132       t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
       
   133       t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
       
   134       t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
       
   135       acc0 = vis_faligndata(t3, acc0);
       
   136       acc0 = vis_faligndata(t2, acc0);
       
   137       acc0 = vis_faligndata(t1, acc0);
       
   138       acc0 = vis_faligndata(t0, acc0);
       
   139       s0 = *sa++;
       
   140       *dp++ = acc0;
       
   141     }
       
   142 
       
   143     t3 = VIS_LD_U16_I(table3, (s0 << 1) & 0x1FE);
       
   144     t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
       
   145     t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
       
   146     t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
       
   147     acc0 = vis_faligndata(t3, acc0);
       
   148     acc0 = vis_faligndata(t2, acc0);
       
   149     acc0 = vis_faligndata(t1, acc0);
       
   150     acc0 = vis_faligndata(t0, acc0);
       
   151     *dp++ = acc0;
       
   152   }
       
   153 
       
   154   sp = (mlib_u8*)sa;
       
   155 
       
   156   if ((mlib_addr) dp <= (mlib_addr) dend) {
       
   157 
       
   158     num = (mlib_u16*) dend - (mlib_u16*) dp;
       
   159     sp  += num;
       
   160     num ++;
       
   161 
       
   162     if (num == 1) {
       
   163       s0 = (mlib_s32) *sp;
       
   164       sp --;
       
   165 
       
   166       t0  = VIS_LD_U16_I(table0, 2*s0);
       
   167       acc0 = vis_faligndata(t0, acc0);
       
   168     } else if (num  == 2) {
       
   169       s0 = (mlib_s32) *sp;
       
   170       sp --;
       
   171 
       
   172       t0  = VIS_LD_U16_I(table1, 2*s0);
       
   173       acc0 = vis_faligndata(t0, acc0);
       
   174 
       
   175       s0 = (mlib_s32) *sp;
       
   176       sp --;
       
   177 
       
   178       t0  = VIS_LD_U16_I(table0, 2*s0);
       
   179       acc0 = vis_faligndata(t0, acc0);
       
   180     } else if (num == 3) {
       
   181       s0 = (mlib_s32) *sp;
       
   182       sp --;
       
   183 
       
   184       t0  = VIS_LD_U16_I(table2, 2*s0);
       
   185       acc0 = vis_faligndata(t0, acc0);
       
   186 
       
   187       s0 = (mlib_s32) *sp;
       
   188       sp --;
       
   189 
       
   190       t0  = VIS_LD_U16_I(table1, 2*s0);
       
   191       acc0 = vis_faligndata(t0, acc0);
       
   192 
       
   193       s0 = (mlib_s32) *sp;
       
   194       sp --;
       
   195 
       
   196       t0  = VIS_LD_U16_I(table0, 2*s0);
       
   197       acc0 = vis_faligndata(t0, acc0);
       
   198     }
       
   199 
       
   200     emask = vis_edge16(dp, dend);
       
   201     vis_pst_16(acc0, dp, emask);
       
   202   }
       
   203 }
       
   204 
       
   205 /***************************************************************/
       
   206 void mlib_v_ImageLookUp_U8_U16_124_SrcOff1_D1(const mlib_u8  *src,
       
   207                                               mlib_u16       *dst,
       
   208                                               mlib_s32       xsize,
       
   209                                               const mlib_u16 *table0,
       
   210                                               const mlib_u16 *table1,
       
   211                                               const mlib_u16 *table2,
       
   212                                               const mlib_u16 *table3)
       
   213 {
       
   214   mlib_u32 *sa;          /* aligned pointer to source data */
       
   215   mlib_u8  *sp;          /* pointer to source data */
       
   216   mlib_u32 s0, s1;       /* source data */
       
   217   mlib_u16 *dl;          /* pointer to start of destination */
       
   218   mlib_u16 *dend;        /* pointer to end of destination */
       
   219   mlib_d64 *dp;          /* aligned pointer to destination */
       
   220   mlib_d64 t0, t1, t2;   /* destination data */
       
   221   mlib_d64 t3, acc0;     /* destination data */
       
   222   mlib_s32 emask;        /* edge mask */
       
   223   mlib_s32 i, num;       /* loop variable */
       
   224 
       
   225   sa   = (mlib_u32*)(src - 1);
       
   226   dl   = dst;
       
   227   dp   = (mlib_d64 *) dl;
       
   228   dend = dl + xsize - 1;
       
   229 
       
   230   vis_alignaddr((void *) 0, 6);
       
   231 
       
   232   s0 = *sa++;
       
   233 
       
   234   if (xsize >= 4) {
       
   235 
       
   236     s1 = *sa++;
       
   237 
       
   238 #pragma pipeloop(0)
       
   239     for(i = 0; i <= xsize - 8; i+=4) {
       
   240       t3 = VIS_LD_U16_I(table3, (s1 >> 23) & 0x1FE);
       
   241       t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
       
   242       t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
       
   243       t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
       
   244       acc0 = vis_faligndata(t3, acc0);
       
   245       acc0 = vis_faligndata(t2, acc0);
       
   246       acc0 = vis_faligndata(t1, acc0);
       
   247       acc0 = vis_faligndata(t0, acc0);
       
   248       s0 = s1;
       
   249       s1 = *sa++;
       
   250       *dp++ = acc0;
       
   251     }
       
   252 
       
   253     t3 = VIS_LD_U16_I(table3, (s1 >> 23) & 0x1FE);
       
   254     t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
       
   255     t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
       
   256     t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
       
   257     acc0 = vis_faligndata(t3, acc0);
       
   258     acc0 = vis_faligndata(t2, acc0);
       
   259     acc0 = vis_faligndata(t1, acc0);
       
   260     acc0 = vis_faligndata(t0, acc0);
       
   261     s0 = s1;
       
   262     *dp++ = acc0;
       
   263   }
       
   264 
       
   265   sp = (mlib_u8*)sa;
       
   266   sp -= 3;
       
   267 
       
   268   if ((mlib_addr) dp <= (mlib_addr) dend) {
       
   269 
       
   270     num = (mlib_u16*) dend - (mlib_u16*) dp;
       
   271     sp  += num;
       
   272     num ++;
       
   273 
       
   274     if (num == 1) {
       
   275       s0 = (mlib_s32) *sp;
       
   276       sp --;
       
   277 
       
   278       t0  = VIS_LD_U16_I(table0, 2*s0);
       
   279       acc0 = vis_faligndata(t0, acc0);
       
   280     } else if (num  == 2) {
       
   281       s0 = (mlib_s32) *sp;
       
   282       sp --;
       
   283 
       
   284       t0  = VIS_LD_U16_I(table1, 2*s0);
       
   285       acc0 = vis_faligndata(t0, acc0);
       
   286 
       
   287       s0 = (mlib_s32) *sp;
       
   288       sp --;
       
   289 
       
   290       t0  = VIS_LD_U16_I(table0, 2*s0);
       
   291       acc0 = vis_faligndata(t0, acc0);
       
   292     } else if (num == 3) {
       
   293       s0 = (mlib_s32) *sp;
       
   294       sp --;
       
   295 
       
   296       t0  = VIS_LD_U16_I(table2, 2*s0);
       
   297       acc0 = vis_faligndata(t0, acc0);
       
   298 
       
   299       s0 = (mlib_s32) *sp;
       
   300       sp --;
       
   301 
       
   302       t0  = VIS_LD_U16_I(table1, 2*s0);
       
   303       acc0 = vis_faligndata(t0, acc0);
       
   304 
       
   305       s0 = (mlib_s32) *sp;
       
   306       sp --;
       
   307 
       
   308       t0  = VIS_LD_U16_I(table0, 2*s0);
       
   309       acc0 = vis_faligndata(t0, acc0);
       
   310     }
       
   311 
       
   312     emask = vis_edge16(dp, dend);
       
   313     vis_pst_16(acc0, dp, emask);
       
   314   }
       
   315 }
       
   316 
       
   317 /***************************************************************/
       
   318 void mlib_v_ImageLookUp_U8_U16_124_SrcOff2_D1(const mlib_u8  *src,
       
   319                                               mlib_u16       *dst,
       
   320                                               mlib_s32       xsize,
       
   321                                               const mlib_u16 *table0,
       
   322                                               const mlib_u16 *table1,
       
   323                                               const mlib_u16 *table2,
       
   324                                               const mlib_u16 *table3)
       
   325 {
       
   326   mlib_u32 *sa;          /* aligned pointer to source data */
       
   327   mlib_u8  *sp;          /* pointer to source data */
       
   328   mlib_u32 s0, s1;       /* source data */
       
   329   mlib_u16 *dl;          /* pointer to start of destination */
       
   330   mlib_u16 *dend;        /* pointer to end of destination */
       
   331   mlib_d64 *dp;          /* aligned pointer to destination */
       
   332   mlib_d64 t0, t1, t2;   /* destination data */
       
   333   mlib_d64 t3, acc0;     /* destination data */
       
   334   mlib_s32 emask;        /* edge mask */
       
   335   mlib_s32 i, num;       /* loop variable */
       
   336 
       
   337   sa   = (mlib_u32*)(src - 2);
       
   338   dl   = dst;
       
   339   dp   = (mlib_d64 *) dl;
       
   340   dend = dl + xsize - 1;
       
   341 
       
   342   vis_alignaddr((void *) 0, 6);
       
   343 
       
   344   s0 = *sa++;
       
   345 
       
   346   if (xsize >= 4) {
       
   347 
       
   348     s1 = *sa++;
       
   349 
       
   350 #pragma pipeloop(0)
       
   351     for(i = 0; i <= xsize - 8; i+=4) {
       
   352       t3 = VIS_LD_U16_I(table3, (s1 >> 15) & 0x1FE);
       
   353       t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
       
   354       t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
       
   355       t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
       
   356       acc0 = vis_faligndata(t3, acc0);
       
   357       acc0 = vis_faligndata(t2, acc0);
       
   358       acc0 = vis_faligndata(t1, acc0);
       
   359       acc0 = vis_faligndata(t0, acc0);
       
   360       s0 = s1;
       
   361       s1 = *sa++;
       
   362       *dp++ = acc0;
       
   363     }
       
   364 
       
   365     t3 = VIS_LD_U16_I(table3, (s1 >> 15) & 0x1FE);
       
   366     t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
       
   367     t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
       
   368     t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
       
   369     acc0 = vis_faligndata(t3, acc0);
       
   370     acc0 = vis_faligndata(t2, acc0);
       
   371     acc0 = vis_faligndata(t1, acc0);
       
   372     acc0 = vis_faligndata(t0, acc0);
       
   373     s0 = s1;
       
   374     *dp++ = acc0;
       
   375   }
       
   376 
       
   377   sp = (mlib_u8*)sa;
       
   378   sp -= 2;
       
   379 
       
   380   if ((mlib_addr) dp <= (mlib_addr) dend) {
       
   381 
       
   382     num = (mlib_u16*) dend - (mlib_u16*) dp;
       
   383     sp  += num;
       
   384     num ++;
       
   385 
       
   386     if (num == 1) {
       
   387       s0 = (mlib_s32) *sp;
       
   388       sp --;
       
   389 
       
   390       t0  = VIS_LD_U16_I(table0, 2*s0);
       
   391       acc0 = vis_faligndata(t0, acc0);
       
   392     } else if (num  == 2) {
       
   393       s0 = (mlib_s32) *sp;
       
   394       sp --;
       
   395 
       
   396       t0  = VIS_LD_U16_I(table1, 2*s0);
       
   397       acc0 = vis_faligndata(t0, acc0);
       
   398 
       
   399       s0 = (mlib_s32) *sp;
       
   400       sp --;
       
   401 
       
   402       t0  = VIS_LD_U16_I(table0, 2*s0);
       
   403       acc0 = vis_faligndata(t0, acc0);
       
   404     } else if (num == 3) {
       
   405       s0 = (mlib_s32) *sp;
       
   406       sp --;
       
   407 
       
   408       t0  = VIS_LD_U16_I(table2, 2*s0);
       
   409       acc0 = vis_faligndata(t0, acc0);
       
   410 
       
   411       s0 = (mlib_s32) *sp;
       
   412       sp --;
       
   413 
       
   414       t0  = VIS_LD_U16_I(table1, 2*s0);
       
   415       acc0 = vis_faligndata(t0, acc0);
       
   416 
       
   417       s0 = (mlib_s32) *sp;
       
   418       sp --;
       
   419 
       
   420       t0  = VIS_LD_U16_I(table0, 2*s0);
       
   421       acc0 = vis_faligndata(t0, acc0);
       
   422     }
       
   423 
       
   424     emask = vis_edge16(dp, dend);
       
   425     vis_pst_16(acc0, dp, emask);
       
   426   }
       
   427 }
       
   428 
       
   429 /***************************************************************/
       
   430 void mlib_v_ImageLookUp_U8_U16_124_SrcOff3_D1(const mlib_u8  *src,
       
   431                                               mlib_u16       *dst,
       
   432                                               mlib_s32       xsize,
       
   433                                               const mlib_u16 *table0,
       
   434                                               const mlib_u16 *table1,
       
   435                                               const mlib_u16 *table2,
       
   436                                               const mlib_u16 *table3)
       
   437 {
       
   438   mlib_u32 *sa;          /* aligned pointer to source data */
       
   439   mlib_u8  *sp;          /* pointer to source data */
       
   440   mlib_u32 s0, s1;       /* source data */
       
   441   mlib_u16 *dl;          /* pointer to start of destination */
       
   442   mlib_u16 *dend;        /* pointer to end of destination */
       
   443   mlib_d64 *dp;          /* aligned pointer to destination */
       
   444   mlib_d64 t0, t1, t2;   /* destination data */
       
   445   mlib_d64 t3, acc0;     /* destination data */
       
   446   mlib_s32 emask;        /* edge mask */
       
   447   mlib_s32 i, num;       /* loop variable */
       
   448 
       
   449   sa   = (mlib_u32*)(src - 3);
       
   450   dl   = dst;
       
   451   dp   = (mlib_d64 *) dl;
       
   452   dend = dl + xsize - 1;
       
   453 
       
   454   vis_alignaddr((void *) 0, 6);
       
   455 
       
   456   s0 = *sa++;
       
   457 
       
   458   if (xsize >= 4) {
       
   459 
       
   460     s1 = *sa++;
       
   461 
       
   462 #pragma pipeloop(0)
       
   463     for(i = 0; i <= xsize - 8; i+=4) {
       
   464       t3 = VIS_LD_U16_I(table3, (s1 >> 7) & 0x1FE);
       
   465       t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
       
   466       t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
       
   467       t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
       
   468       acc0 = vis_faligndata(t3, acc0);
       
   469       acc0 = vis_faligndata(t2, acc0);
       
   470       acc0 = vis_faligndata(t1, acc0);
       
   471       acc0 = vis_faligndata(t0, acc0);
       
   472       s0 = s1;
       
   473       s1 = *sa++;
       
   474       *dp++ = acc0;
       
   475     }
       
   476 
       
   477     t3 = VIS_LD_U16_I(table3, (s1 >> 7) & 0x1FE);
       
   478     t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
       
   479     t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
       
   480     t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
       
   481     acc0 = vis_faligndata(t3, acc0);
       
   482     acc0 = vis_faligndata(t2, acc0);
       
   483     acc0 = vis_faligndata(t1, acc0);
       
   484     acc0 = vis_faligndata(t0, acc0);
       
   485     s0 = s1;
       
   486     *dp++ = acc0;
       
   487   }
       
   488 
       
   489   sp = (mlib_u8*)sa;
       
   490   sp -= 1;
       
   491 
       
   492   if ((mlib_addr) dp <= (mlib_addr) dend) {
       
   493 
       
   494     num = (mlib_u16*) dend - (mlib_u16*) dp;
       
   495     sp  += num;
       
   496     num ++;
       
   497 
       
   498     if (num == 1) {
       
   499       s0 = (mlib_s32) *sp;
       
   500       sp --;
       
   501 
       
   502       t0  = VIS_LD_U16_I(table0, 2*s0);
       
   503       acc0 = vis_faligndata(t0, acc0);
       
   504     } else if (num  == 2) {
       
   505       s0 = (mlib_s32) *sp;
       
   506       sp --;
       
   507 
       
   508       t0  = VIS_LD_U16_I(table1, 2*s0);
       
   509       acc0 = vis_faligndata(t0, acc0);
       
   510 
       
   511       s0 = (mlib_s32) *sp;
       
   512       sp --;
       
   513 
       
   514       t0  = VIS_LD_U16_I(table0, 2*s0);
       
   515       acc0 = vis_faligndata(t0, acc0);
       
   516     } else if (num == 3) {
       
   517       s0 = (mlib_s32) *sp;
       
   518       sp --;
       
   519 
       
   520       t0  = VIS_LD_U16_I(table2, 2*s0);
       
   521       acc0 = vis_faligndata(t0, acc0);
       
   522 
       
   523       s0 = (mlib_s32) *sp;
       
   524       sp --;
       
   525 
       
   526       t0  = VIS_LD_U16_I(table1, 2*s0);
       
   527       acc0 = vis_faligndata(t0, acc0);
       
   528 
       
   529       s0 = (mlib_s32) *sp;
       
   530       sp --;
       
   531 
       
   532       t0  = VIS_LD_U16_I(table0, 2*s0);
       
   533       acc0 = vis_faligndata(t0, acc0);
       
   534     }
       
   535 
       
   536     emask = vis_edge16(dp, dend);
       
   537     vis_pst_16(acc0, dp, emask);
       
   538   }
       
   539 }
       
   540 
       
   541 /***************************************************************/
       
   542 void mlib_v_ImageLookUp_U8_U16_1(const mlib_u8  *src,
       
   543                                  mlib_s32       slb,
       
   544                                  mlib_u16       *dst,
       
   545                                  mlib_s32       dlb,
       
   546                                  mlib_s32       xsize,
       
   547                                  mlib_s32       ysize,
       
   548                                  const mlib_u16 **table)
       
   549 {
       
   550   mlib_u8  *sl;
       
   551   mlib_u16 *dl;
       
   552   const mlib_u16 *tab = table[0];
       
   553   mlib_s32 j, i;
       
   554 
       
   555   sl = (void *)src;
       
   556   dl = dst;
       
   557 
       
   558   /* row loop */
       
   559   for (j = 0; j < ysize; j ++) {
       
   560     mlib_u8  *sp = sl;
       
   561     mlib_u16 *dp = dl;
       
   562     mlib_s32 off, size = xsize;
       
   563 
       
   564     off = ((8 - ((mlib_addr)dp & 7)) & 7) >> 1;
       
   565 
       
   566     off = (off < size) ? off : size;
       
   567 
       
   568     for (i = 0; i < off; i++) {
       
   569       *dp++ = tab[(*sp++)];
       
   570       size--;
       
   571     }
       
   572 
       
   573     if (size > 0) {
       
   574 
       
   575       off = (mlib_addr)sp & 3;
       
   576 
       
   577       if (off == 0) {
       
   578         mlib_v_ImageLookUp_U8_U16_124_SrcOff0_D1(sp, dp, size, tab, tab, tab, tab);
       
   579       } else if (off == 1) {
       
   580         mlib_v_ImageLookUp_U8_U16_124_SrcOff1_D1(sp, dp, size, tab, tab, tab, tab);
       
   581       } else if (off == 2) {
       
   582         mlib_v_ImageLookUp_U8_U16_124_SrcOff2_D1(sp, dp, size, tab, tab, tab, tab);
       
   583       } else {
       
   584         mlib_v_ImageLookUp_U8_U16_124_SrcOff3_D1(sp, dp, size, tab, tab, tab, tab);
       
   585       }
       
   586     }
       
   587 
       
   588     sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
       
   589     dl = (mlib_u16 *) ((mlib_u8 *) dl + dlb);
       
   590   }
       
   591 }
       
   592 
       
   593 /***************************************************************/
       
   594 void mlib_v_ImageLookUp_U8_U16_2(const mlib_u8  *src,
       
   595                                  mlib_s32       slb,
       
   596                                  mlib_u16       *dst,
       
   597                                  mlib_s32       dlb,
       
   598                                  mlib_s32       xsize,
       
   599                                  mlib_s32       ysize,
       
   600                                  const mlib_u16 **table)
       
   601 {
       
   602   mlib_u8   *sl;
       
   603   mlib_u16  *dl;
       
   604   const mlib_u16  *tab;
       
   605   mlib_s32  j, i;
       
   606 
       
   607   sl = (void *)src;
       
   608   dl = dst;
       
   609 
       
   610   /* row loop */
       
   611   for (j = 0; j < ysize; j ++) {
       
   612     mlib_u8   *sp = sl;
       
   613     mlib_u16  *dp = dl;
       
   614     mlib_s32  off, size = xsize * 2;
       
   615     const mlib_u16  *tab0 = table[0];
       
   616     const mlib_u16  *tab1 = table[1];
       
   617 
       
   618     off = ((8 - ((mlib_addr)dp & 7)) & 7) >> 1;
       
   619 
       
   620     off = (off < size) ? off : size;
       
   621 
       
   622     for (i = 0; i < off - 1; i+=2) {
       
   623       *dp++ = tab0[(*sp++)];
       
   624       *dp++ = tab1[(*sp++)];
       
   625       size-=2;
       
   626     }
       
   627 
       
   628     if ((off & 1) != 0) {
       
   629       *dp++ = tab0[(*sp++)];
       
   630       size--;
       
   631       tab = tab0; tab0 = tab1; tab1 = tab;
       
   632     }
       
   633 
       
   634     if (size > 0) {
       
   635 
       
   636       off = (mlib_addr)sp & 3;
       
   637 
       
   638       if (off == 0) {
       
   639         mlib_v_ImageLookUp_U8_U16_124_SrcOff0_D1(sp, dp, size, tab0, tab1, tab0, tab1);
       
   640       } else if (off == 1) {
       
   641         mlib_v_ImageLookUp_U8_U16_124_SrcOff1_D1(sp, dp, size, tab0, tab1, tab0, tab1);
       
   642       } else if (off == 2) {
       
   643         mlib_v_ImageLookUp_U8_U16_124_SrcOff2_D1(sp, dp, size, tab0, tab1, tab0, tab1);
       
   644       } else {
       
   645         mlib_v_ImageLookUp_U8_U16_124_SrcOff3_D1(sp, dp, size, tab0, tab1, tab0, tab1);
       
   646       }
       
   647     }
       
   648 
       
   649     sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
       
   650     dl = (mlib_u16 *) ((mlib_u8 *) dl + dlb);
       
   651   }
       
   652 }
       
   653 
       
   654 /***************************************************************/
       
   655 void mlib_v_ImageLookUp_U8_U16_4(const mlib_u8  *src,
       
   656                                  mlib_s32       slb,
       
   657                                  mlib_u16       *dst,
       
   658                                  mlib_s32       dlb,
       
   659                                  mlib_s32       xsize,
       
   660                                  mlib_s32       ysize,
       
   661                                  const mlib_u16 **table)
       
   662 {
       
   663   mlib_u8   *sl;
       
   664   mlib_u16  *dl;
       
   665   const mlib_u16  *tab;
       
   666   mlib_s32  j;
       
   667 
       
   668   sl = (void *)src;
       
   669   dl = dst;
       
   670 
       
   671   /* row loop */
       
   672   for (j = 0; j < ysize; j ++) {
       
   673     mlib_u8   *sp = sl;
       
   674     mlib_u16  *dp = dl;
       
   675     const mlib_u16  *tab0 = table[0];
       
   676     const mlib_u16  *tab1 = table[1];
       
   677     const mlib_u16  *tab2 = table[2];
       
   678     const mlib_u16  *tab3 = table[3];
       
   679     mlib_s32  off, size = xsize * 4;
       
   680 
       
   681     off = ((8 - ((mlib_addr)dp & 7)) & 7) >> 1;
       
   682 
       
   683     off = (off < size) ? off : size;
       
   684 
       
   685     if (off == 1) {
       
   686       *dp++ = tab0[(*sp++)];
       
   687       tab = tab0; tab0 = tab1;
       
   688       tab1 = tab2; tab2 = tab3; tab3 = tab;
       
   689       size--;
       
   690     } else if (off == 2) {
       
   691       *dp++ = tab0[(*sp++)];
       
   692       *dp++ = tab1[(*sp++)];
       
   693       tab = tab0; tab0 = tab2; tab2 = tab;
       
   694       tab = tab1; tab1 = tab3; tab3 = tab;
       
   695       size-=2;
       
   696     } else if (off == 3) {
       
   697       *dp++ = tab0[(*sp++)];
       
   698       *dp++ = tab1[(*sp++)];
       
   699       *dp++ = tab2[(*sp++)];
       
   700       tab = tab3; tab3 = tab2;
       
   701       tab2 = tab1; tab1 = tab0; tab0 = tab;
       
   702       size-=3;
       
   703     }
       
   704 
       
   705     if (size > 0) {
       
   706 
       
   707       off = (mlib_addr)sp & 3;
       
   708 
       
   709       if (off == 0) {
       
   710         mlib_v_ImageLookUp_U8_U16_124_SrcOff0_D1(sp, dp, size, tab0, tab1, tab2, tab3);
       
   711       } else if (off == 1) {
       
   712         mlib_v_ImageLookUp_U8_U16_124_SrcOff1_D1(sp, dp, size, tab0, tab1, tab2, tab3);
       
   713       } else if (off == 2) {
       
   714         mlib_v_ImageLookUp_U8_U16_124_SrcOff2_D1(sp, dp, size, tab0, tab1, tab2, tab3);
       
   715       } else {
       
   716         mlib_v_ImageLookUp_U8_U16_124_SrcOff3_D1(sp, dp, size, tab0, tab1, tab2, tab3);
       
   717       }
       
   718     }
       
   719 
       
   720     sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
       
   721     dl = (mlib_u16 *) ((mlib_u8 *) dl + dlb);
       
   722   }
       
   723 }
       
   724 
       
   725 /***************************************************************/
       
   /*
    * Three-channel U8 -> U16 lookup for one run of xsize destination
    * elements, for the case where the source is read as 32-bit words
    * starting exactly at src (sa = (mlib_u32*)src).
    *
    *   src      source bytes
    *   dst      destination mlib_u16 elements (dend = dst + xsize - 1)
    *   xsize    number of elements to produce
    *   table0..table2  lookup tables, applied cyclically to the elements
    *
    * NOTE(review): the word loads through sa and the 8-byte stores through
    * dp presumably rely on src being 4-byte aligned and dst being 8-byte
    * aligned -- confirm against the caller.
    * NOTE(review): the most significant byte of each word (s >> 23) is used
    * with the first table of a group, which looks like a big-endian byte
    * order assumption -- confirm.
    */
   726 void mlib_v_ImageLookUp_U8_U16_3_SrcOff0_D1(const mlib_u8  *src,
       
   727                                             mlib_u16       *dst,
       
   728                                             mlib_s32       xsize,
       
   729                                             const mlib_u16 *table0,
       
   730                                             const mlib_u16 *table1,
       
   731                                             const mlib_u16 *table2)
       
   732 {
       
   733   mlib_u32 *sa;              /* aligned pointer to source data */
       
   734   mlib_u8  *sp;              /* pointer to source data */
       
   735   mlib_u32 s0, s1, s2;       /* source data */
       
   736   mlib_u16 *dl;              /* pointer to start of destination */
       
   737   mlib_u16 *dend;            /* pointer to end of destination */
       
   738   mlib_d64 *dp;              /* aligned pointer to destination */
       
   739   mlib_d64 t0, t1, t2;       /* destination data */
       
   740   mlib_d64 t3, t4, t5;       /* destination data */
       
   741   mlib_d64 t6, t7, t8;       /* destination data */
       
   742   mlib_d64 t9, t10, t11;     /* destination data */
       
   743   mlib_d64 acc0, acc1, acc2; /* destination data */
       
   744   mlib_s32 emask;            /* edge mask */
       
   745   mlib_s32 i, num;           /* loop variable */
       
   746   const mlib_u16 *table;
       
   747 
       
   748   sa   = (mlib_u32*)src;
       
   749   dl   = dst;
       
   750   dp   = (mlib_d64 *) dl;
       
   751   dend = dl + xsize - 1;
       
   752 
       
         /* NOTE(review): presumably sets the faligndata offset to 6, so that
          * each vis_faligndata(t, acc) below shifts acc by one 16-bit lane and
          * inserts the value held in t -- confirm against the VIS documentation. */
   753   vis_alignaddr((void *) 0, 6);
       
   754 
       
   755   i = 0;
       
   756 
       
         /* Main path: 12 elements (3 source words, 3 destination doubles) per
          * step.  The loop loads the words of the next step at its end, so the
          * last group of 12 is finished by the copy of the body after the loop. */
   757   if (xsize >= 12) {
       
   758 
       
   759     s0 = sa[0];
       
   760     s1 = sa[1];
       
   761     s2 = sa[2];
       
   762     sa += 3;
       
   763 
       
   764 #pragma pipeloop(0)
       
   765     for(i = 0; i <= xsize - 24; i+=12, sa += 3, dp += 3) {
       
             /* Each index is one source byte times 2 (mask 0x1FE); presumably
              * VIS_LD_U16_I uses it as a byte offset into a mlib_u16 table. */
   766       t3 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
       
   767       t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
       
   768       t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
       
   769       t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
       
   770       t7 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
       
   771       t6 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
       
   772       t5 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
       
   773       t4 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
       
   774       t11 = VIS_LD_U16_I(table2, (s2 << 1) & 0x1FE);
       
   775       t10 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE);
       
   776       t9 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE);
       
   777       t8 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE);
       
             /* acc0..acc2 are never initialized; four faligndata calls in a row
              * presumably replace every 16-bit lane of an accumulator. */
   778       acc0 = vis_faligndata(t3, acc0);
       
   779       acc0 = vis_faligndata(t2, acc0);
       
   780       acc0 = vis_faligndata(t1, acc0);
       
   781       acc0 = vis_faligndata(t0, acc0);
       
   782       acc1 = vis_faligndata(t7, acc1);
       
   783       acc1 = vis_faligndata(t6, acc1);
       
   784       acc1 = vis_faligndata(t5, acc1);
       
   785       acc1 = vis_faligndata(t4, acc1);
       
   786       acc2 = vis_faligndata(t11, acc2);
       
   787       acc2 = vis_faligndata(t10, acc2);
       
   788       acc2 = vis_faligndata(t9, acc2);
       
   789       acc2 = vis_faligndata(t8, acc2);
       
             /* load the source words for the next step before storing */
   790       s0 = sa[0];
       
   791       s1 = sa[1];
       
   792       s2 = sa[2];
       
   793       dp[0] = acc0;
       
   794       dp[1] = acc1;
       
   795       dp[2] = acc2;
       
   796     }
       
   797 
       
           /* finish the group of 12 whose words were loaded last */
   798     t3 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
       
   799     t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
       
   800     t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
       
   801     t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
       
   802     t7 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
       
   803     t6 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
       
   804     t5 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
       
   805     t4 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
       
   806     t11 = VIS_LD_U16_I(table2, (s2 << 1) & 0x1FE);
       
   807     t10 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE);
       
   808     t9 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE);
       
   809     t8 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE);
       
   810     acc0 = vis_faligndata(t3, acc0);
       
   811     acc0 = vis_faligndata(t2, acc0);
       
   812     acc0 = vis_faligndata(t1, acc0);
       
   813     acc0 = vis_faligndata(t0, acc0);
       
   814     acc1 = vis_faligndata(t7, acc1);
       
   815     acc1 = vis_faligndata(t6, acc1);
       
   816     acc1 = vis_faligndata(t5, acc1);
       
   817     acc1 = vis_faligndata(t4, acc1);
       
   818     acc2 = vis_faligndata(t11, acc2);
       
   819     acc2 = vis_faligndata(t10, acc2);
       
   820     acc2 = vis_faligndata(t9, acc2);
       
   821     acc2 = vis_faligndata(t8, acc2);
       
   822     dp[0] = acc0;
       
   823     dp[1] = acc1;
       
   824     dp[2] = acc2;
       
   825     dp += 3; i += 12;
       
   826   }
       
   827 
       
         /* 8 more elements: two words in, two doubles out */
   828   if (i <= xsize - 8) {
       
   829     s0 = sa[0];
       
   830     s1 = sa[1];
       
   831     t3 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
       
   832     t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
       
   833     t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
       
   834     t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
       
   835     t7 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
       
   836     t6 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
       
   837     t5 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
       
   838     t4 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
       
   839     acc0 = vis_faligndata(t3, acc0);
       
   840     acc0 = vis_faligndata(t2, acc0);
       
   841     acc0 = vis_faligndata(t1, acc0);
       
   842     acc0 = vis_faligndata(t0, acc0);
       
   843     acc1 = vis_faligndata(t7, acc1);
       
   844     acc1 = vis_faligndata(t6, acc1);
       
   845     acc1 = vis_faligndata(t5, acc1);
       
   846     acc1 = vis_faligndata(t4, acc1);
       
   847     dp[0] = acc0;
       
   848     dp[1] = acc1;
       
           /* 8 % 3 == 2: rotate the tables by two so table0 is the next channel */
   849     table = table0; table0 = table2;
       
   850     table2 = table1; table1 = table;
       
   851     sa += 2; i += 8; dp += 2;
       
   852   }
       
   853 
       
         /* 4 more elements: one word in, one double out */
   854   if (i <= xsize - 4) {
       
   855     s0 = sa[0];
       
   856     t3 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
       
   857     t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
       
   858     t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
       
   859     t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
       
   860     acc0 = vis_faligndata(t3, acc0);
       
   861     acc0 = vis_faligndata(t2, acc0);
       
   862     acc0 = vis_faligndata(t1, acc0);
       
   863     acc0 = vis_faligndata(t0, acc0);
       
   864     dp[0] = acc0;
       
           /* 4 % 3 == 1: rotate the tables by one */
   865     table = table0; table0 = table1;
       
   866     table1 = table2; table2 = table;
       
   867     sa++; i += 4; dp++;
       
   868   }
       
   869 
       
   870   sp = (mlib_u8*)sa;
       
   871 
       
         /* Tail: the branches below handle 1, 2 or 3 remaining elements.  sp is
          * moved to the last remaining source byte and walked backwards, so the
          * earliest element is inserted into acc0 last. */
   872   if ((mlib_addr) dp <= (mlib_addr) dend) {
       
   873 
       
   874     num = (mlib_u16*) dend - (mlib_u16*) dp;
       
   875     sp  += num;
       
   876     num ++;
       
   877 
       
   878     if (num == 1) {
       
   879       s0 = (mlib_s32) *sp;
       
   880       sp --;
       
   881 
       
   882       t0  = VIS_LD_U16_I(table0, 2*s0);
       
   883       acc0 = vis_faligndata(t0, acc0);
       
   884     } else if (num  == 2) {
       
   885       s0 = (mlib_s32) *sp;
       
   886       sp --;
       
   887 
       
   888       t0  = VIS_LD_U16_I(table1, 2*s0);
       
   889       acc0 = vis_faligndata(t0, acc0);
       
   890 
       
   891       s0 = (mlib_s32) *sp;
       
   892       sp --;
       
   893 
       
   894       t0  = VIS_LD_U16_I(table0, 2*s0);
       
   895       acc0 = vis_faligndata(t0, acc0);
       
   896     } else if (num == 3) {
       
   897       s0 = (mlib_s32) *sp;
       
   898       sp --;
       
   899 
       
   900       t0  = VIS_LD_U16_I(table2, 2*s0);
       
   901       acc0 = vis_faligndata(t0, acc0);
       
   902 
       
   903       s0 = (mlib_s32) *sp;
       
   904       sp --;
       
   905 
       
   906       t0  = VIS_LD_U16_I(table1, 2*s0);
       
   907       acc0 = vis_faligndata(t0, acc0);
       
   908 
       
   909       s0 = (mlib_s32) *sp;
       
   910       sp --;
       
   911 
       
   912       t0  = VIS_LD_U16_I(table0, 2*s0);
       
   913       acc0 = vis_faligndata(t0, acc0);
       
   914     }
       
   915 
       
           /* NOTE(review): presumably a partial store that writes only the
            * 16-bit lanes between dp and dend selected by emask -- confirm. */
   916     emask = vis_edge16(dp, dend);
       
   917     vis_pst_16(acc0, dp, emask);
       
   918   }
       
   919 }
       
   920 
       
   921 /***************************************************************/
       
   /*
    * Three-channel U8 -> U16 lookup for one run of xsize destination
    * elements, for the case where the source is read as 32-bit words
    * starting one byte before src (sa = (mlib_u32*)(src - 1)).
    *
    * Each group of four elements therefore takes three bytes from the
    * previously loaded word (s0 >> 15, s0 >> 7, s0 << 1) and one byte from
    * the next word (s1 >> 23); the last word of a group is carried over in s0.
    *
    *   src      source bytes
    *   dst      destination mlib_u16 elements (dend = dst + xsize - 1)
    *   xsize    number of elements to produce
    *   table0..table2  lookup tables, applied cyclically to the elements
    *
    * NOTE(review): presumably src - 1 is 4-byte aligned and dst is 8-byte
    * aligned when this variant is selected -- confirm against the caller.
    * NOTE(review): the shift amounts look like a big-endian byte order
    * assumption -- confirm.
    */
   922 void mlib_v_ImageLookUp_U8_U16_3_SrcOff1_D1(const mlib_u8  *src,
       
   923                                             mlib_u16       *dst,
       
   924                                             mlib_s32       xsize,
       
   925                                             const mlib_u16 *table0,
       
   926                                             const mlib_u16 *table1,
       
   927                                             const mlib_u16 *table2)
       
   928 {
       
   929   mlib_u32 *sa;              /* aligned pointer to source data */
       
   930   mlib_u8  *sp;              /* pointer to source data */
       
   931   mlib_u32 s0, s1, s2, s3;   /* source data */
       
   932   mlib_u16 *dl;              /* pointer to start of destination */
       
   933   mlib_u16 *dend;            /* pointer to end of destination */
       
   934   mlib_d64 *dp;              /* aligned pointer to destination */
       
   935   mlib_d64 t0, t1, t2;       /* destination data */
       
   936   mlib_d64 t3, t4, t5;       /* destination data */
       
   937   mlib_d64 t6, t7, t8;       /* destination data */
       
   938   mlib_d64 t9, t10, t11;     /* destination data */
       
   939   mlib_d64 acc0, acc1, acc2; /* destination data */
       
   940   mlib_s32 emask;            /* edge mask */
       
   941   mlib_s32 i, num;           /* loop variable */
       
   942   const mlib_u16 *table;
       
   943 
       
   944   sa   = (mlib_u32*)(src - 1);
       
   945   dl   = dst;
       
   946   dp   = (mlib_d64 *) dl;
       
   947   dend = dl + xsize - 1;
       
   948 
       
         /* NOTE(review): presumably sets the faligndata offset to 6, so that
          * each vis_faligndata(t, acc) below shifts acc by one 16-bit lane and
          * inserts the value held in t -- confirm against the VIS documentation. */
   949   vis_alignaddr((void *) 0, 6);
       
   950 
       
   951   i = 0;
       
   952 
       
         /* preload the word that holds the byte before src and the first three
          * source bytes; from here on sa stays one word ahead of the data in s0 */
   953   s0 = *sa++;
       
   954 
       
         /* Main path: 12 elements per step.  The loop loads the words of the
          * next step at its end, so the last group of 12 is finished by the
          * copy of the body after the loop. */
   955   if (xsize >= 12) {
       
   956 
       
   957     s1 = sa[0];
       
   958     s2 = sa[1];
       
   959     s3 = sa[2];
       
   960     sa += 3;
       
   961 
       
   962 #pragma pipeloop(0)
       
   963     for(i = 0; i <= xsize - 24; i+=12, sa += 3, dp += 3) {
       
             /* Each index is one source byte times 2 (mask 0x1FE); presumably
              * VIS_LD_U16_I uses it as a byte offset into a mlib_u16 table. */
   964       t3 = VIS_LD_U16_I(table0, (s1 >> 23) & 0x1FE);
       
   965       t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
       
   966       t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
       
   967       t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
       
   968       t7 = VIS_LD_U16_I(table1, (s2 >> 23) & 0x1FE);
       
   969       t6 = VIS_LD_U16_I(table0, (s1 << 1) & 0x1FE);
       
   970       t5 = VIS_LD_U16_I(table2, (s1 >> 7) & 0x1FE);
       
   971       t4 = VIS_LD_U16_I(table1, (s1 >> 15) & 0x1FE);
       
   972       t11 = VIS_LD_U16_I(table2, (s3 >> 23) & 0x1FE);
       
   973       t10 = VIS_LD_U16_I(table1, (s2 << 1) & 0x1FE);
       
   974       t9 = VIS_LD_U16_I(table0, (s2 >> 7) & 0x1FE);
       
   975       t8 = VIS_LD_U16_I(table2, (s2 >> 15) & 0x1FE);
       
             /* acc0..acc2 are never initialized; four faligndata calls in a row
              * presumably replace every 16-bit lane of an accumulator. */
   976       acc0 = vis_faligndata(t3, acc0);
       
   977       acc0 = vis_faligndata(t2, acc0);
       
   978       acc0 = vis_faligndata(t1, acc0);
       
   979       acc0 = vis_faligndata(t0, acc0);
       
   980       acc1 = vis_faligndata(t7, acc1);
       
   981       acc1 = vis_faligndata(t6, acc1);
       
   982       acc1 = vis_faligndata(t5, acc1);
       
   983       acc1 = vis_faligndata(t4, acc1);
       
   984       acc2 = vis_faligndata(t11, acc2);
       
   985       acc2 = vis_faligndata(t10, acc2);
       
   986       acc2 = vis_faligndata(t9, acc2);
       
   987       acc2 = vis_faligndata(t8, acc2);
       
             /* s3 still holds three unused bytes: carry it over as the new s0 */
   988       s0 = s3;
       
   989       s1 = sa[0];
       
   990       s2 = sa[1];
       
   991       s3 = sa[2];
       
   992       dp[0] = acc0;
       
   993       dp[1] = acc1;
       
   994       dp[2] = acc2;
       
   995     }
       
   996 
       
           /* finish the group of 12 whose words were loaded last */
   997     t3 = VIS_LD_U16_I(table0, (s1 >> 23) & 0x1FE);
       
   998     t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
       
   999     t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
       
  1000     t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
       
  1001     t7 = VIS_LD_U16_I(table1, (s2 >> 23) & 0x1FE);
       
  1002     t6 = VIS_LD_U16_I(table0, (s1 << 1) & 0x1FE);
       
  1003     t5 = VIS_LD_U16_I(table2, (s1 >> 7) & 0x1FE);
       
  1004     t4 = VIS_LD_U16_I(table1, (s1 >> 15) & 0x1FE);
       
  1005     t11 = VIS_LD_U16_I(table2, (s3 >> 23) & 0x1FE);
       
  1006     t10 = VIS_LD_U16_I(table1, (s2 << 1) & 0x1FE);
       
  1007     t9 = VIS_LD_U16_I(table0, (s2 >> 7) & 0x1FE);
       
  1008     t8 = VIS_LD_U16_I(table2, (s2 >> 15) & 0x1FE);
       
  1009     acc0 = vis_faligndata(t3, acc0);
       
  1010     acc0 = vis_faligndata(t2, acc0);
       
  1011     acc0 = vis_faligndata(t1, acc0);
       
  1012     acc0 = vis_faligndata(t0, acc0);
       
  1013     acc1 = vis_faligndata(t7, acc1);
       
  1014     acc1 = vis_faligndata(t6, acc1);
       
  1015     acc1 = vis_faligndata(t5, acc1);
       
  1016     acc1 = vis_faligndata(t4, acc1);
       
  1017     acc2 = vis_faligndata(t11, acc2);
       
  1018     acc2 = vis_faligndata(t10, acc2);
       
  1019     acc2 = vis_faligndata(t9, acc2);
       
  1020     acc2 = vis_faligndata(t8, acc2);
       
  1021     dp[0] = acc0;
       
  1022     dp[1] = acc1;
       
  1023     dp[2] = acc2;
       
  1024     s0 = s3;
       
  1025     dp += 3; i += 12;
       
  1026   }
       
  1027 
       
         /* 8 more elements: two new words in, two doubles out */
  1028   if (i <= xsize - 8) {
       
  1029     s1 = sa[0];
       
  1030     s2 = sa[1];
       
  1031     t3 = VIS_LD_U16_I(table0, (s1 >> 23) & 0x1FE);
       
  1032     t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
       
  1033     t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
       
  1034     t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
       
  1035     t7 = VIS_LD_U16_I(table1, (s2 >> 23) & 0x1FE);
       
  1036     t6 = VIS_LD_U16_I(table0, (s1 << 1) & 0x1FE);
       
  1037     t5 = VIS_LD_U16_I(table2, (s1 >> 7) & 0x1FE);
       
  1038     t4 = VIS_LD_U16_I(table1, (s1 >> 15) & 0x1FE);
       
  1039     acc0 = vis_faligndata(t3, acc0);
       
  1040     acc0 = vis_faligndata(t2, acc0);
       
  1041     acc0 = vis_faligndata(t1, acc0);
       
  1042     acc0 = vis_faligndata(t0, acc0);
       
  1043     acc1 = vis_faligndata(t7, acc1);
       
  1044     acc1 = vis_faligndata(t6, acc1);
       
  1045     acc1 = vis_faligndata(t5, acc1);
       
  1046     acc1 = vis_faligndata(t4, acc1);
       
  1047     dp[0] = acc0;
       
  1048     dp[1] = acc1;
       
           /* 8 % 3 == 2: rotate the tables by two so table0 is the next channel */
  1049     table = table0; table0 = table2;
       
  1050     table2 = table1; table1 = table;
       
  1051     sa += 2; i += 8; dp += 2;
       
  1052     s0 = s2;
       
  1053   }
       
  1054 
       
         /* 4 more elements: one new word in, one double out */
  1055   if (i <= xsize - 4) {
       
  1056     s1 = sa[0];
       
  1057     t3 = VIS_LD_U16_I(table0, (s1 >> 23) & 0x1FE);
       
  1058     t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
       
  1059     t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
       
  1060     t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
       
  1061     acc0 = vis_faligndata(t3, acc0);
       
  1062     acc0 = vis_faligndata(t2, acc0);
       
  1063     acc0 = vis_faligndata(t1, acc0);
       
  1064     acc0 = vis_faligndata(t0, acc0);
       
  1065     dp[0] = acc0;
       
           /* 4 % 3 == 1: rotate the tables by one */
  1066     table = table0; table0 = table1;
       
  1067     table1 = table2; table2 = table;
       
  1068     sa++; i += 4; dp++;
       
  1069     s0 = s1;
       
  1070   }
       
  1071 
       
         /* sa is one word past the word held in s0, of which three bytes are
          * still unused: step back 3 bytes to reach the first unprocessed byte */
  1072   sp = (mlib_u8*)sa;
       
  1073   sp -= 3;
       
  1074 
       
         /* Tail: the branches below handle 1, 2 or 3 remaining elements.  sp is
          * moved to the last remaining source byte and walked backwards, so the
          * earliest element is inserted into acc0 last. */
  1075   if ((mlib_addr) dp <= (mlib_addr) dend) {
       
  1076 
       
  1077     num = (mlib_u16*) dend - (mlib_u16*) dp;
       
  1078     sp  += num;
       
  1079     num ++;
       
  1080 
       
  1081     if (num == 1) {
       
  1082       s0 = (mlib_s32) *sp;
       
  1083       sp --;
       
  1084 
       
  1085       t0  = VIS_LD_U16_I(table0, 2*s0);
       
  1086       acc0 = vis_faligndata(t0, acc0);
       
  1087     } else if (num  == 2) {
       
  1088       s0 = (mlib_s32) *sp;
       
  1089       sp --;
       
  1090 
       
  1091       t0  = VIS_LD_U16_I(table1, 2*s0);
       
  1092       acc0 = vis_faligndata(t0, acc0);
       
  1093 
       
  1094       s0 = (mlib_s32) *sp;
       
  1095       sp --;
       
  1096 
       
  1097       t0  = VIS_LD_U16_I(table0, 2*s0);
       
  1098       acc0 = vis_faligndata(t0, acc0);
       
  1099     } else if (num == 3) {
       
  1100       s0 = (mlib_s32) *sp;
       
  1101       sp --;
       
  1102 
       
  1103       t0  = VIS_LD_U16_I(table2, 2*s0);
       
  1104       acc0 = vis_faligndata(t0, acc0);
       
  1105 
       
  1106       s0 = (mlib_s32) *sp;
       
  1107       sp --;
       
  1108 
       
  1109       t0  = VIS_LD_U16_I(table1, 2*s0);
       
  1110       acc0 = vis_faligndata(t0, acc0);
       
  1111 
       
  1112       s0 = (mlib_s32) *sp;
       
  1113       sp --;
       
  1114 
       
  1115       t0  = VIS_LD_U16_I(table0, 2*s0);
       
  1116       acc0 = vis_faligndata(t0, acc0);
       
  1117     }
       
  1118 
       
           /* NOTE(review): presumably a partial store that writes only the
            * 16-bit lanes between dp and dend selected by emask -- confirm. */
  1119     emask = vis_edge16(dp, dend);
       
  1120     vis_pst_16(acc0, dp, emask);
       
  1121   }
       
  1122 }
       
  1123 
       
  1124 /***************************************************************/
       
  /*
   * Three-channel U8 -> U16 lookup for one run of xsize destination
   * elements, for the case where the source is read as 32-bit words
   * starting two bytes before src (sa = (mlib_u32*)(src - 2)).
   *
   * Each group of four elements therefore takes two bytes from the
   * previously loaded word (s0 >> 7, s0 << 1) and two bytes from the next
   * word (s1 >> 23, s1 >> 15); the last word of a group is carried over in s0.
   *
   *   src      source bytes
   *   dst      destination mlib_u16 elements (dend = dst + xsize - 1)
   *   xsize    number of elements to produce
   *   table0..table2  lookup tables, applied cyclically to the elements
   *
   * NOTE(review): presumably src - 2 is 4-byte aligned and dst is 8-byte
   * aligned when this variant is selected -- confirm against the caller.
   * NOTE(review): the shift amounts look like a big-endian byte order
   * assumption -- confirm.
   */
  1125 void mlib_v_ImageLookUp_U8_U16_3_SrcOff2_D1(const mlib_u8  *src,
       
  1126                                             mlib_u16       *dst,
       
  1127                                             mlib_s32       xsize,
       
  1128                                             const mlib_u16 *table0,
       
  1129                                             const mlib_u16 *table1,
       
  1130                                             const mlib_u16 *table2)
       
  1131 {
       
  1132   mlib_u32 *sa;              /* aligned pointer to source data */
       
  1133   mlib_u8  *sp;              /* pointer to source data */
       
  1134   mlib_u32 s0, s1, s2, s3;   /* source data */
       
  1135   mlib_u16 *dl;              /* pointer to start of destination */
       
  1136   mlib_u16 *dend;            /* pointer to end of destination */
       
  1137   mlib_d64 *dp;              /* aligned pointer to destination */
       
  1138   mlib_d64 t0, t1, t2;       /* destination data */
       
  1139   mlib_d64 t3, t4, t5;       /* destination data */
       
  1140   mlib_d64 t6, t7, t8;       /* destination data */
       
  1141   mlib_d64 t9, t10, t11;     /* destination data */
       
  1142   mlib_d64 acc0, acc1, acc2; /* destination data */
       
  1143   mlib_s32 emask;            /* edge mask */
       
  1144   mlib_s32 i, num;           /* loop variable */
       
  1145   const mlib_u16 *table;
       
  1146 
       
  1147   sa   = (mlib_u32*)(src - 2);
       
  1148   dl   = dst;
       
  1149   dp   = (mlib_d64 *) dl;
       
  1150   dend = dl + xsize - 1;
       
  1151 
       
         /* NOTE(review): presumably sets the faligndata offset to 6, so that
          * each vis_faligndata(t, acc) below shifts acc by one 16-bit lane and
          * inserts the value held in t -- confirm against the VIS documentation. */
  1152   vis_alignaddr((void *) 0, 6);
       
  1153 
       
  1154   i = 0;
       
  1155 
       
         /* preload the word that holds the two bytes before src and the first
          * two source bytes; from here on sa stays one word ahead of s0 */
  1156   s0 = *sa++;
       
  1157 
       
         /* Main path: 12 elements per step.  The loop loads the words of the
          * next step at its end, so the last group of 12 is finished by the
          * copy of the body after the loop. */
  1158   if (xsize >= 12) {
       
  1159 
       
  1160     s1 = sa[0];
       
  1161     s2 = sa[1];
       
  1162     s3 = sa[2];
       
  1163     sa += 3;
       
  1164 
       
  1165 #pragma pipeloop(0)
       
  1166     for(i = 0; i <= xsize - 24; i+=12, sa += 3, dp += 3) {
       
             /* Each index is one source byte times 2 (mask 0x1FE); presumably
              * VIS_LD_U16_I uses it as a byte offset into a mlib_u16 table. */
  1167       t3 = VIS_LD_U16_I(table0, (s1 >> 15) & 0x1FE);
       
  1168       t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
       
  1169       t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
       
  1170       t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
       
  1171       t7 = VIS_LD_U16_I(table1, (s2 >> 15) & 0x1FE);
       
  1172       t6 = VIS_LD_U16_I(table0, (s2 >> 23) & 0x1FE);
       
  1173       t5 = VIS_LD_U16_I(table2, (s1 << 1) & 0x1FE);
       
  1174       t4 = VIS_LD_U16_I(table1, (s1 >> 7) & 0x1FE);
       
  1175       t11 = VIS_LD_U16_I(table2, (s3 >> 15) & 0x1FE);
       
  1176       t10 = VIS_LD_U16_I(table1, (s3 >> 23) & 0x1FE);
       
  1177       t9 = VIS_LD_U16_I(table0, (s2 << 1) & 0x1FE);
       
  1178       t8 = VIS_LD_U16_I(table2, (s2 >> 7) & 0x1FE);
       
             /* acc0..acc2 are never initialized; four faligndata calls in a row
              * presumably replace every 16-bit lane of an accumulator. */
  1179       acc0 = vis_faligndata(t3, acc0);
       
  1180       acc0 = vis_faligndata(t2, acc0);
       
  1181       acc0 = vis_faligndata(t1, acc0);
       
  1182       acc0 = vis_faligndata(t0, acc0);
       
  1183       acc1 = vis_faligndata(t7, acc1);
       
  1184       acc1 = vis_faligndata(t6, acc1);
       
  1185       acc1 = vis_faligndata(t5, acc1);
       
  1186       acc1 = vis_faligndata(t4, acc1);
       
  1187       acc2 = vis_faligndata(t11, acc2);
       
  1188       acc2 = vis_faligndata(t10, acc2);
       
  1189       acc2 = vis_faligndata(t9, acc2);
       
  1190       acc2 = vis_faligndata(t8, acc2);
       
             /* s3 still holds two unused bytes: carry it over as the new s0 */
  1191       s0 = s3;
       
  1192       s1 = sa[0];
       
  1193       s2 = sa[1];
       
  1194       s3 = sa[2];
       
  1195       dp[0] = acc0;
       
  1196       dp[1] = acc1;
       
  1197       dp[2] = acc2;
       
  1198     }
       
  1199 
       
           /* finish the group of 12 whose words were loaded last */
  1200     t3 = VIS_LD_U16_I(table0, (s1 >> 15) & 0x1FE);
       
  1201     t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
       
  1202     t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
       
  1203     t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
       
  1204     t7 = VIS_LD_U16_I(table1, (s2 >> 15) & 0x1FE);
       
  1205     t6 = VIS_LD_U16_I(table0, (s2 >> 23) & 0x1FE);
       
  1206     t5 = VIS_LD_U16_I(table2, (s1 << 1) & 0x1FE);
       
  1207     t4 = VIS_LD_U16_I(table1, (s1 >> 7) & 0x1FE);
       
  1208     t11 = VIS_LD_U16_I(table2, (s3 >> 15) & 0x1FE);
       
  1209     t10 = VIS_LD_U16_I(table1, (s3 >> 23) & 0x1FE);
       
  1210     t9 = VIS_LD_U16_I(table0, (s2 << 1) & 0x1FE);
       
  1211     t8 = VIS_LD_U16_I(table2, (s2 >> 7) & 0x1FE);
       
  1212     acc0 = vis_faligndata(t3, acc0);
       
  1213     acc0 = vis_faligndata(t2, acc0);
       
  1214     acc0 = vis_faligndata(t1, acc0);
       
  1215     acc0 = vis_faligndata(t0, acc0);
       
  1216     acc1 = vis_faligndata(t7, acc1);
       
  1217     acc1 = vis_faligndata(t6, acc1);
       
  1218     acc1 = vis_faligndata(t5, acc1);
       
  1219     acc1 = vis_faligndata(t4, acc1);
       
  1220     acc2 = vis_faligndata(t11, acc2);
       
  1221     acc2 = vis_faligndata(t10, acc2);
       
  1222     acc2 = vis_faligndata(t9, acc2);
       
  1223     acc2 = vis_faligndata(t8, acc2);
       
  1224     dp[0] = acc0;
       
  1225     dp[1] = acc1;
       
  1226     dp[2] = acc2;
       
  1227     s0 = s3;
       
  1228     dp += 3; i += 12;
       
  1229   }
       
  1230 
       
         /* 8 more elements: two new words in, two doubles out */
  1231   if (i <= xsize - 8) {
       
  1232     s1 = sa[0];
       
  1233     s2 = sa[1];
       
  1234     t3 = VIS_LD_U16_I(table0, (s1 >> 15) & 0x1FE);
       
  1235     t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
       
  1236     t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
       
  1237     t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
       
  1238     t7 = VIS_LD_U16_I(table1, (s2 >> 15) & 0x1FE);
       
  1239     t6 = VIS_LD_U16_I(table0, (s2 >> 23) & 0x1FE);
       
  1240     t5 = VIS_LD_U16_I(table2, (s1 << 1) & 0x1FE);
       
  1241     t4 = VIS_LD_U16_I(table1, (s1 >> 7) & 0x1FE);
       
  1242     acc0 = vis_faligndata(t3, acc0);
       
  1243     acc0 = vis_faligndata(t2, acc0);
       
  1244     acc0 = vis_faligndata(t1, acc0);
       
  1245     acc0 = vis_faligndata(t0, acc0);
       
  1246     acc1 = vis_faligndata(t7, acc1);
       
  1247     acc1 = vis_faligndata(t6, acc1);
       
  1248     acc1 = vis_faligndata(t5, acc1);
       
  1249     acc1 = vis_faligndata(t4, acc1);
       
  1250     dp[0] = acc0;
       
  1251     dp[1] = acc1;
       
           /* 8 % 3 == 2: rotate the tables by two so table0 is the next channel */
  1252     table = table0; table0 = table2;
       
  1253     table2 = table1; table1 = table;
       
  1254     sa += 2; i += 8; dp += 2;
       
  1255     s0 = s2;
       
  1256   }
       
  1257 
       
         /* 4 more elements: one new word in, one double out */
  1258   if (i <= xsize - 4) {
       
  1259     s1 = sa[0];
       
  1260     t3 = VIS_LD_U16_I(table0, (s1 >> 15) & 0x1FE);
       
  1261     t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
       
  1262     t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
       
  1263     t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
       
  1264     acc0 = vis_faligndata(t3, acc0);
       
  1265     acc0 = vis_faligndata(t2, acc0);
       
  1266     acc0 = vis_faligndata(t1, acc0);
       
  1267     acc0 = vis_faligndata(t0, acc0);
       
  1268     dp[0] = acc0;
       
           /* 4 % 3 == 1: rotate the tables by one */
  1269     table = table0; table0 = table1;
       
  1270     table1 = table2; table2 = table;
       
  1271     sa++; i += 4; dp++;
       
  1272     s0 = s1;
       
  1273   }
       
  1274 
       
         /* sa is one word past the word held in s0, of which two bytes are
          * still unused: step back 2 bytes to reach the first unprocessed byte */
  1275   sp = (mlib_u8*)sa;
       
  1276   sp -= 2;
       
  1277 
       
         /* Tail: the branches below handle 1, 2 or 3 remaining elements.  sp is
          * moved to the last remaining source byte and walked backwards, so the
          * earliest element is inserted into acc0 last. */
  1278   if ((mlib_addr) dp <= (mlib_addr) dend) {
       
  1279 
       
  1280     num = (mlib_u16*) dend - (mlib_u16*) dp;
       
  1281     sp  += num;
       
  1282     num ++;
       
  1283 
       
  1284     if (num == 1) {
       
  1285       s0 = (mlib_s32) *sp;
       
  1286       sp --;
       
  1287 
       
  1288       t0  = VIS_LD_U16_I(table0, 2*s0);
       
  1289       acc0 = vis_faligndata(t0, acc0);
       
  1290     } else if (num  == 2) {
       
  1291       s0 = (mlib_s32) *sp;
       
  1292       sp --;
       
  1293 
       
  1294       t0  = VIS_LD_U16_I(table1, 2*s0);
       
  1295       acc0 = vis_faligndata(t0, acc0);
       
  1296 
       
  1297       s0 = (mlib_s32) *sp;
       
  1298       sp --;
       
  1299 
       
  1300       t0  = VIS_LD_U16_I(table0, 2*s0);
       
  1301       acc0 = vis_faligndata(t0, acc0);
       
  1302     } else if (num == 3) {
       
  1303       s0 = (mlib_s32) *sp;
       
  1304       sp --;
       
  1305 
       
  1306       t0  = VIS_LD_U16_I(table2, 2*s0);
       
  1307       acc0 = vis_faligndata(t0, acc0);
       
  1308 
       
  1309       s0 = (mlib_s32) *sp;
       
  1310       sp --;
       
  1311 
       
  1312       t0  = VIS_LD_U16_I(table1, 2*s0);
       
  1313       acc0 = vis_faligndata(t0, acc0);
       
  1314 
       
  1315       s0 = (mlib_s32) *sp;
       
  1316       sp --;
       
  1317 
       
  1318       t0  = VIS_LD_U16_I(table0, 2*s0);
       
  1319       acc0 = vis_faligndata(t0, acc0);
       
  1320     }
       
  1321 
       
           /* NOTE(review): presumably a partial store that writes only the
            * 16-bit lanes between dp and dend selected by emask -- confirm. */
  1322     emask = vis_edge16(dp, dend);
       
  1323     vis_pst_16(acc0, dp, emask);
       
  1324   }
       
  1325 }
       
  1326 
       
  1327 /***************************************************************/
       
  1328 void mlib_v_ImageLookUp_U8_U16_3_SrcOff3_D1(const mlib_u8  *src,
       
  1329                                             mlib_u16       *dst,
       
  1330                                             mlib_s32       xsize,
       
  1331                                             const mlib_u16 *table0,
       
  1332                                             const mlib_u16 *table1,
       
  1333                                             const mlib_u16 *table2)
       
  1334 {
         /*
          * Converts xsize U8 source elements into U16 destination elements
          * by table lookup, cycling through table0, table1, table2 element
          * by element (element k is looked up in table(k % 3)).
          *
          * src    - source bytes. mlib_v_ImageLookUp_U8_U16_3 selects this
          *          variant when ((mlib_addr)src & 3) == 3, so (src - 3) is
          *          a 4-byte aligned word address.
          * dst    - destination. It is written through an mlib_d64 pointer,
          *          so it is expected to be 8-byte aligned; the caller emits
          *          the unaligned leading elements itself before calling.
          * xsize  - number of elements (not 3-element groups) to convert.
          * table0 - table for elements 0, 3, 6, ...; indexed by source byte.
          * table1 - table for elements 1, 4, 7, ...
          * table2 - table for elements 2, 5, 8, ...
          *
          * NOTE(review): the source is fetched as whole 32-bit words, so the
          * 3 bytes before src and some bytes after the last needed element
          * are read as well. The shift/mask pattern below treats the most
          * significant byte of each word as the earliest element, i.e. it
          * relies on big-endian byte order.
          *
          * NOTE(review): acc0..acc2 are passed to vis_faligndata before they
          * are ever assigned. Presumably every lane that ends up being
          * stored has been overwritten by then (standard VIS
          * alignaddr/faligndata behaviour, not visible in this file) -
          * confirm.
          */
       
  1335   mlib_u32 *sa;              /* aligned pointer to source data */
       
  1336   mlib_u8  *sp;              /* pointer to source data */
       
  1337   mlib_u32 s0, s1, s2, s3;   /* source data */
       
  1338   mlib_u16 *dl;              /* pointer to start of destination */
       
  1339   mlib_u16 *dend;            /* pointer to end of destination */
       
  1340   mlib_d64 *dp;              /* aligned pointer to destination */
       
  1341   mlib_d64 t0, t1, t2;       /* destination data */
       
  1342   mlib_d64 t3, t4, t5;       /* destination data */
       
  1343   mlib_d64 t6, t7, t8;       /* destination data */
       
  1344   mlib_d64 t9, t10, t11;     /* destination data */
       
  1345   mlib_d64 acc0, acc1, acc2; /* destination data */
       
  1346   mlib_s32 emask;            /* edge mask */
       
  1347   mlib_s32 i, num;           /* loop variable */
       
  1348   const mlib_u16 *table;     /* scratch pointer used when rotating the tables */
       
  1349 
       
  1350   sa   = (mlib_u32*)(src - 3);   /* word whose last byte in memory is src[0] */
       
  1351   dl   = dst;
       
  1352   dp   = (mlib_d64 *) dl;
       
  1353   dend = dl + xsize - 1;         /* last destination element to write */
       
  1354 
         /*
          * Align offset 6: presumably makes every vis_faligndata(t, acc)
          * below shift acc down by one 16-bit lane and insert the low 16
          * bits of t on top, so four calls assemble four U16 results whose
          * first-inserted value lands in the last lane - confirm against
          * the VIS documentation.
          */
       
  1355   vis_alignaddr((void *) 0, 6);
       
  1356 
       
  1357   i = 0;
       
  1358 
       
  1359   s0 = *sa++;                    /* only the low byte (src[0]) is used */
       
  1360 
         /*
          * Groups of 12 elements: the low byte of s0 plus the words s1..s3
          * supply 12 source bytes and produce 3 destination doubles.
          *
          * Index expressions: (s >> 23) & 0x1FE, (s >> 15) & 0x1FE,
          * (s >> 7) & 0x1FE and (s << 1) & 0x1FE each equal 2 * byte for
          * the four bytes of s, most to least significant, which is the
          * byte offset of that entry in a U16 table.
          */
       
  1361   if (xsize >= 12) {
       
  1362 
       
  1363     s1 = sa[0];
       
  1364     s2 = sa[1];
       
  1365     s3 = sa[2];
       
  1366     sa += 3;
       
  1367 
           /*
            * The bound xsize - 24 leaves one loaded group for the epilogue
            * after the loop. Each iteration looks up the current group,
            * then loads the words of the next group before storing.
            */
       
  1368 #pragma pipeloop(0)
       
  1369     for(i = 0; i <= xsize - 24; i+=12, sa += 3, dp += 3) {
       
  1370       t3 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
       
  1371       t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
       
  1372       t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
       
  1373       t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
       
  1374       t7 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE);
       
  1375       t6 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE);
       
  1376       t5 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE);
       
  1377       t4 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
       
  1378       t11 = VIS_LD_U16_I(table2, (s3 >> 7) & 0x1FE);
       
  1379       t10 = VIS_LD_U16_I(table1, (s3 >> 15) & 0x1FE);
       
  1380       t9 = VIS_LD_U16_I(table0, (s3 >> 23) & 0x1FE);
       
  1381       t8 = VIS_LD_U16_I(table2, (s2 << 1) & 0x1FE);
       
  1382       acc0 = vis_faligndata(t3, acc0);
       
  1383       acc0 = vis_faligndata(t2, acc0);
       
  1384       acc0 = vis_faligndata(t1, acc0);
       
  1385       acc0 = vis_faligndata(t0, acc0);
       
  1386       acc1 = vis_faligndata(t7, acc1);
       
  1387       acc1 = vis_faligndata(t6, acc1);
       
  1388       acc1 = vis_faligndata(t5, acc1);
       
  1389       acc1 = vis_faligndata(t4, acc1);
       
  1390       acc2 = vis_faligndata(t11, acc2);
       
  1391       acc2 = vis_faligndata(t10, acc2);
       
  1392       acc2 = vis_faligndata(t9, acc2);
       
  1393       acc2 = vis_faligndata(t8, acc2);
       
  1394       s0 = s3;                   /* low byte of s3 starts the next group */
       
  1395       s1 = sa[0];
       
  1396       s2 = sa[1];
       
  1397       s3 = sa[2];
       
  1398       dp[0] = acc0;
       
  1399       dp[1] = acc1;
       
  1400       dp[2] = acc2;
       
  1401     }
       
  1402 
           /* Epilogue: the last full group of 12, already held in s0..s3. */
       
  1403     t3 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
       
  1404     t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
       
  1405     t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
       
  1406     t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
       
  1407     t7 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE);
       
  1408     t6 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE);
       
  1409     t5 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE);
       
  1410     t4 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
       
  1411     t11 = VIS_LD_U16_I(table2, (s3 >> 7) & 0x1FE);
       
  1412     t10 = VIS_LD_U16_I(table1, (s3 >> 15) & 0x1FE);
       
  1413     t9 = VIS_LD_U16_I(table0, (s3 >> 23) & 0x1FE);
       
  1414     t8 = VIS_LD_U16_I(table2, (s2 << 1) & 0x1FE);
       
  1415     acc0 = vis_faligndata(t3, acc0);
       
  1416     acc0 = vis_faligndata(t2, acc0);
       
  1417     acc0 = vis_faligndata(t1, acc0);
       
  1418     acc0 = vis_faligndata(t0, acc0);
       
  1419     acc1 = vis_faligndata(t7, acc1);
       
  1420     acc1 = vis_faligndata(t6, acc1);
       
  1421     acc1 = vis_faligndata(t5, acc1);
       
  1422     acc1 = vis_faligndata(t4, acc1);
       
  1423     acc2 = vis_faligndata(t11, acc2);
       
  1424     acc2 = vis_faligndata(t10, acc2);
       
  1425     acc2 = vis_faligndata(t9, acc2);
       
  1426     acc2 = vis_faligndata(t8, acc2);
       
  1427     dp[0] = acc0;
       
  1428     dp[1] = acc1;
       
  1429     dp[2] = acc2;
       
  1430     s0 = s3;
       
  1431     dp += 3; i += 12;
       
  1432   }
       
  1433 
         /* At least 8 elements left: two more words -> two doubles. */
       
  1434   if (i <= xsize - 8) {
       
  1435     s1 = sa[0];
       
  1436     s2 = sa[1];
       
  1437     t3 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
       
  1438     t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
       
  1439     t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
       
  1440     t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
       
  1441     t7 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE);
       
  1442     t6 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE);
       
  1443     t5 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE);
       
  1444     t4 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
       
  1445     acc0 = vis_faligndata(t3, acc0);
       
  1446     acc0 = vis_faligndata(t2, acc0);
       
  1447     acc0 = vis_faligndata(t1, acc0);
       
  1448     acc0 = vis_faligndata(t0, acc0);
       
  1449     acc1 = vis_faligndata(t7, acc1);
       
  1450     acc1 = vis_faligndata(t6, acc1);
       
  1451     acc1 = vis_faligndata(t5, acc1);
       
  1452     acc1 = vis_faligndata(t4, acc1);
       
  1453     dp[0] = acc0;
       
  1454     dp[1] = acc1;
           /*
            * 8 % 3 == 2, so the next element belongs to the old table2:
            * rotate to (table0, table1, table2) = (old 2, old 0, old 1).
            */
       
  1455     table = table0; table0 = table2;
       
  1456     table2 = table1; table1 = table;
       
  1457     sa += 2; i += 8; dp += 2;
       
  1458     s0 = s2;
       
  1459   }
       
  1460 
         /* At least 4 elements left: one more word -> one double. */
       
  1461   if (i <= xsize - 4) {
       
  1462     s1 = sa[0];
       
  1463     t3 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
       
  1464     t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
       
  1465     t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
       
  1466     t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
       
  1467     acc0 = vis_faligndata(t3, acc0);
       
  1468     acc0 = vis_faligndata(t2, acc0);
       
  1469     acc0 = vis_faligndata(t1, acc0);
       
  1470     acc0 = vis_faligndata(t0, acc0);
       
  1471     dp[0] = acc0;
           /*
            * 4 % 3 == 1, so the next element belongs to the old table1:
            * rotate to (table0, table1, table2) = (old 1, old 2, old 0).
            */
       
  1472     table = table0; table0 = table1;
       
  1473     table1 = table2; table2 = table;
       
  1474     sa++; i += 4; dp++;
       
  1475     s0 = s1;
       
  1476   }
       
  1477 
         /*
          * Switch to byte access for the tail. The last byte of the most
          * recently consumed word (the one held in the low byte of s0) is
          * the next unprocessed source element.
          */
       
  1478   sp = (mlib_u8*)sa;
       
  1479   sp -= 1;
       
  1480 
         /* 1..3 elements remain when dp has not passed dend. */
       
  1481   if ((mlib_addr) dp <= (mlib_addr) dend) {
       
  1482 
       
  1483     num = (mlib_u16*) dend - (mlib_u16*) dp;
       
  1484     sp  += num;                  /* sp -> last remaining source byte */
       
  1485     num ++;                      /* num = count of remaining elements */
       
  1486 
           /*
            * The remaining elements are looked up last to first, so the
            * table order below runs backwards and always ends at table0.
            */
       
  1487     if (num == 1) {
       
  1488       s0 = (mlib_s32) *sp;
       
  1489       sp --;
       
  1490 
       
  1491       t0  = VIS_LD_U16_I(table0, 2*s0);
       
  1492       acc0 = vis_faligndata(t0, acc0);
       
  1493     } else if (num  == 2) {
       
  1494       s0 = (mlib_s32) *sp;
       
  1495       sp --;
       
  1496 
       
  1497       t0  = VIS_LD_U16_I(table1, 2*s0);
       
  1498       acc0 = vis_faligndata(t0, acc0);
       
  1499 
       
  1500       s0 = (mlib_s32) *sp;
       
  1501       sp --;
       
  1502 
       
  1503       t0  = VIS_LD_U16_I(table0, 2*s0);
       
  1504       acc0 = vis_faligndata(t0, acc0);
       
  1505     } else if (num == 3) {
       
  1506       s0 = (mlib_s32) *sp;
       
  1507       sp --;
       
  1508 
       
  1509       t0  = VIS_LD_U16_I(table2, 2*s0);
       
  1510       acc0 = vis_faligndata(t0, acc0);
       
  1511 
       
  1512       s0 = (mlib_s32) *sp;
       
  1513       sp --;
       
  1514 
       
  1515       t0  = VIS_LD_U16_I(table1, 2*s0);
       
  1516       acc0 = vis_faligndata(t0, acc0);
       
  1517 
       
  1518       s0 = (mlib_s32) *sp;
       
  1519       sp --;
       
  1520 
       
  1521       t0  = VIS_LD_U16_I(table0, 2*s0);
       
  1522       acc0 = vis_faligndata(t0, acc0);
       
  1523     }
       
  1524 
           /*
            * Masked store: presumably only the lanes from dp up to dend
            * are written (vis_edge16 / vis_pst_16 are defined outside
            * this file) - confirm.
            */
       
  1525     emask = vis_edge16(dp, dend);
       
  1526     vis_pst_16(acc0, dp, emask);
       
  1527   }
       
  1528 }
       
  1529 
       
  1530 /***************************************************************/
       
  1531 void mlib_v_ImageLookUp_U8_U16_3(const mlib_u8  *src,
       
  1532                                  mlib_s32       slb,
       
  1533                                  mlib_u16       *dst,
       
  1534                                  mlib_s32       dlb,
       
  1535                                  mlib_s32       xsize,
       
  1536                                  mlib_s32       ysize,
       
  1537                                  const mlib_u16 **table)
       
  1538 {
       
  1539   mlib_u8  *sl;
       
  1540   mlib_u16 *dl;
       
  1541   const mlib_u16 *tab;
       
  1542   mlib_s32 j, i;
       
  1543 
       
  1544   sl = (void *)src;
       
  1545   dl = dst;
       
  1546 
       
  1547   /* row loop */
       
  1548   for (j = 0; j < ysize; j ++) {
       
  1549     mlib_u8   *sp = sl;
       
  1550     mlib_u16  *dp = dl;
       
  1551     const mlib_u16  *tab0 = table[0];
       
  1552     const mlib_u16  *tab1 = table[1];
       
  1553     const mlib_u16  *tab2 = table[2];
       
  1554     mlib_s32  off, size = xsize * 3;
       
  1555 
       
  1556     off = ((8 - ((mlib_addr)dp & 7)) & 7) >> 1;
       
  1557 
       
  1558     off = (off < size) ? off : size;
       
  1559 
       
  1560     for (i = 0; i < off - 2; i += 3) {
       
  1561       *dp++ = tab0[(*sp++)];
       
  1562       *dp++ = tab1[(*sp++)];
       
  1563       *dp++ = tab2[(*sp++)];
       
  1564       size-=3;
       
  1565     }
       
  1566 
       
  1567     off -= i;
       
  1568 
       
  1569     if (off == 1) {
       
  1570       *dp++ = tab0[(*sp++)];
       
  1571       tab = tab0; tab0 = tab1;
       
  1572       tab1 = tab2; tab2 = tab;
       
  1573       size--;
       
  1574     } else if (off == 2) {
       
  1575       *dp++ = tab0[(*sp++)];
       
  1576       *dp++ = tab1[(*sp++)];
       
  1577       tab = tab2; tab2 = tab1;
       
  1578       tab1 = tab0; tab0 = tab;
       
  1579       size-=2;
       
  1580     }
       
  1581 
       
  1582     if (size > 0) {
       
  1583 
       
  1584       off = (mlib_addr)sp & 3;
       
  1585 
       
  1586       if (off == 0) {
       
  1587         mlib_v_ImageLookUp_U8_U16_3_SrcOff0_D1(sp, dp, size, tab0, tab1, tab2);
       
  1588       } else if (off == 1) {
       
  1589         mlib_v_ImageLookUp_U8_U16_3_SrcOff1_D1(sp, dp, size, tab0, tab1, tab2);
       
  1590       } else if (off == 2) {
       
  1591         mlib_v_ImageLookUp_U8_U16_3_SrcOff2_D1(sp, dp, size, tab0, tab1, tab2);
       
  1592       } else {
       
  1593         mlib_v_ImageLookUp_U8_U16_3_SrcOff3_D1(sp, dp, size, tab0, tab1, tab2);
       
  1594       }
       
  1595     }
       
  1596 
       
  1597     sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
       
  1598     dl = (mlib_u16 *) ((mlib_u8 *) dl + dlb);
       
  1599   }
       
  1600 }
       
  1601 
       
  1602 /***************************************************************/